From eb07891ddf3be613d84fdea7ac5c790cf8e8416c Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:19:32 -0800 Subject: [PATCH 01/81] flake8 and validate path --- argschema/fields/files.py | 5 +++++ argschema/utils.py | 4 ---- test/fields/test_numpyarray.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/argschema/fields/files.py b/argschema/fields/files.py index 1172d1b8..ae896d72 100644 --- a/argschema/fields/files.py +++ b/argschema/fields/files.py @@ -146,6 +146,11 @@ def _validate(self, value): # use outputfile to test that a file in this location is a valid path validate_outpath(value) +def validate_input_path(value): + if not os.path.isfile(value): + raise mm.ValidationError("%s is not a file" % value) + elif not os.access(value, os.R_OK): + raise mm.ValidationError("%s is not readable" % value) def validate_input_path(value): if not os.path.isfile(value): diff --git a/argschema/utils.py b/argschema/utils.py index 9e9c640d..cbe0b31b 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -403,12 +403,10 @@ def load(schema, d): schema that you want to use to validate d: dict dictionary to validate and load - Returns ------- dict deserialized and validated dictionary - Raises ------ marshmallow.ValidationError @@ -429,12 +427,10 @@ def dump(schema, d): schema that you want to use to validate and dump d: dict dictionary to validate and dump - Returns ------- dict serialized and validated dictionary - Raises ------ marshmallow.ValidationError diff --git a/test/fields/test_numpyarray.py b/test/fields/test_numpyarray.py index 6dddafb9..d44723af 100644 --- a/test/fields/test_numpyarray.py +++ b/test/fields/test_numpyarray.py @@ -1,7 +1,7 @@ import pytest from argschema import ArgSchemaParser, ArgSchema from argschema.fields import NumpyArray -from argschema.utils import dump +from argschema.utils import load,dump import marshmallow as mm import numpy as np From afefa32348c71e6b765a936deeddaedfc10e0401 Mon Sep 17 00:00:00 
2001 From: Forrest Collman Date: Mon, 23 Apr 2018 07:49:14 -0700 Subject: [PATCH 02/81] flake8 warning cleanup --- argschema/fields/files.py | 1 + argschema/utils.py | 8 -------- test/fields/test_numpyarray.py | 2 +- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/argschema/fields/files.py b/argschema/fields/files.py index ae896d72..32f8c71b 100644 --- a/argschema/fields/files.py +++ b/argschema/fields/files.py @@ -146,6 +146,7 @@ def _validate(self, value): # use outputfile to test that a file in this location is a valid path validate_outpath(value) + def validate_input_path(value): if not os.path.isfile(value): raise mm.ValidationError("%s is not a file" % value) diff --git a/argschema/utils.py b/argschema/utils.py index cbe0b31b..a5791a8b 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -407,10 +407,6 @@ def load(schema, d): ------- dict deserialized and validated dictionary - Raises - ------ - marshmallow.ValidationError - if the dictionary does not conform to the schema """ results = schema.load(d) @@ -431,10 +427,6 @@ def dump(schema, d): ------- dict serialized and validated dictionary - Raises - ------ - marshmallow.ValidationError - if the dictionary does not conform to the schema """ errors=schema.validate(d) if len(errors)>0: diff --git a/test/fields/test_numpyarray.py b/test/fields/test_numpyarray.py index d44723af..6dddafb9 100644 --- a/test/fields/test_numpyarray.py +++ b/test/fields/test_numpyarray.py @@ -1,7 +1,7 @@ import pytest from argschema import ArgSchemaParser, ArgSchema from argschema.fields import NumpyArray -from argschema.utils import load,dump +from argschema.utils import dump import marshmallow as mm import numpy as np From 6bdb0ac24ee3c72b7dff1b4a43055ff1947dfd63 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 23 Apr 2018 08:56:31 -0700 Subject: [PATCH 03/81] removing deprecated --- argschema/argschema_parser.py | 56 --------------------------------- argschema/deprecated.py | 12 ------- 
argschema/fields/__init__.py | 15 +++++---- argschema/fields/deprecated.py | 34 -------------------- argschema/fields/numpyarrays.py | 2 -- argschema/utils.py | 18 +---------- docs/api/argschema.rst | 8 ----- docs/tests/fields.rst | 8 ----- docs/user/intro.rst | 19 ----------- examples/cli_example.py | 2 -- examples/deprecated_example.py | 15 --------- test/fields/test_deprecated.py | 25 --------------- test/test_cli_overrides.py | 12 ------- test/test_first_test.py | 24 +------------- 14 files changed, 9 insertions(+), 241 deletions(-) delete mode 100644 argschema/deprecated.py delete mode 100644 argschema/fields/deprecated.py delete mode 100644 examples/deprecated_example.py delete mode 100644 test/fields/test_deprecated.py diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index f29e081e..e7402242 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -3,7 +3,6 @@ ''' import json import logging -import copy from . import schemas from . import utils from . 
import fields @@ -66,47 +65,6 @@ def is_recursive_schema(schema, schema_list=[]): return False -def fill_defaults(schema, args): - """DEPRECATED, function to fill in default values from schema into args - bug: goes into an infinite loop when there is a recursively defined schema - - Parameters - ---------- - schema : marshmallow.Schema - schema to get defaults from - args : - - - Returns - ------- - dict - dictionary with missing default values filled in - - """ - - defaults = [] - - # find all of the schema entries with default values - schemata = [(schema, [])] - while schemata: - subschema, path = schemata.pop() - for k, v in subschema.declared_fields.items(): - if isinstance(v, mm.fields.Nested): - schemata.append((v.schema, path + [k])) - elif v.default != mm.missing: - defaults.append((path + [k], v.default)) - - # put the default entries into the args dictionary - args = copy.deepcopy(args) - for path, val in defaults: - d = args - for path_item in path[:-1]: - d = d.setdefault(path_item, {}) - if path[-1] not in d: - d[path[-1]] = val - return args - - class ArgSchemaParser(object): """The main class you should sub-class to write your own argschema module. Takes input_data, reference to a input_json and the command line inputs and parses out the parameters @@ -258,20 +216,6 @@ def load_schema_with_defaults(self, schema, args): because these won't work with loading defaults. 
""" - is_recursive = is_recursive_schema(schema) - is_non_default = contains_non_default_schemas(schema) - if (not is_recursive) and is_non_default: - # throw a warning - self.logger.warning("""DEPRECATED:You are using a Schema which contains - a Schema which is not subclassed from argschema.DefaultSchema, - default values will not work correctly in this case, - this use is deprecated, and future versions will not fill in default - values when you use non-DefaultSchema subclasses""") - args = fill_defaults(schema, args) - if is_recursive and is_non_default: - raise mm.ValidationError( - 'Recursive schemas need to subclass argschema.DefaultSchema else defaults will not work') - # load the dictionary via the schema result = utils.load(schema, args) diff --git a/argschema/deprecated.py b/argschema/deprecated.py deleted file mode 100644 index 6d8ce615..00000000 --- a/argschema/deprecated.py +++ /dev/null @@ -1,12 +0,0 @@ -from .argschema_parser import ArgSchemaParser -from .schemas import ArgSchema - - -class JsonModule(ArgSchemaParser): - """deprecated name of ArgSchemaParser""" - pass - - -class ModuleParameters(ArgSchema): - """deprecated name of ArgSchema""" - pass diff --git a/argschema/fields/__init__.py b/argschema/fields/__init__.py index c752233c..5adb5886 100644 --- a/argschema/fields/__init__.py +++ b/argschema/fields/__init__.py @@ -1,14 +1,13 @@ '''sub-module for custom marshmallow fields of general utility''' -from marshmallow.fields import * # noQA:F401 -from marshmallow.fields import __all__ as __mmall__ # noQA:F401 -from .files import OutputFile, InputDir, InputFile, OutputDir # noQA:F401 -from .numpyarrays import NumpyArray # noQA:F401 -from .deprecated import OptionList # noQA:F401 -from .loglevel import LogLevel # noQA:F401 -from .slice import Slice # noQA:F401 +from marshmallow.fields import * +from marshmallow.fields import __all__ as __mmall__ +from .files import OutputFile, InputDir, InputFile, OutputDir +from .numpyarrays import NumpyArray 
+from .loglevel import LogLevel +from .slice import Slice __all__ = __mmall__ + ['OutputFile', 'InputDir', 'InputFile', 'OutputDir', - 'NumpyArray', 'OptionList', 'LogLevel', 'Slice'] + 'NumpyArray','LogLevel', 'Slice'] # Python 2 subpackage (not module) * imports break if items in __all__ # are unicode. diff --git a/argschema/fields/deprecated.py b/argschema/fields/deprecated.py deleted file mode 100644 index 717b876b..00000000 --- a/argschema/fields/deprecated.py +++ /dev/null @@ -1,34 +0,0 @@ -'''marshmallow fields related to choosing amongst a set of options''' -import marshmallow as mm -import logging -logger = logging.getLogger('argschema') - - -class OptionList(mm.fields.Field): - """OptionList is a marshmallow field which enforces that this field - is one of a finite set of options. - OptionList(options,*args,**kwargs) where options is a list of - json compatible options which this option will be enforced to belong - - Parameters - ---------- - options : list - A list of python objects of which this field must be one of - kwargs : dict - the same as any :class:`Field` receives - """ - - def __init__(self, options, **kwargs): - self.options = options - logger.warning( - 'DEPRECATED: use validate=mm.validate.OneOf([a,b,c...]) in field definition instead') - super(OptionList, self).__init__(**kwargs) - - def _serialize(self, value, attr, obj): - return value - - def _validate(self, value): - if value not in self.options: - raise mm.ValidationError("%s is not a valid option" % value) - - return value diff --git a/argschema/fields/numpyarrays.py b/argschema/fields/numpyarrays.py index a369ad9e..2e956c4a 100644 --- a/argschema/fields/numpyarrays.py +++ b/argschema/fields/numpyarrays.py @@ -20,8 +20,6 @@ class NumpyArray(mm.fields.List): def __init__(self, dtype=None, *args, **kwargs): self.dtype = dtype - if "cli_as_single_argument" not in kwargs: - kwargs["cli_as_single_argument"] = True super(NumpyArray, self).__init__(mm.fields.Field, *args, **kwargs) def 
_deserialize(self, value, attr, obj, **kwargs): diff --git a/argschema/utils.py b/argschema/utils.py index a5791a8b..062e5af5 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -2,7 +2,6 @@ marshmallow schemas to argparse and merging dictionaries from both systems ''' import logging -import warnings import ast import argparse from operator import add @@ -55,11 +54,7 @@ def get_type_from_field(field): callable Function to call to cast argument to """ - if (isinstance(field, fields.List) and - not field.metadata.get("cli_as_single_argument", False)): - return list - else: - return FIELD_TYPE_MAP.get(type(field), str) + return FIELD_TYPE_MAP.get(type(field), str) def cli_error_dict(arg_path, field_type, index=0): @@ -335,17 +330,6 @@ def build_schema_arguments(schema, arguments=None, path=None, description=None): if isinstance(validator, mm.validate.OneOf): arg['help'] += " (valid options are {})".format(validator.choices) - if (isinstance(field, mm.fields.List) and - not field.metadata.get("cli_as_single_argument", False)): - warn_msg = ("'{}' is using old-style command-line syntax with " - "each element as a separate argument. This will " - "not be supported in argschema after " - "2.0. See http://argschema.readthedocs.io/en/" - "master/user/intro.html#command-line-specification" - " for details.").format(arg_name) - warnings.warn(warn_msg, FutureWarning) - arg['nargs'] = '*' - # do type mapping after parsing so we can raise validation errors arg['type'] = str diff --git a/docs/api/argschema.rst b/docs/api/argschema.rst index a8660473..a17ec924 100644 --- a/docs/api/argschema.rst +++ b/docs/api/argschema.rst @@ -19,14 +19,6 @@ argschema\.argschema\_parser module :undoc-members: :show-inheritance: -argschema\.deprecated module ----------------------------- - -.. 
automodule:: argschema.deprecated - :members: - :undoc-members: - :show-inheritance: - argschema\.schemas module ------------------------- diff --git a/docs/tests/fields.rst b/docs/tests/fields.rst index 933bb8b2..d5c65a9d 100644 --- a/docs/tests/fields.rst +++ b/docs/tests/fields.rst @@ -4,14 +4,6 @@ fields package Submodules ---------- -fields\.test\_deprecated module -------------------------------- - -.. automodule:: fields.test_deprecated - :members: - :undoc-members: - :show-inheritance: - fields\.test\_files module -------------------------- diff --git a/docs/user/intro.rst b/docs/user/intro.rst index c8235309..04138f7c 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -158,25 +158,6 @@ passed by the shell. If there are spaces in the value, it will need to be wrapped in quotes, and any special characters will need to be escaped with \. Booleans are set with True or 1 for true and False or 0 for false. -An exception to this rule is list formatting. If a schema contains a -:class:`~marshmallow.fields.List` and does not set the -`cli_as_single_argument` keyword argument to True, lists will be parsed -as `--list_name ...`. In argschema 2.0 lists will be -parsed in the same way as other arguments, as it allows more flexibility -in list types and more clearly represents the intended data structure. - -An example script showing old and new list settings: - -.. literalinclude:: ../../examples/deprecated_example.py - :caption: deprecated_example.py - -Running this code can demonstrate the differences in command-line usage: - -.. command-output:: python deprecated_example.py --help - :cwd: /../examples - -.. 
command-output:: python deprecated_example.py --list_old 9.1 8.2 7.3 --list_new [6.4,5.5,4.6] - :cwd: /../examples We can explore some typical examples of command line usage with the following script: diff --git a/examples/cli_example.py b/examples/cli_example.py index 5a71323e..b7cb6968 100644 --- a/examples/cli_example.py +++ b/examples/cli_example.py @@ -13,10 +13,8 @@ class MySchema(ArgSchema): description="my example array") string_list = List(List(Str), default=[["hello", "world"], ["lists!"]], - cli_as_single_argument=True, description="list of lists of strings") int_list = List(Int, default=[1, 2, 3], - cli_as_single_argument=True, description="list of ints") nested = Nested(MyNestedSchema, required=True) diff --git a/examples/deprecated_example.py b/examples/deprecated_example.py deleted file mode 100644 index bea1e12c..00000000 --- a/examples/deprecated_example.py +++ /dev/null @@ -1,15 +0,0 @@ -from argschema import ArgSchema, ArgSchemaParser -from argschema.fields import List, Float - - -class MySchema(ArgSchema): - list_old = List(Float, default=[1.1, 2.2, 3.3], - description="float list with deprecated cli") - list_new = List(Float, default=[4.4, 5.5, 6.6], - cli_as_single_argument=True, - description="float list with supported cli") - - -if __name__ == '__main__': - mod = ArgSchemaParser(schema_type=MySchema) - print(mod.args) diff --git a/test/fields/test_deprecated.py b/test/fields/test_deprecated.py deleted file mode 100644 index a678074b..00000000 --- a/test/fields/test_deprecated.py +++ /dev/null @@ -1,25 +0,0 @@ -import pytest -from argschema import ArgSchemaParser, ArgSchema -from argschema.fields import OptionList -import marshmallow as mm - - -class OptionSchema(ArgSchema): - a = OptionList([1, 2, 3], required=True, description='one of 1,2,3') - - -def test_option_list(): - input_data = { - 'a': 1 - } - ArgSchemaParser( - input_data=input_data, schema_type=OptionSchema, args=[]) - - -def test_bad_option(): - input_data = { - 'a': 4 - } - 
with pytest.raises(mm.ValidationError): - ArgSchemaParser( - input_data=input_data, schema_type=OptionSchema, args=[]) diff --git a/test/test_cli_overrides.py b/test/test_cli_overrides.py index 146fd287..fd041ccf 100644 --- a/test/test_cli_overrides.py +++ b/test/test_cli_overrides.py @@ -65,14 +65,6 @@ def test_data(inputdir, inputfile, outputdir, outputfile): return data -@pytest.fixture -def deprecated_data(): - data = { - "list_deprecated": [300, 200, 800, 1000], - } - return data - - class MyNestedSchema(DefaultSchema): a = fields.Int(required=True) b = fields.Boolean(required=True) @@ -104,10 +96,6 @@ class MySchema(ArgSchema): uuid = fields.UUID(required=True) -class MyDeprecatedSchema(ArgSchema): - list_deprecated = fields.List(fields.Int, required=True) - - def test_unexpected_input(test_data): with pytest.raises(SystemExit): ArgSchemaParser(test_data, schema_type=MySchema, diff --git a/test/test_first_test.py b/test/test_first_test.py index 561be6c5..a038e71c 100644 --- a/test/test_first_test.py +++ b/test/test_first_test.py @@ -126,7 +126,7 @@ def test_simple_extension_write_overwrite(simple_extension_file): def test_simple_extension_write_overwrite_list(simple_extension_file): args = ['--input_json', str(simple_extension_file), - '--test.d', '6', '7', '8', '9'] + '--test.d', "[6,7,8,9]"] mod = ArgSchemaParser(schema_type=SimpleExtension, args=args) assert len(mod.args['test']['d']) == 4 @@ -139,28 +139,6 @@ def test_bad_input_json_argparse(): # TESTS DEMONSTRATING BAD BEHAVIOR OF DEFAULT LOADING -class MyExtensionOld(mm.Schema): - a = mm.fields.Str(description='a string') - b = mm.fields.Int(description='an integer') - c = mm.fields.Int(description='an integer', default=10) - d = mm.fields.List(mm.fields.Int, - description='a list of integers') - - -class SimpleExtensionOld(ArgSchema): - test = mm.fields.Nested(MyExtensionOld, default=None, required=True) - - -def test_simple_extension_old_pass(): - mod = ArgSchemaParser( - 
input_data=SimpleExtension_example_valid, - schema_type=SimpleExtensionOld, args=[]) - assert mod.args['test']['a'] == 'hello' - assert mod.args['test']['b'] == 1 - assert mod.args['test']['c'] == 10 - assert len(mod.args['test']['d']) == 3 - - class RecursiveSchema(argschema.schemas.DefaultSchema): children = mm.fields.Nested("self", many=True, description='children of this node') From 2b3414b8fd89a9e43b567f83f8613fa48264a4a0 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 27 Apr 2018 07:54:47 -0700 Subject: [PATCH 04/81] flake8 changes --- argschema/fields/__init__.py | 15 ++++++++------- test/test_first_test.py | 1 + 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/argschema/fields/__init__.py b/argschema/fields/__init__.py index 5adb5886..c752233c 100644 --- a/argschema/fields/__init__.py +++ b/argschema/fields/__init__.py @@ -1,13 +1,14 @@ '''sub-module for custom marshmallow fields of general utility''' -from marshmallow.fields import * -from marshmallow.fields import __all__ as __mmall__ -from .files import OutputFile, InputDir, InputFile, OutputDir -from .numpyarrays import NumpyArray -from .loglevel import LogLevel -from .slice import Slice +from marshmallow.fields import * # noQA:F401 +from marshmallow.fields import __all__ as __mmall__ # noQA:F401 +from .files import OutputFile, InputDir, InputFile, OutputDir # noQA:F401 +from .numpyarrays import NumpyArray # noQA:F401 +from .deprecated import OptionList # noQA:F401 +from .loglevel import LogLevel # noQA:F401 +from .slice import Slice # noQA:F401 __all__ = __mmall__ + ['OutputFile', 'InputDir', 'InputFile', 'OutputDir', - 'NumpyArray','LogLevel', 'Slice'] + 'NumpyArray', 'OptionList', 'LogLevel', 'Slice'] # Python 2 subpackage (not module) * imports break if items in __all__ # are unicode. 
diff --git a/test/test_first_test.py b/test/test_first_test.py index a038e71c..d03d40db 100644 --- a/test/test_first_test.py +++ b/test/test_first_test.py @@ -139,6 +139,7 @@ def test_bad_input_json_argparse(): # TESTS DEMONSTRATING BAD BEHAVIOR OF DEFAULT LOADING + class RecursiveSchema(argschema.schemas.DefaultSchema): children = mm.fields.Nested("self", many=True, description='children of this node') From b6ddb98f93ef54c1b958224a9b3a438e352dcb32 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 28 Apr 2018 08:23:35 -0700 Subject: [PATCH 05/81] removing more deprecated items --- argschema/__init__.py | 1 - argschema/fields/__init__.py | 1 - 2 files changed, 2 deletions(-) diff --git a/argschema/__init__.py b/argschema/__init__.py index f28aafde..4693a4b9 100644 --- a/argschema/__init__.py +++ b/argschema/__init__.py @@ -2,7 +2,6 @@ from .fields import InputFile, InputDir, OutputFile, OptionList # noQA:F401 from .schemas import ArgSchema # noQA:F401 from .argschema_parser import ArgSchemaParser # noQA:F401 -from .deprecated import JsonModule, ModuleParameters # noQA:F401 __version__ = "3.0.1" diff --git a/argschema/fields/__init__.py b/argschema/fields/__init__.py index c752233c..94857d9b 100644 --- a/argschema/fields/__init__.py +++ b/argschema/fields/__init__.py @@ -3,7 +3,6 @@ from marshmallow.fields import __all__ as __mmall__ # noQA:F401 from .files import OutputFile, InputDir, InputFile, OutputDir # noQA:F401 from .numpyarrays import NumpyArray # noQA:F401 -from .deprecated import OptionList # noQA:F401 from .loglevel import LogLevel # noQA:F401 from .slice import Slice # noQA:F401 From 73fdff80b8065d749d808d0b6ec120dd64ec49f4 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 28 Apr 2018 08:16:46 -0700 Subject: [PATCH 06/81] adding sources --- argschema/argschema_parser.py | 39 ++++++++++++++++++++-------- argschema/sources/__init__.py | 0 argschema/sources/json_source.py | 27 ++++++++++++++++++++ argschema/sources/pika_source.py | 29 
+++++++++++++++++++++ argschema/sources/source.py | 44 ++++++++++++++++++++++++++++++++ argschema/sources/yaml_source.py | 10 ++++++++ test/test_sources.py | 19 ++++++++++++++ 7 files changed, 157 insertions(+), 11 deletions(-) create mode 100644 argschema/sources/__init__.py create mode 100644 argschema/sources/json_source.py create mode 100644 argschema/sources/pika_source.py create mode 100644 argschema/sources/source.py create mode 100644 argschema/sources/yaml_source.py create mode 100644 test/test_sources.py diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index e7402242..cf1d8ad5 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -7,7 +7,7 @@ from . import utils from . import fields import marshmallow as mm - +from .sources.json_source import JsonSource def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema @@ -76,11 +76,15 @@ class ArgSchemaParser(object): Parameters ---------- input_data : dict or None - dictionary parameters instead of --input_json + dictionary parameters to fall back on if all source aren't present schema_type : schemas.ArgSchema the schema to use to validate the parameters output_schema_type : marshmallow.Schema the schema to use to validate the output_json, used by self.output + input_source : argschema.sources.source.Source + a generic source of a dictionary + output_source : argschema.sources.source.Source + a generic output to put output dictionary args : list or None command line arguments passed to the module, if None use argparse to parse the command line, set to [] if you want to bypass command line parsing logger_name : str @@ -95,19 +99,23 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None + input_config_map = [ JsonSource ] + output_config_map = [ JsonSource ] def __init__(self, input_data=None, # dictionary input as option 
instead of --input_json schema_type=None, # schema for parsing arguments output_schema_type=None, # schema for parsing output_json args=None, + input_source = None, + output_source = None, logger_name=__name__): if schema_type is None: schema_type = self.default_schema if output_schema_type is None: output_schema_type = self.default_output_schema - + self.schema = schema_type() self.logger = self.initialize_logger(logger_name, 'WARNING') self.logger.debug('input_data is {}'.format(input_data)) @@ -118,15 +126,24 @@ def __init__(self, argsdict = utils.args_to_dict(argsobj, self.schema) self.logger.debug('argsdict is {}'.format(argsdict)) - if argsobj.input_json is not None: - fields.files.validate_input_path(argsobj.input_json) - with open(argsobj.input_json, 'r') as j: - jsonargs = json.load(j) - else: - jsonargs = input_data if input_data else {} - + #if you received an input_source, get the dictionary from there + if input_source is not None: + input_data = input_source.get_dict() + + #loop over the set of input_configurations to see if the command line arguments include a valid configuration + #for one of them + for InputSource in self.input_config_map: + try: + input_config_d = InputSource.get_config(InputSource.InputConfigSchema,argsdict) + input_source = InputSource(**input_config_d) + input_data = input_source.get_dict() + #if the command line argument dictionary doesn't contain a valid configuration + #simply move on to the next one + except mm.ValidationError as e: + pass + # merge the command line dictionary into the input json - args = utils.smart_merge(jsonargs, argsdict) + args = utils.smart_merge(input_data, argsdict) self.logger.debug('args after merge {}'.format(args)) # validate with load! 
diff --git a/argschema/sources/__init__.py b/argschema/sources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py new file mode 100644 index 00000000..c778482c --- /dev/null +++ b/argschema/sources/json_source.py @@ -0,0 +1,27 @@ +from .source import FileSource +import json +import marshmallow as mm +import argschema + +class JsonInputConfigSchema(mm.Schema): + input_json = argschema.fields.InputFile(required=True, + description = 'filepath to input_json') + +class JsonOutputConfigSchema(mm.Schema): + output_json = argschema.fields.OutputFile(required=True, + description = 'filepath to save output_json') + +class JsonSource(FileSource): + InputConfigSchema = JsonInputConfigSchema + OutputConfigSchema = JsonOutputConfigSchema + def __init__(self,input_json=None, output_json=None): + if input_json is not None: + self.filepath = input_json + if output_json is not None: + self.filepath = output_json + + def read_file(self,fp): + return json.load(fp) + + def write_file(self,fp,d): + json.dump(d,fp) diff --git a/argschema/sources/pika_source.py b/argschema/sources/pika_source.py new file mode 100644 index 00000000..ee9b22a3 --- /dev/null +++ b/argschema/sources/pika_source.py @@ -0,0 +1,29 @@ +from .source import InputSource +import pika +import json + +class PikaJsonSource(InputSource): + + def __init__(self,channel,queue): + """Pika client source for dictionary + + Parameters + ---------- + channel: pika.channel.Channel + pika client channel to connect to + queue: str + queue name to get message from + """ + assert(type(channel)==pika.channel.Channel) + self.channel = channel + self.queue = queue + + def get_dict(self): + method_frame, header_frame, body = self.channel.basic_get(self.queue) + if method_frame: + d = json.loads(body) + self.channel.basic_ack(method_frame.delivery_tag) + return d + + def put_dict(self,d): + \ No newline at end of file diff --git 
a/argschema/sources/source.py b/argschema/sources/source.py new file mode 100644 index 00000000..41b28dd3 --- /dev/null +++ b/argschema/sources/source.py @@ -0,0 +1,44 @@ +import json +import marshmallow as mm + +class Source(object): + InputConfigSchema = None + OutputConfigSchema = None + + def __init__(self): + pass + + def get_dict(self): + pass + + def put_dict(self,d): + pass + + @staticmethod + def get_config(Schema,d): + schema = Schema() + result,errors = schema.load(d) + if len(errors)>0: + raise mm.ValidationError(json.dumps(errors, indent=2)) + return result + +class FileSource(Source): + + def __init__(self,filepath): + self.filepath = filepath + + def get_dict(self): + with open(self.filepath,'r') as fp: + d = self.read_file(fp) + return d + + def put_dict(self,d): + with open(self.filepath,'w') as fp: + self.write_file(fp,d) + + def read_file(self,fp): + pass + + def write_file(self,fp,d): + pass + diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py new file mode 100644 index 00000000..ed3ef0a4 --- /dev/null +++ b/argschema/sources/yaml_source.py @@ -0,0 +1,10 @@ +import yaml +from .source import FileSource + +class YamlSource(FileSource): + + def read_file(self,fp): + return yaml.load(fp) + + def write_file(self,fp,d): + yaml.dump(d,fp) \ No newline at end of file diff --git a/test/test_sources.py b/test/test_sources.py new file mode 100644 index 00000000..48308794 --- /dev/null +++ b/test/test_sources.py @@ -0,0 +1,19 @@ +import argschema +from argschema.sources.json_source import JsonSource +from argschema.sources.yaml_source import YamlSource +from test_argschema_parser import MyParser +import json + +def test_json_source(tmpdir): + file_in = tmpdir.join('test_input_json.json') + input_data = { + 'a':5, + 'nest':{ + 'one':7, + 'two':False + } + } + json.dump(input_data,file_in) + mod = MyParser(input_source= JsonSource(str(file_in)), args=[]) + + \ No newline at end of file From 
073311491a9404ad1fd6d92157b7c29060360426 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 09:33:59 -0800 Subject: [PATCH 07/81] fixed test so that input_json is valid json --- test/test_first_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_first_test.py b/test/test_first_test.py index d03d40db..18d9ccaa 100644 --- a/test/test_first_test.py +++ b/test/test_first_test.py @@ -18,7 +18,7 @@ def test_bad_path(): def test_simple_example(tmpdir): file_in = tmpdir.join('test_input_json.json') - file_in.write('nonesense') + file_in.write('{}') file_out = tmpdir.join('test_output.json') From 5795d44d4f414a91dd3eee68eac79ea3b9455bbf Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 16:38:08 -0800 Subject: [PATCH 08/81] merging changes with sink options --- argschema/argschema_parser.py | 82 +++++++++++++++++++++++--------- argschema/schemas.py | 9 ++-- argschema/sources/json_source.py | 18 +++---- argschema/sources/source.py | 66 +++++++++++++++++-------- argschema/sources/yaml_source.py | 24 +++++++++- argschema/utils.py | 37 ++++++++------ test/sources/test_classes.py | 11 +++++ test/sources/test_json.py | 28 +++++++++++ test/sources/test_yaml.py | 29 +++++++++++ test/test_first_test.py | 9 +--- test/test_sources.py | 19 -------- 11 files changed, 233 insertions(+), 99 deletions(-) create mode 100644 test/sources/test_classes.py create mode 100644 test/sources/test_json.py create mode 100644 test/sources/test_yaml.py delete mode 100644 test/test_sources.py diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index cf1d8ad5..5747f915 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -7,7 +7,9 @@ from . import utils from . 
import fields import marshmallow as mm -from .sources.json_source import JsonSource +from .sources.json_source import JsonSource, JsonSink +from .sources.yaml_source import YamlSource, YamlSink +from .sources.source import NotConfiguredSourceError def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema @@ -100,7 +102,7 @@ class ArgSchemaParser(object): default_schema = schemas.ArgSchema default_output_schema = None input_config_map = [ JsonSource ] - output_config_map = [ JsonSource ] + output_config_map = [ JsonSink ] def __init__(self, input_data=None, # dictionary input as option instead of --input_json @@ -108,7 +110,7 @@ def __init__(self, output_schema_type=None, # schema for parsing output_json args=None, input_source = None, - output_source = None, + output_sink = None, logger_name=__name__): if schema_type is None: @@ -121,7 +123,17 @@ def __init__(self, self.logger.debug('input_data is {}'.format(input_data)) # convert schema to argparse object - p = utils.schema_argparser(self.schema) + + #consolidate a list of the input and output source + #command line configuration schemas + io_schemas = [] + for in_cfg in self.input_config_map: + io_schemas.append(in_cfg.ConfigSchema()) + for out_cfg in self.output_config_map: + io_schemas.append(out_cfg.ConfigSchema()) + + #build a command line parser from the input schemas and configurations + p = utils.schema_argparser(self.schema,io_schemas) argsobj = p.parse_args(args) argsdict = utils.args_to_dict(argsobj, self.schema) self.logger.debug('argsdict is {}'.format(argsdict)) @@ -129,23 +141,37 @@ def __init__(self, #if you received an input_source, get the dictionary from there if input_source is not None: input_data = input_source.get_dict() - - #loop over the set of input_configurations to see if the command line arguments include a valid configuration - #for one of them + else: #see if the input_data itself contains an 
InputSource configuration use that + for InputSource in self.input_config_map: + try: + input_data = get_input(InputSource,input_data) + except NotConfiguredSourceError as e: + pass + + #loop over the set of input_configurations to see if the command line arguments + # include a valid configuration for an input_source for InputSource in self.input_config_map: try: - input_config_d = InputSource.get_config(InputSource.InputConfigSchema,argsdict) - input_source = InputSource(**input_config_d) - input_data = input_source.get_dict() + input_data = get_input(InputSource,argsdict) #if the command line argument dictionary doesn't contain a valid configuration #simply move on to the next one - except mm.ValidationError as e: + except NotConfiguredSourceError as e: pass - + # merge the command line dictionary into the input json args = utils.smart_merge(input_data, argsdict) self.logger.debug('args after merge {}'.format(args)) + # if the output source was not passed in, see if there is a configuration in the combined args + if output_sink is None: + for OutputSink in self.output_config_map: + try: + output_config_d = OutputSink.get_config(OutputSink.ConfigSchema,args) + output_sink = OutputSink(**output_config_d) + except NotConfiguredSourceError: + pass + # save the output source for later + self.output_sink = output_sink # validate with load! 
result = self.load_schema_with_defaults(self.schema, args) @@ -185,30 +211,36 @@ def get_output_json(self, d): return output_json - def output(self, d, output_path=None, **json_dump_options): + def output(self,d,output_path=None,sink=None,**sink_options): """method for outputing dictionary to the output_json file path after validating it through the output_schema_type Parameters ---------- d:dict - output dictionary to output + output dictionary to output + sink: argschema.sources.source.ArgSink + output_sink to output to (optional default to self.output_source) output_path: str path to save to output file, optional (with default to self.mod['output_json'] location) - **json_dump_options : - will be passed through to json.dump - + **sink_options : + will be passed through to sink.put_dict + + (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) Raises ------ marshmallow.ValidationError If any of the output dictionary doesn't meet the output schema """ - if output_path is None: - output_path = self.args['output_json'] - - output_json = self.get_output_json(d) - with open(output_path, 'w') as fp: - json.dump(output_json, fp, **json_dump_options) + + output_d = self.get_output_json(d) + if output_path is not None: + self.logger.warning('DEPRECATED, pass sink instead') + sink = JsonSink(output_json=output_path) + if sink is not None: + sink.put_dict(output_d) + else: + self.output_sink.put_dict(output_d,**sink_options) def load_schema_with_defaults(self, schema, args): """method for deserializing the arguments dictionary (args) @@ -262,3 +294,7 @@ def initialize_logger(name, log_level): logger = logging.getLogger(name) logger.setLevel(level=level) return logger + +class ArgSchemaYamlParser(ArgSchemaParser): + input_config_map = [YamlSource] + output_config_map = [YamlSink] \ No newline at end of file diff --git a/argschema/schemas.py b/argschema/schemas.py index 52dac07c..48764c40 100644 --- a/argschema/schemas.py +++ 
b/argschema/schemas.py @@ -34,11 +34,10 @@ class ArgSchema(DefaultSchema): input_json and output_json files and the log_level """ - input_json = InputFile( - description="file path of input json file") - - output_json = OutputFile( - description="file path to output json file") + # input_json = InputFile( + # description= "file path of input json file") + # output_json = OutputFile( + # description= "file path to output json file") log_level = LogLevel( default='ERROR', description="set the logging level of the module") diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index c778482c..b2754362 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -1,4 +1,4 @@ -from .source import FileSource +from .source import FileSource, FileSink import json import marshmallow as mm import argschema @@ -12,16 +12,18 @@ class JsonOutputConfigSchema(mm.Schema): description = 'filepath to save output_json') class JsonSource(FileSource): - InputConfigSchema = JsonInputConfigSchema - OutputConfigSchema = JsonOutputConfigSchema - def __init__(self,input_json=None, output_json=None): - if input_json is not None: - self.filepath = input_json - if output_json is not None: - self.filepath = output_json + ConfigSchema = JsonInputConfigSchema + def __init__(self,input_json=None): + self.filepath = input_json def read_file(self,fp): return json.load(fp) + +class JsonSink(FileSink): + ConfigSchema = JsonOutputConfigSchema + + def __init__(self,output_json=None): + self.filepath = output_json def write_file(self,fp,d): json.dump(d,fp) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 41b28dd3..b9a060de 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -1,28 +1,53 @@ import json import marshmallow as mm -class Source(object): - InputConfigSchema = None - OutputConfigSchema = None +class ConfigurableSourceError(mm.ValidationError): + """Base Exception class for configurable 
sources""" + pass - def __init__(self): - pass +class MisconfiguredSourceError(ConfigurableSourceError): + """Exception when a source configuration was present in part but failed + validation""" + pass - def get_dict(self): - pass - - def put_dict(self,d): - pass +class NotConfiguredSourceError(ConfigurableSourceError): + """Exception when the source configuration is simply completely missing""" + pass +class ImproperSourceConfigurationSchemaError(ConfigurableSourceError): + """Exception when the source configuration schema isn't valid""" + pass + +def d_contains_any_fields(schema,d): + for field_name, field in schema.declared_fields.items(): + if field_name in d.keys(): + if d[field_name] is not None: + return True + return False + +class ConfigurableSource(object): + ConfigSchema = None @staticmethod def get_config(Schema,d): schema = Schema() - result,errors = schema.load(d) - if len(errors)>0: - raise mm.ValidationError(json.dumps(errors, indent=2)) - return result + if not d_contains_any_fields(schema,d): + raise NotConfiguredSourceError("This source is not present in \n" + json.dumps(d, indent=2)) + else: + result,errors = schema.load(d) + if len(errors)>0: + raise MisconfiguredSourceError("Source incorrectly configured\n" + json.dumps(errors, indent=2)) + else: + return result + +class ArgSource(ConfigurableSource): + def get_dict(self): + pass -class FileSource(Source): +class ArgSink(ConfigurableSource): + def put_dict(self,d): + pass + +class FileSource(ArgSource): def __init__(self,filepath): self.filepath = filepath @@ -31,14 +56,17 @@ def get_dict(self): with open(self.filepath,'r') as fp: d = self.read_file(fp) return d - - def put_dict(self,d): - with open(self.filepath,'w') as fp: - self.write_file(fp,d) def read_file(self,fp): pass +class FileSink(ArgSink): + def __init__(self,filepath): + self.filepath = filepath + def write_file(self,fp,d): pass + def put_dict(self,d): + with open(self.filepath,'w') as fp: + self.write_file(fp,d) \ No newline at 
end of file diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index ed3ef0a4..480f4d87 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -1,10 +1,30 @@ import yaml -from .source import FileSource +from .source import FileSource,FileSink +import argschema +import marshmallow as mm + +class YamlInputConfigSchema(mm.Schema): + input_yaml = argschema.fields.InputFile(required=True, + description = 'filepath to input yaml') + +class YamlOutputConfigSchema(mm.Schema): + output_yaml = argschema.fields.OutputFile(required=True, + description = 'filepath to save output yaml') class YamlSource(FileSource): + ConfigSchema = YamlInputConfigSchema + + def __init__(self,input_yaml=None): + self.filepath = input_yaml def read_file(self,fp): return yaml.load(fp) +class YamlSink(FileSink): + ConfigSchema = YamlOutputConfigSchema + + def __init__(self,output_yaml=None): + self.filepath = output_yaml + def write_file(self,fp,d): - yaml.dump(d,fp) \ No newline at end of file + yaml.dump(d,fp,default_flow_style=False) \ No newline at end of file diff --git a/argschema/utils.py b/argschema/utils.py index 062e5af5..17d14ba3 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -345,35 +345,42 @@ def build_schema_arguments(schema, arguments=None, path=None, description=None): return arguments -def schema_argparser(schema): +def schema_argparser(schema, additional_schemas=None): """given a jsonschema, build an argparse.ArgumentParser Parameters ---------- schema : argschema.schemas.ArgSchema schema to build an argparser from - + additional_schemas : list[marshmallow.schema] + list of additional schemas to add to the command line arguments Returns ------- argparse.ArgumentParser - the represents the schema + that represents the schemas """ - # build up a list of argument groups using recursive function - # to traverse the tree, root node gets the description given by doc string - # of the schema - arguments = 
build_schema_arguments(schema, description=schema.__doc__) - # make the root schema appeear first rather than last - arguments = [arguments[-1]] + arguments[0:-1] + if additional_schemas is not None: + schema_list = [schema] + additional_schemas + else: + schema_list = [schema] parser = argparse.ArgumentParser() - - for arg_group in arguments: - group = parser.add_argument_group( - arg_group['title'], arg_group['description']) - for arg_name, arg in arg_group['args'].items(): - group.add_argument(arg_name, **arg) + for s in schema_list: + # build up a list of argument groups using recursive function + # to traverse the tree, root node gets the description given by doc string + # of the schema + arguments = build_schema_arguments(s, description=schema.__doc__) + + # make the root schema appeear first rather than last + arguments = [arguments[-1]] + arguments[0:-1] + + for arg_group in arguments: + group = parser.add_argument_group( + arg_group['title'], arg_group['description']) + for arg_name, arg in arg_group['args'].items(): + group.add_argument(arg_name, **arg) return parser diff --git a/test/sources/test_classes.py b/test/sources/test_classes.py new file mode 100644 index 00000000..06ac1c83 --- /dev/null +++ b/test/sources/test_classes.py @@ -0,0 +1,11 @@ +import argschema + +class MyNestedSchema(argschema.schemas.DefaultSchema): + one = argschema.fields.Int(required=True,description="nested integer") + two = argschema.fields.Boolean(required=True,description="a nested boolean") + +class MySchema(argschema.ArgSchema): + a = argschema.fields.Int(required=True,description="parameter a") + b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") + nest = argschema.fields.Nested(MyNestedSchema,description="a nested schema") + diff --git a/test/sources/test_json.py b/test/sources/test_json.py new file mode 100644 index 00000000..3055cb20 --- /dev/null +++ b/test/sources/test_json.py @@ -0,0 +1,28 @@ +import argschema 
+from argschema.sources.json_source import JsonSource +from test_classes import MySchema +import json +import pytest + +class MyParser(argschema.ArgSchemaParser): + default_schema = MySchema + +@pytest.fixture(scope='module') +def test_input_file(tmpdir_factory): + file_in = tmpdir_factory.mktemp('test').join('test_input_json.json') + input_data = { + 'a':5, + 'nest':{ + 'one':7, + 'two':False + } + } + with open(str(file_in),'w') as fp: + json.dump(input_data,fp) + return str(file_in) + +def test_json_source(test_input_file): + mod = MyParser(input_source= JsonSource(test_input_file), args=[]) + +def test_json_source_command(test_input_file): + mod = MyParser(args = ['--input_json',test_input_file]) \ No newline at end of file diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py new file mode 100644 index 00000000..35cdf4a1 --- /dev/null +++ b/test/sources/test_yaml.py @@ -0,0 +1,29 @@ +import argschema +from argschema.sources.yaml_source import YamlSource +from argschema.argschema_parser import ArgSchemaYamlParser +from test_classes import MySchema +import yaml +import pytest + +class MyParser(ArgSchemaYamlParser): + default_schema = MySchema + +@pytest.fixture(scope='module') +def test_input_file(tmpdir_factory): + file_in = tmpdir_factory.mktemp('test').join('test_input_yaml.yml') + input_data = { + 'a':5, + 'nest':{ + 'one':7, + 'two':False + } + } + with open(str(file_in),'w') as fp: + yaml.dump(input_data,fp,default_flow_style=False) + return str(file_in) + +def test_yaml_source(test_input_file): + mod = MyParser(input_source= YamlSource(test_input_file), args=[]) + +def test_yaml_source_command(test_input_file): + mod = MyParser(args = ['--input_yaml',test_input_file]) \ No newline at end of file diff --git a/test/test_first_test.py b/test/test_first_test.py index 18d9ccaa..9fd178f7 100644 --- a/test/test_first_test.py +++ b/test/test_first_test.py @@ -16,15 +16,8 @@ def test_bad_path(): ArgSchemaParser(input_data=example, args=[]) -def 
test_simple_example(tmpdir): - file_in = tmpdir.join('test_input_json.json') - file_in.write('{}') - - file_out = tmpdir.join('test_output.json') - +def test_simple_example(): example = { - "input_json": str(file_in), - "output_json": str(file_out), "log_level": "CRITICAL"} jm = ArgSchemaParser(input_data=example, args=[]) diff --git a/test/test_sources.py b/test/test_sources.py deleted file mode 100644 index 48308794..00000000 --- a/test/test_sources.py +++ /dev/null @@ -1,19 +0,0 @@ -import argschema -from argschema.sources.json_source import JsonSource -from argschema.sources.yaml_source import YamlSource -from test_argschema_parser import MyParser -import json - -def test_json_source(tmpdir): - file_in = tmpdir.join('test_input_json.json') - input_data = { - 'a':5, - 'nest':{ - 'one':7, - 'two':False - } - } - json.dump(input_data,file_in) - mod = MyParser(input_source= JsonSource(str(file_in)), args=[]) - - \ No newline at end of file From e81d9978aeccb73e9f2bc6eb7efba0f78244b255 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 09:46:09 -0800 Subject: [PATCH 09/81] removed unused exception --- argschema/sources/source.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index b9a060de..6cab177f 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -14,10 +14,6 @@ class NotConfiguredSourceError(ConfigurableSourceError): """Exception when the source configuration is simply completely missing""" pass -class ImproperSourceConfigurationSchemaError(ConfigurableSourceError): - """Exception when the source configuration schema isn't valid""" - pass - def d_contains_any_fields(schema,d): for field_name, field in schema.declared_fields.items(): if field_name in d.keys(): From 309212eabd5229bebfc34c0146fc470df69e0ba9 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 10:02:55 -0800 Subject: [PATCH 10/81] removing unused __init__ --- 
argschema/sources/source.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 6cab177f..0b86c9b4 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -45,9 +45,6 @@ def put_dict(self,d): class FileSource(ArgSource): - def __init__(self,filepath): - self.filepath = filepath - def get_dict(self): with open(self.filepath,'r') as fp: d = self.read_file(fp) @@ -57,8 +54,6 @@ def read_file(self,fp): pass class FileSink(ArgSink): - def __init__(self,filepath): - self.filepath = filepath def write_file(self,fp,d): pass From 7a5909daa88d12c1639fd3de7a1bd6d424955b28 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 14:01:48 -0800 Subject: [PATCH 11/81] modified code to raise exception when the more than one configuration is done, and considated looping code into private method --- argschema/argschema_parser.py | 87 ++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 33 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 5747f915..1222306e 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -11,6 +11,7 @@ from .sources.yaml_source import YamlSource, YamlSink from .sources.source import NotConfiguredSourceError + def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema @@ -101,72 +102,64 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None - input_config_map = [ JsonSource ] - output_config_map = [ JsonSink ] + input_config_map = [JsonSource] + output_config_map = [JsonSink] def __init__(self, input_data=None, # dictionary input as option instead of --input_json schema_type=None, # schema for parsing arguments output_schema_type=None, # schema for parsing output_json args=None, - input_source = None, - output_sink = None, + 
input_source=None, + output_sink=None, logger_name=__name__): if schema_type is None: schema_type = self.default_schema if output_schema_type is None: output_schema_type = self.default_output_schema - + self.schema = schema_type() self.logger = self.initialize_logger(logger_name, 'WARNING') self.logger.debug('input_data is {}'.format(input_data)) # convert schema to argparse object - #consolidate a list of the input and output source - #command line configuration schemas + # consolidate a list of the input and output source + # command line configuration schemas io_schemas = [] for in_cfg in self.input_config_map: io_schemas.append(in_cfg.ConfigSchema()) for out_cfg in self.output_config_map: io_schemas.append(out_cfg.ConfigSchema()) - #build a command line parser from the input schemas and configurations - p = utils.schema_argparser(self.schema,io_schemas) + # build a command line parser from the input schemas and configurations + p = utils.schema_argparser(self.schema, io_schemas) argsobj = p.parse_args(args) argsdict = utils.args_to_dict(argsobj, self.schema) self.logger.debug('argsdict is {}'.format(argsdict)) - #if you received an input_source, get the dictionary from there + # if you received an input_source, get the dictionary from there if input_source is not None: input_data = input_source.get_dict() - else: #see if the input_data itself contains an InputSource configuration use that - for InputSource in self.input_config_map: - try: - input_data = get_input(InputSource,input_data) - except NotConfiguredSourceError as e: - pass + else: # see if the input_data itself contains an InputSource configuration use that + config_data = self.__get_input_data_from_config(input_data) + input_data = config_data if config_data is not None else input_data - #loop over the set of input_configurations to see if the command line arguments - # include a valid configuration for an input_source - for InputSource in self.input_config_map: - try: - input_data = 
get_input(InputSource,argsdict) - #if the command line argument dictionary doesn't contain a valid configuration - #simply move on to the next one - except NotConfiguredSourceError as e: - pass + # check whether the command line arguments contain an input configuration and use that + config_data = self.__get_input_data_from_config(argsdict) + input_data = config_data if config_data is not None else input_data # merge the command line dictionary into the input json args = utils.smart_merge(input_data, argsdict) self.logger.debug('args after merge {}'.format(args)) # if the output source was not passed in, see if there is a configuration in the combined args - if output_sink is None: + if output_sink is None: for OutputSink in self.output_config_map: - try: - output_config_d = OutputSink.get_config(OutputSink.ConfigSchema,args) + try: + output_config_d = OutputSink.get_config( + OutputSink.ConfigSchema, args) output_sink = OutputSink(**output_config_d) except NotConfiguredSourceError: pass @@ -180,6 +173,33 @@ def __init__(self, self.logger = self.initialize_logger( logger_name, self.args.get('log_level')) + def __get_input_data_from_config(self, d): + """private function to check for ArgSource configurations in a dictionary + and return the data if it exists + + Parameters + ---------- + d : dict + dictionary to look for InputSource configuration parameters in + + Returns + ------- + dict or None + dictionary of InputData if it found a valid configuration, None otherwise + """ + input_set = False + input_data = None + for InputSource in self.input_config_map: + try: + input_data = get_input(InputSource, d) + if input_set == True: + raise MultipleConfiguredSourceError( + "more then one InputSource configuration present in {}".format(d)) + input_set = True + except NotConfiguredSourceError as e: + pass + return input_data + def get_output_json(self, d): """method for getting the output_json pushed through validation if validation exists @@ -211,7 +231,7 @@ def 
get_output_json(self, d): return output_json - def output(self,d,output_path=None,sink=None,**sink_options): + def output(self, d, output_path=None, sink=None, **sink_options): """method for outputing dictionary to the output_json file path after validating it through the output_schema_type @@ -225,14 +245,14 @@ def output(self,d,output_path=None,sink=None,**sink_options): path to save to output file, optional (with default to self.mod['output_json'] location) **sink_options : will be passed through to sink.put_dict - + (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) Raises ------ marshmallow.ValidationError If any of the output dictionary doesn't meet the output schema """ - + output_d = self.get_output_json(d) if output_path is not None: self.logger.warning('DEPRECATED, pass sink instead') @@ -240,7 +260,7 @@ def output(self,d,output_path=None,sink=None,**sink_options): if sink is not None: sink.put_dict(output_d) else: - self.output_sink.put_dict(output_d,**sink_options) + self.output_sink.put_dict(output_d, **sink_options) def load_schema_with_defaults(self, schema, args): """method for deserializing the arguments dictionary (args) @@ -295,6 +315,7 @@ def initialize_logger(name, log_level): logger.setLevel(level=level) return logger + class ArgSchemaYamlParser(ArgSchemaParser): input_config_map = [YamlSource] - output_config_map = [YamlSink] \ No newline at end of file + output_config_map = [YamlSink] From cab10e3bdda4e0ff9498e0e1f56bb227af3eda47 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 14:02:03 -0800 Subject: [PATCH 12/81] added exception to facilitate checking for extra configurations --- argschema/sources/source.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 0b86c9b4..bbdf51c2 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -14,6 +14,10 @@ class 
NotConfiguredSourceError(ConfigurableSourceError): """Exception when the source configuration is simply completely missing""" pass +class MultipleConfiguredSourceError(ConfigurableSourceError): + """Exception when there is more than one validly configured Source configured""" + pass + def d_contains_any_fields(schema,d): for field_name, field in schema.declared_fields.items(): if field_name in d.keys(): From 42084bf3774b4a964208e340415b484cd69ab704 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 14:02:25 -0800 Subject: [PATCH 13/81] added more testing for yaml output source --- test/sources/test_classes.py | 3 +++ test/sources/test_yaml.py | 23 ++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/test/sources/test_classes.py b/test/sources/test_classes.py index 06ac1c83..a9de6944 100644 --- a/test/sources/test_classes.py +++ b/test/sources/test_classes.py @@ -9,3 +9,6 @@ class MySchema(argschema.ArgSchema): b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") nest = argschema.fields.Nested(MyNestedSchema,description="a nested schema") +class MyOutputSchema(argschema.schemas.DefaultSchema): + a = argschema.fields.Int(required=True,description="parameter a") + b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") \ No newline at end of file diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 35cdf4a1..eb75b064 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -1,12 +1,15 @@ import argschema -from argschema.sources.yaml_source import YamlSource +from argschema.sources.yaml_source import YamlSource, YamlSink from argschema.argschema_parser import ArgSchemaYamlParser -from test_classes import MySchema +from test_classes import MySchema, MyOutputSchema import yaml import pytest + + class MyParser(ArgSchemaYamlParser): default_schema = MySchema + default_output_schema = 
MyOutputSchema @pytest.fixture(scope='module') def test_input_file(tmpdir_factory): @@ -26,4 +29,18 @@ def test_yaml_source(test_input_file): mod = MyParser(input_source= YamlSource(test_input_file), args=[]) def test_yaml_source_command(test_input_file): - mod = MyParser(args = ['--input_yaml',test_input_file]) \ No newline at end of file + mod = MyParser(args = ['--input_yaml',test_input_file]) + +def test_yaml_sink(test_input_file,tmpdir): + outfile=tmpdir.join('test_out.yml') + output_data = { + 'a':3 + } + mod = MyParser(input_source= YamlSource(test_input_file), + output_sink = YamlSink(str(outfile))) + mod.output(output_data) + + with open(str(outfile),'r') as fp: + d=yaml.load(fp) + output_data['b']="my value" + assert (output_data == d) From 426ccd891db75a1450de7e88060397ff665b7e05 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 17:33:29 -0800 Subject: [PATCH 14/81] pep8 --- test/sources/test_yaml.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index eb75b064..0ff47dbb 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -6,41 +6,44 @@ import pytest - class MyParser(ArgSchemaYamlParser): default_schema = MySchema default_output_schema = MyOutputSchema + @pytest.fixture(scope='module') def test_input_file(tmpdir_factory): file_in = tmpdir_factory.mktemp('test').join('test_input_yaml.yml') input_data = { - 'a':5, - 'nest':{ - 'one':7, - 'two':False + 'a': 5, + 'nest': { + 'one': 7, + 'two': False } } - with open(str(file_in),'w') as fp: - yaml.dump(input_data,fp,default_flow_style=False) + with open(str(file_in), 'w') as fp: + yaml.dump(input_data, fp, default_flow_style=False) return str(file_in) + def test_yaml_source(test_input_file): - mod = MyParser(input_source= YamlSource(test_input_file), args=[]) + mod = MyParser(input_source=YamlSource(test_input_file), args=[]) + def 
test_yaml_source_command(test_input_file): - mod = MyParser(args = ['--input_yaml',test_input_file]) + mod = MyParser(args=['--input_yaml', test_input_file]) -def test_yaml_sink(test_input_file,tmpdir): - outfile=tmpdir.join('test_out.yml') + +def test_yaml_sink(test_input_file, tmpdir): + outfile = tmpdir.join('test_out.yml') output_data = { - 'a':3 + 'a': 3 } - mod = MyParser(input_source= YamlSource(test_input_file), - output_sink = YamlSink(str(outfile))) + mod = MyParser(input_source=YamlSource(test_input_file), + output_sink=YamlSink(str(outfile))) mod.output(output_data) - - with open(str(outfile),'r') as fp: - d=yaml.load(fp) - output_data['b']="my value" + + with open(str(outfile), 'r') as fp: + d = yaml.load(fp) + output_data['b'] = "my value" assert (output_data == d) From 7289b812efd5abc3532974bc24d2706dc8db9290 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 17:41:24 -0800 Subject: [PATCH 15/81] fixed import error --- argschema/argschema_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 1222306e..7ef46ac3 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -9,7 +9,7 @@ import marshmallow as mm from .sources.json_source import JsonSource, JsonSink from .sources.yaml_source import YamlSource, YamlSink -from .sources.source import NotConfiguredSourceError +from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError def contains_non_default_schemas(schema, schema_list=[]): From e0c2d2273afe1d67de2c679db3c6d28b2650086d Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 17:41:38 -0800 Subject: [PATCH 16/81] added tests for multiple input configurations --- test/sources/test_yaml.py | 59 +++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 0ff47dbb..40aded17 100644 
--- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -1,45 +1,60 @@ import argschema from argschema.sources.yaml_source import YamlSource, YamlSink +from argschema.sources.json_source import JsonSource, JsonSink +from argschema.sources.source import MultipleConfiguredSourceError from argschema.argschema_parser import ArgSchemaYamlParser from test_classes import MySchema, MyOutputSchema import yaml import pytest - +import json class MyParser(ArgSchemaYamlParser): default_schema = MySchema default_output_schema = MyOutputSchema +class MyDualParser(MyParser): + input_config_map = [JsonSource, YamlSource] + output_config_map = [JsonSink, YamlSink] + +input_data = { + 'a': 5, + 'nest': { + 'one': 7, + 'two': False + } +} @pytest.fixture(scope='module') -def test_input_file(tmpdir_factory): +def test_yaml_input_file(tmpdir_factory): file_in = tmpdir_factory.mktemp('test').join('test_input_yaml.yml') - input_data = { - 'a': 5, - 'nest': { - 'one': 7, - 'two': False - } - } + with open(str(file_in), 'w') as fp: yaml.dump(input_data, fp, default_flow_style=False) return str(file_in) +@pytest.fixture(scope='module') +def test_json_input_file(tmpdir_factory): + file_in = tmpdir_factory.mktemp('test').join('test_input_json.json') + + with open(str(file_in), 'w') as fp: + json.dump(input_data, fp) + return str(file_in) + -def test_yaml_source(test_input_file): - mod = MyParser(input_source=YamlSource(test_input_file), args=[]) +def test_yaml_source(test_yaml_input_file): + mod = MyParser(input_source=YamlSource(test_yaml_input_file), args=[]) -def test_yaml_source_command(test_input_file): - mod = MyParser(args=['--input_yaml', test_input_file]) +def test_yaml_source_command(test_yaml_input_file): + mod = MyParser(args=['--input_yaml', test_yaml_input_file]) -def test_yaml_sink(test_input_file, tmpdir): +def test_yaml_sink(test_yaml_input_file, tmpdir): outfile = tmpdir.join('test_out.yml') output_data = { 'a': 3 } - mod = 
MyParser(input_source=YamlSource(test_input_file), + mod = MyParser(input_source=YamlSource(test_yaml_input_file), output_sink=YamlSink(str(outfile))) mod.output(output_data) @@ -47,3 +62,17 @@ def test_yaml_sink(test_input_file, tmpdir): d = yaml.load(fp) output_data['b'] = "my value" assert (output_data == d) + +def test_dual_parser(test_json_input_file,test_yaml_input_file): + + mod = MyDualParser(args=['--input_yaml', test_yaml_input_file]) + assert mod.args['a']==5 + assert mod.args['nest']==input_data['nest'] + + mod = MyDualParser(args=['--input_json', test_json_input_file]) + assert mod.args['a']==5 + assert mod.args['nest']==input_data['nest'] + +def test_dual_parser_fail(test_json_input_file,test_yaml_input_file): + with pytest.raises(MultipleConfiguredSourceError): + mod = MyDualParser(args=['--input_yaml', test_yaml_input_file, '--input_json', test_json_input_file]) \ No newline at end of file From 46038b9c2a12de6495d968771968784b5cc2163b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 09:43:27 -0800 Subject: [PATCH 17/81] made it error if more than one output sink configured --- argschema/argschema_parser.py | 53 +++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 7ef46ac3..71a8c680 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -155,16 +155,11 @@ def __init__(self, self.logger.debug('args after merge {}'.format(args)) # if the output source was not passed in, see if there is a configuration in the combined args - if output_sink is None: - for OutputSink in self.output_config_map: - try: - output_config_d = OutputSink.get_config( - OutputSink.ConfigSchema, args) - output_sink = OutputSink(**output_config_d) - except NotConfiguredSourceError: - pass + if output_sink is None: + output_sink = self.__get_output_sink_from_config(args) # save the output source for later self.output_sink = 
output_sink + # validate with load! result = self.load_schema_with_defaults(self.schema, args) @@ -173,7 +168,37 @@ def __init__(self, self.logger = self.initialize_logger( logger_name, self.args.get('log_level')) - def __get_input_data_from_config(self, d): + def __get_output_sink_from_config(self,d): + """private function to check for ArgSink configuration in a dictionary and return a configured ArgSink + + Parameters + ---------- + d : dict + dictionary to look for ArgSink Configuration parameters in + + Returns + ------- + ArgSink + A configured argsink + + Raises + ------ + MultipleConfiguredSourceError + If more than one Sink is configured + """ + output_set = False + output_sink = None + for OutputSink in self.output_config_map: + try: + output_config_d = OutputSink.get_config(OutputSink.ConfigSchema,d) + if output_set: + raise MultipleConfiguredSourceError("more then one OutputSink configuration present in {}".format(d)) + output_sink = OutputSink(**output_config_d) + output_set=True + except NotConfiguredSourceError: + pass + + def __get_input_data_from_config(self,d): """private function to check for ArgSource configurations in a dictionary and return the data if it exists @@ -186,15 +211,19 @@ def __get_input_data_from_config(self, d): ------- dict or None dictionary of InputData if it found a valid configuration, None otherwise + + Raises + ------ + MultipleConfiguredSourceError + if more than one InputSource is configured """ input_set = False input_data = None for InputSource in self.input_config_map: try: input_data = get_input(InputSource, d) - if input_set == True: - raise MultipleConfiguredSourceError( - "more then one InputSource configuration present in {}".format(d)) + if input_set: + raise MultipleConfiguredSourceError("more then one InputSource configuration present in {}".format(d)) input_set = True except NotConfiguredSourceError as e: pass From a7993bee11f450a551494899a570c74f8700989c Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: 
Tue, 2 Jan 2018 09:52:56 -0800 Subject: [PATCH 18/81] fixed bug in output_sink method --- argschema/argschema_parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 71a8c680..892fdf34 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -159,7 +159,7 @@ def __init__(self, output_sink = self.__get_output_sink_from_config(args) # save the output source for later self.output_sink = output_sink - + # validate with load! result = self.load_schema_with_defaults(self.schema, args) @@ -197,7 +197,8 @@ def __get_output_sink_from_config(self,d): output_set=True except NotConfiguredSourceError: pass - + return output_sink + def __get_input_data_from_config(self,d): """private function to check for ArgSource configurations in a dictionary and return the data if it exists From cbb18dc490ba86da448a3ae95bb64586b59c80f9 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 09:53:11 -0800 Subject: [PATCH 19/81] added 2 output config failure test --- test/sources/test_yaml.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 40aded17..96d9ed09 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -75,4 +75,13 @@ def test_dual_parser(test_json_input_file,test_yaml_input_file): def test_dual_parser_fail(test_json_input_file,test_yaml_input_file): with pytest.raises(MultipleConfiguredSourceError): - mod = MyDualParser(args=['--input_yaml', test_yaml_input_file, '--input_json', test_json_input_file]) \ No newline at end of file + mod = MyDualParser(args=['--input_yaml', test_yaml_input_file, '--input_json', test_json_input_file]) + +def test_dual_parser_output_fail(test_json_input_file,tmpdir): + test_json_output = str(tmpdir.join('output.yml')) + test_yaml_output = str(tmpdir.join('output.json')) + with 
pytest.raises(MultipleConfiguredSourceError): + mod = MyDualParser(args=['--input_json', test_json_input_file, + '--output_json',test_json_output, + '--output_yaml',test_yaml_output]) + \ No newline at end of file From 03be72d8980b2b7421d33b58c669ad70a01c9f59 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 13:24:06 -0800 Subject: [PATCH 20/81] renaming default_configurable_sinks,sources --- argschema/argschema_parser.py | 14 +++++++------- test/sources/test_yaml.py | 5 +++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 892fdf34..5b967b95 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -102,8 +102,8 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None - input_config_map = [JsonSource] - output_config_map = [JsonSink] + default_configurable_sources = [ JsonSource ] + default_configurable_sinks = [ JsonSink ] def __init__(self, input_data=None, # dictionary input as option instead of --input_json @@ -128,9 +128,9 @@ def __init__(self, # consolidate a list of the input and output source # command line configuration schemas io_schemas = [] - for in_cfg in self.input_config_map: + for in_cfg in self.default_configurable_sources: io_schemas.append(in_cfg.ConfigSchema()) - for out_cfg in self.output_config_map: + for out_cfg in self.default_configurable_sinks: io_schemas.append(out_cfg.ConfigSchema()) # build a command line parser from the input schemas and configurations @@ -188,7 +188,7 @@ def __get_output_sink_from_config(self,d): """ output_set = False output_sink = None - for OutputSink in self.output_config_map: + for OutputSink in self.default_configurable_sinks: try: output_config_d = OutputSink.get_config(OutputSink.ConfigSchema,d) if output_set: @@ -220,8 +220,8 @@ def __get_input_data_from_config(self,d): """ input_set = False input_data = None - for InputSource in 
self.input_config_map: - try: + for InputSource in self.default_configurable_sources: + try: input_data = get_input(InputSource, d) if input_set: raise MultipleConfiguredSourceError("more then one InputSource configuration present in {}".format(d)) diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 96d9ed09..9f990ae3 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -11,10 +11,11 @@ class MyParser(ArgSchemaYamlParser): default_schema = MySchema default_output_schema = MyOutputSchema + default_configurable_sources = [YamlSource] class MyDualParser(MyParser): - input_config_map = [JsonSource, YamlSource] - output_config_map = [JsonSink, YamlSink] + default_configurable_sources = [JsonSource, YamlSource] + default_configurable_sinks = [JsonSink, YamlSink] input_data = { 'a': 5, From d23616a47d9bae8bb848108d9680951ec94eb4d6 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 13:26:06 -0800 Subject: [PATCH 21/81] doc typo --- argschema/argschema_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 5b967b95..9b131aba 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -86,7 +86,7 @@ class ArgSchemaParser(object): the schema to use to validate the output_json, used by self.output input_source : argschema.sources.source.Source a generic source of a dictionary - output_source : argschema.sources.source.Source + output_sink : argschema.sources.source.Source a generic output to put output dictionary args : list or None command line arguments passed to the module, if None use argparse to parse the command line, set to [] if you want to bypass command line parsing From 435a94a24311ff3db2cc1d2cebaf562b2c94e15c Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 13:27:35 -0800 Subject: [PATCH 22/81] doc changes --- argschema/argschema_parser.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 9b131aba..d04fc6cc 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -154,10 +154,10 @@ def __init__(self, args = utils.smart_merge(input_data, argsdict) self.logger.debug('args after merge {}'.format(args)) - # if the output source was not passed in, see if there is a configuration in the combined args + # if the output sink was not passed in, see if there is a configuration in the combined args if output_sink is None: output_sink = self.__get_output_sink_from_config(args) - # save the output source for later + # save the output sink for later self.output_sink = output_sink # validate with load! From 619b71a9e0ff3510f33a3341843e1733273f4887 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 16:40:22 -0800 Subject: [PATCH 23/81] merging changes --- argschema/argschema_parser.py | 39 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index d04fc6cc..9d32a691 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -102,8 +102,8 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None - default_configurable_sources = [ JsonSource ] - default_configurable_sinks = [ JsonSink ] + default_configurable_sources = [JsonSource] + default_configurable_sinks = [JsonSink] def __init__(self, input_data=None, # dictionary input as option instead of --input_json @@ -155,8 +155,8 @@ def __init__(self, self.logger.debug('args after merge {}'.format(args)) # if the output sink was not passed in, see if there is a configuration in the combined args - if output_sink is None: - output_sink = self.__get_output_sink_from_config(args) + if output_sink is None: + output_sink = self.__get_output_sink_from_config(args) # save the output sink for later 
self.output_sink = output_sink @@ -168,14 +168,14 @@ def __init__(self, self.logger = self.initialize_logger( logger_name, self.args.get('log_level')) - def __get_output_sink_from_config(self,d): + def __get_output_sink_from_config(self, d): """private function to check for ArgSink configuration in a dictionary and return a configured ArgSink Parameters ---------- d : dict dictionary to look for ArgSink Configuration parameters in - + Returns ------- ArgSink @@ -189,17 +189,19 @@ def __get_output_sink_from_config(self,d): output_set = False output_sink = None for OutputSink in self.default_configurable_sinks: - try: - output_config_d = OutputSink.get_config(OutputSink.ConfigSchema,d) - if output_set: - raise MultipleConfiguredSourceError("more then one OutputSink configuration present in {}".format(d)) - output_sink = OutputSink(**output_config_d) - output_set=True - except NotConfiguredSourceError: - pass + try: + output_config_d = OutputSink.get_config( + OutputSink.ConfigSchema, d) + if output_set: + raise MultipleConfiguredSourceError( + "more then one OutputSink configuration present in {}".format(d)) + output_sink = OutputSink(**output_config_d) + output_set = True + except NotConfiguredSourceError: + pass return output_sink - - def __get_input_data_from_config(self,d): + + def __get_input_data_from_config(self, d): """private function to check for ArgSource configurations in a dictionary and return the data if it exists @@ -212,7 +214,7 @@ def __get_input_data_from_config(self,d): ------- dict or None dictionary of InputData if it found a valid configuration, None otherwise - + Raises ------ MultipleConfiguredSourceError @@ -224,7 +226,8 @@ def __get_input_data_from_config(self,d): try: input_data = get_input(InputSource, d) if input_set: - raise MultipleConfiguredSourceError("more then one InputSource configuration present in {}".format(d)) + raise MultipleConfiguredSourceError( + "more then one InputSource configuration present in {}".format(d)) input_set 
= True except NotConfiguredSourceError as e: pass From 8b4662c77704369a861759318b446ca320c3f764 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 16:40:35 -0800 Subject: [PATCH 24/81] merging changes --- argschema/argschema_parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 9d32a691..15525d18 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -11,7 +11,6 @@ from .sources.yaml_source import YamlSource, YamlSink from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError - def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema From 594c80e6c3a1161aa3896f9ab3cb13caacdd33e2 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 13:28:45 -0800 Subject: [PATCH 25/81] fix yaml parser --- argschema/argschema_parser.py | 4 ++-- test/sources/test_yaml.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 15525d18..c6d513df 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -349,5 +349,5 @@ def initialize_logger(name, log_level): class ArgSchemaYamlParser(ArgSchemaParser): - input_config_map = [YamlSource] - output_config_map = [YamlSink] + default_configurable_sources = [YamlSource] + default_configurable_sinks = [YamlSink] diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 9f990ae3..585a8d29 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -11,7 +11,6 @@ class MyParser(ArgSchemaYamlParser): default_schema = MySchema default_output_schema = MyOutputSchema - default_configurable_sources = [YamlSource] class MyDualParser(MyParser): default_configurable_sources = [JsonSource, YamlSource] From 11d6d3fa19470f275992390802cbfce7233804fa Mon Sep 17 00:00:00 2001 From: 
Forrest Collman Date: Tue, 2 Jan 2018 13:29:58 -0800 Subject: [PATCH 26/81] removing pika source --- argschema/sources/pika_source.py | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 argschema/sources/pika_source.py diff --git a/argschema/sources/pika_source.py b/argschema/sources/pika_source.py deleted file mode 100644 index ee9b22a3..00000000 --- a/argschema/sources/pika_source.py +++ /dev/null @@ -1,29 +0,0 @@ -from .source import InputSource -import pika -import json - -class PikaJsonSource(InputSource): - - def __init__(self,channel,queue): - """Pika client source for dictionary - - Parameters - ---------- - channel: pika.channel.Channel - pika client channel to connect to - queue: str - queue name to get message from - """ - assert(type(channel)==pika.channel.Channel) - self.channel = channel - self.queue = queue - - def get_dict(self): - method_frame, header_frame, body = self.channel.basic_get(self.queue) - if method_frame: - d = json.loads(body) - self.channel.basic_ack(method_frame.delivery_tag) - return d - - def put_dict(self,d): - \ No newline at end of file From 6e0a99650445077d414994df37011b69e4915118 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:19:38 -0800 Subject: [PATCH 27/81] made ConfigSource default __init__ and added auto imports to sources sub-package --- argschema/sources/__init__.py | 2 ++ argschema/sources/source.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/argschema/sources/__init__.py b/argschema/sources/__init__.py index e69de29b..4a732b36 100644 --- a/argschema/sources/__init__.py +++ b/argschema/sources/__init__.py @@ -0,0 +1,2 @@ +from .source import ArgSink, ArgSource +from .json_source import JsonSource, JsonSink diff --git a/argschema/sources/source.py b/argschema/sources/source.py index bbdf51c2..f6e258b9 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -27,6 +27,10 @@ def d_contains_any_fields(schema,d): class 
ConfigurableSource(object): ConfigSchema = None + def __init__(self,**kwargs): + for key,value in kwargs.items(): + self.__dict__[key]=value + @staticmethod def get_config(Schema,d): schema = Schema() From dc18772f514d96d59c0157b2f78d3132cdb6487b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:19:48 -0800 Subject: [PATCH 28/81] added a test for a novel UrlSource --- test/sources/test_url.py | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 test/sources/test_url.py diff --git a/test/sources/test_url.py b/test/sources/test_url.py new file mode 100644 index 00000000..e82e4d04 --- /dev/null +++ b/test/sources/test_url.py @@ -0,0 +1,58 @@ +from argschema.sources import ArgSource +from argschema.schemas import DefaultSchema +from argschema.fields import Str,Int +from argschema import ArgSchemaParser +import requests +import mock +from test_classes import MySchema + +class UrlSourceConfig(DefaultSchema): + input_host = Str(required=True, description="host of url") + input_port = Int(required=False, default=80, description="port of url") + input_url = Str(required=True, description="location on host of input") + +class UrlSource(ArgSource): + ConfigSchema = UrlSourceConfig + def get_dict(self): + url = "http://{}:{}/{}".format(self.input_host, + self.input_port, + self.input_url) + response = requests.get(url) + return response.json() + +class UrlArgSchemaParser(ArgSchemaParser): + default_configurable_sources = [UrlSource] + default_schema = MySchema + +# This method will be used by the mock to replace requests.get +def mocked_requests_get(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + if args[0] == 'http://localhost:88/test.json': + return MockResponse({ + 'a':7, + 'nest':{ + 'one':7, + 'two':False + } + }, 200) + + + return MockResponse(None, 404) + 
+@mock.patch('requests.get', side_effect=mocked_requests_get) +def test_url_parser(mock_get): + input_source=UrlSource(input_host='localhost',input_port=88,input_url='test.json') + mod = UrlArgSchemaParser(input_source=input_source,args = []) + assert(mod.args['a']==7) + +@mock.patch('requests.get', side_effect=mocked_requests_get) +def test_url_parser_command_line(mock_get): + mod = UrlArgSchemaParser(args = ['--input_host','localhost','--input_port','88','--input_url','test.json']) + assert(mod.args['a']==7) From 669cffac840a560f1ff410d042170e5e9e2fb093 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:27:21 -0800 Subject: [PATCH 29/81] added a novel test for a new source UrlSource to demonstrate plugin functionality of Sources/Sinks, and wrote some documenation --- docs/user/intro.rst | 54 +++++++++++++++++++++++++++++--------- test/sources/test_url.py | 24 +---------------- test/sources/url_source.py | 24 +++++++++++++++++ 3 files changed, 67 insertions(+), 35 deletions(-) create mode 100644 test/sources/url_source.py diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 04138f7c..691f38d0 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -1,5 +1,18 @@ User Guide ===================================== +Installation +------------ +install via source code + +:: + + $ python setup.py install + +or pip + +:: + + $ pip install argschema Your First Module ------------------ @@ -181,6 +194,35 @@ example, having an invalid literal) we will see a casting validation error: argschema does not support setting :class:`~marshmallow.fields.Dict` at the command line. +Alternate Sources/Sinks +----------------------- +A json files are just one way that you might decide to store module parameter dictionaries or outputs. +For example, yaml is another perfectly reasonable choice for storing nested key values stores. 
Argschema by default provides +json support because that is what we use most frequently at the Allen Institute, however we have generalized the concept +to allow ArgSchemaParser to plugin alternative "sources" and "sinks" of parameters. + +You can pass an ArgSchemaParser an `~argschema.sources.ArgSource` object which implements a get_dict method, +and `~argschema.ArgSchemaParser` will get its input parameters from that dictionary. + +Similarly you can pass an `~argschema.sources.ArgSink` object which implements a put_dict method, +and `~argschema.ArgSchemaParser.output` will output the dictionary however that ArgSink specifies it should. + +Finally, both `~argschema.sources.ArgSource` and `~argschema.sources.ArgSink` have a property called ConfigSchema, +which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. +For example, the default `~argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. +This is how `~argschema.ArgSchemaParser` is told what keys and values should be read to initialize the `~argschema.sources.ArgSource` +or `~argschema.sources.ArgSink`. + +So for example, if you wanted to define a `~argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, +and a module which had a command line interface for setting that host port and url you could do so like this. + +.. literalinclude:: ../../test/url_source.py + +so now a UrlArgSchemaParser would expect command line flags of --input_host, --input_port, --input_url, and will look to download the json +from an http location via requests, or an existing ArgSchemaParser module could be simply passed an UrlSource, even though the original module +author didn't explicitly support passing parameters by http location, and the parameters will still be deserialized and validated all the same. 
+ + Sphinx Documentation -------------------- argschema comes with a autodocumentation feature for Sphnix which will help you automatically @@ -196,19 +238,7 @@ To configure sphinx to use this function, you must be using the sphnix autodoc m def setup(app): app.connect('autodoc-process-docstring',process_schemas) -Installation ------------- -install via source code - -:: - - $ python setup.py install - -or pip - -:: - $ pip install argschema .. toctree:: diff --git a/test/sources/test_url.py b/test/sources/test_url.py index e82e4d04..60a199de 100644 --- a/test/sources/test_url.py +++ b/test/sources/test_url.py @@ -1,28 +1,6 @@ -from argschema.sources import ArgSource -from argschema.schemas import DefaultSchema -from argschema.fields import Str,Int -from argschema import ArgSchemaParser import requests import mock -from test_classes import MySchema - -class UrlSourceConfig(DefaultSchema): - input_host = Str(required=True, description="host of url") - input_port = Int(required=False, default=80, description="port of url") - input_url = Str(required=True, description="location on host of input") - -class UrlSource(ArgSource): - ConfigSchema = UrlSourceConfig - def get_dict(self): - url = "http://{}:{}/{}".format(self.input_host, - self.input_port, - self.input_url) - response = requests.get(url) - return response.json() - -class UrlArgSchemaParser(ArgSchemaParser): - default_configurable_sources = [UrlSource] - default_schema = MySchema +from url_source import UrlArgSchemaParser, UrlSource # This method will be used by the mock to replace requests.get def mocked_requests_get(*args, **kwargs): diff --git a/test/sources/url_source.py b/test/sources/url_source.py new file mode 100644 index 00000000..2071322c --- /dev/null +++ b/test/sources/url_source.py @@ -0,0 +1,24 @@ +from argschema.sources import ArgSource +from argschema.schemas import DefaultSchema +from argschema.fields import Str,Int +from argschema import ArgSchemaParser +from test_classes import MySchema 
+import requests + +class UrlSourceConfig(DefaultSchema): + input_host = Str(required=True, description="host of url") + input_port = Int(required=False, default=80, description="port of url") + input_url = Str(required=True, description="location on host of input") + +class UrlSource(ArgSource): + ConfigSchema = UrlSourceConfig + def get_dict(self): + url = "http://{}:{}/{}".format(self.input_host, + self.input_port, + self.input_url) + response = requests.get(url) + return response.json() + +class UrlArgSchemaParser(ArgSchemaParser): + default_configurable_sources = [UrlSource] + default_schema = MySchema From 7db7a87347775254377ee8290eff808c67cccf3b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:37:39 -0800 Subject: [PATCH 30/81] fixed doc include --- docs/user/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 691f38d0..a3f84af2 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -216,7 +216,7 @@ or `~argschema.sources.ArgSink`. So for example, if you wanted to define a `~argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, and a module which had a command line interface for setting that host port and url you could do so like this. -.. literalinclude:: ../../test/url_source.py +.. 
literalinclude:: ../../test/sources/url_source.py so now a UrlArgSchemaParser would expect command line flags of --input_host, --input_port, --input_url, and will look to download the json from an http location via requests, or an existing ArgSchemaParser module could be simply passed an UrlSource, even though the original module From 9af2fe1b2524a37a769190b25bda53c4d8723fd5 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:40:59 -0800 Subject: [PATCH 31/81] fixed doc links --- docs/user/intro.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index a3f84af2..9f09d6ce 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -201,19 +201,19 @@ For example, yaml is another perfectly reasonable choice for storing nested key json support because that is what we use most frequently at the Allen Institute, however we have generalized the concept to allow ArgSchemaParser to plugin alternative "sources" and "sinks" of parameters. -You can pass an ArgSchemaParser an `~argschema.sources.ArgSource` object which implements a get_dict method, -and `~argschema.ArgSchemaParser` will get its input parameters from that dictionary. +You can pass an ArgSchemaParser an :class:`~argschema.sources.ArgSource` object which implements a get_dict method, +and :class:`~argschema.ArgSchemaParser` will get its input parameters from that dictionary. -Similarly you can pass an `~argschema.sources.ArgSink` object which implements a put_dict method, -and `~argschema.ArgSchemaParser.output` will output the dictionary however that ArgSink specifies it should. +Similarly you can pass an :class:`~argschema.sources.ArgSink` object which implements a put_dict method, +and :method:`~argschema.ArgSchemaParser.output` will output the dictionary however that :class:`~argschema.sources.ArgSink` specifies it should. 
-Finally, both `~argschema.sources.ArgSource` and `~argschema.sources.ArgSink` have a property called ConfigSchema, +Finally, both :class:`~argschema.sources.ArgSource` and :class:`~argschema.sources.ArgSink` have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. -For example, the default `~argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. -This is how `~argschema.ArgSchemaParser` is told what keys and values should be read to initialize the `~argschema.sources.ArgSource` -or `~argschema.sources.ArgSink`. +For example, the default :class:`~argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. +This is how :class:`~argschema.ArgSchemaParser` is told what keys and values should be read to initialize the :class:`~argschema.sources.ArgSource` +or :class:`~argschema.sources.ArgSink`. -So for example, if you wanted to define a `~argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, +So for example, if you wanted to define a :class:`~argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, and a module which had a command line interface for setting that host port and url you could do so like this. .. 
literalinclude:: ../../test/sources/url_source.py From bc323d005ecbd2beea94e79ac76ef33e6680ae70 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:46:07 -0800 Subject: [PATCH 32/81] change tests to use normal ArgSchemaParser for urlsource --- test/sources/test_url.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/sources/test_url.py b/test/sources/test_url.py index 60a199de..ca7e2d28 100644 --- a/test/sources/test_url.py +++ b/test/sources/test_url.py @@ -1,7 +1,7 @@ import requests import mock -from url_source import UrlArgSchemaParser, UrlSource - +from url_source import UrlArgSchemaParser, UrlSource, MySchema +from argschema import ArgSchemaParser # This method will be used by the mock to replace requests.get def mocked_requests_get(*args, **kwargs): class MockResponse: @@ -27,7 +27,7 @@ def json(self): @mock.patch('requests.get', side_effect=mocked_requests_get) def test_url_parser(mock_get): input_source=UrlSource(input_host='localhost',input_port=88,input_url='test.json') - mod = UrlArgSchemaParser(input_source=input_source,args = []) + mod = ArgSchemaParser(schema_type=MySchema, input_source=input_source,args = []) assert(mod.args['a']==7) @mock.patch('requests.get', side_effect=mocked_requests_get) From b967ada9e2e914846c8709fa8872cd903eaf7c88 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 21:00:08 -0800 Subject: [PATCH 33/81] doc fix --- docs/user/intro.rst | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 9f09d6ce..6c059f18 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -93,19 +93,19 @@ argschema uses marshmallow (http://marshmallow.readthedocs.io/) under the hood to define the parameters schemas. It comes with a basic set of fields that you can use to define your schemas. 
One powerful feature of Marshmallow is that you can define custom fields that do arbitrary validation. -:class:`~argschema.fields` contains all the built-in marshmallow fields, +:class:`argschema.fields` contains all the built-in marshmallow fields, but also some useful custom ones, -such as :class:`~argschema.fields.InputFile`, -:class:`~argschema.fields.OutputFile`, -:class:`~argschema.fields.InputDir` that validate that the paths exist and have the proper +such as :class:`argschema.fields.InputFile`, +:class:`argschema.fields.OutputFile`, +:class:`argschema.fields.InputDir` that validate that the paths exist and have the proper permissions to allow files to be read or written. -Other fields, such as :class:`~argschema.fields.NumpyArray` will deserialize ordered lists of lists +Other fields, such as :class:`argschema.fields.NumpyArray` will deserialize ordered lists of lists directly into a numpy array of your choosing. -Finally, an important Field to know is :class:`~argschema.fields.Nested`, which allows you to define +Finally, an important Field to know is :class:`argschema.fields.Nested`, which allows you to define heirarchical nested structures. Note, that if you use Nested schemas, your Nested schemas should -subclass :class:`~argschema.schemas.DefaultSchema` in order that they properly fill in default values, +subclass :class:`argschema.schemas.DefaultSchema` in order that they properly fill in default values, as :class:`marshmallow.Schema` does not do that by itself. Another common question about :class:`~argschema.fields.Nested` is how you specify that @@ -201,19 +201,19 @@ For example, yaml is another perfectly reasonable choice for storing nested key json support because that is what we use most frequently at the Allen Institute, however we have generalized the concept to allow ArgSchemaParser to plugin alternative "sources" and "sinks" of parameters. 
-You can pass an ArgSchemaParser an :class:`~argschema.sources.ArgSource` object which implements a get_dict method, -and :class:`~argschema.ArgSchemaParser` will get its input parameters from that dictionary. +You can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object which implements a get_dict method, +and :class:`argschema.ArgSchemaParser` will get its input parameters from that dictionary. -Similarly you can pass an :class:`~argschema.sources.ArgSink` object which implements a put_dict method, -and :method:`~argschema.ArgSchemaParser.output` will output the dictionary however that :class:`~argschema.sources.ArgSink` specifies it should. +Similarly you can pass an :class:`argschema.sources.ArgSink` object which implements a put_dict method, +and :method:`argschema.ArgSchemaParser.output` will output the dictionary however that :class:`argschema.sources.ArgSink` specifies it should. -Finally, both :class:`~argschema.sources.ArgSource` and :class:`~argschema.sources.ArgSink` have a property called ConfigSchema, +Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources.ArgSink` have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. -For example, the default :class:`~argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. -This is how :class:`~argschema.ArgSchemaParser` is told what keys and values should be read to initialize the :class:`~argschema.sources.ArgSource` -or :class:`~argschema.sources.ArgSink`. +For example, the default :class:`argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. +This is how :class:`argschema.ArgSchemaParser` is told what keys and values should be read to initialize the :class:`argschema.sources.ArgSource` +or :class:`argschema.sources.ArgSink`. 
-So for example, if you wanted to define a :class:`~argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, +So for example, if you wanted to define a :class:`argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, and a module which had a command line interface for setting that host port and url you could do so like this. .. literalinclude:: ../../test/sources/url_source.py @@ -226,7 +226,7 @@ author didn't explicitly support passing parameters by http location, and the pa Sphinx Documentation -------------------- argschema comes with a autodocumentation feature for Sphnix which will help you automatically -add documentation of your Schemas and ArgSchemaParser classes in your project. This is how the +add documentation of your Schemas and :class:`argschema.ArgSchemaParser` classes in your project. This is how the documentation of the :doc:`../tests/modules` suite included here was generated. To configure sphinx to use this function, you must be using the sphnix autodoc module and add the following to your conf.py file From 6e3defc310dc085b42458bdce42f57953ababa33 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 21:05:33 -0800 Subject: [PATCH 34/81] doc fix --- docs/user/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 6c059f18..9e2b8982 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -205,7 +205,7 @@ You can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object w and :class:`argschema.ArgSchemaParser` will get its input parameters from that dictionary. Similarly you can pass an :class:`argschema.sources.ArgSink` object which implements a put_dict method, -and :method:`argschema.ArgSchemaParser.output` will output the dictionary however that :class:`argschema.sources.ArgSink` specifies it should. 
+and :class:`argschema.ArgSchemaParser.output` will output the dictionary however that :class:`argschema.sources.ArgSink` specifies it should. Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources.ArgSink` have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. From 2a885bc7e341cbc863d626dc4b8aedf4a12d45af Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 08:14:54 -0800 Subject: [PATCH 35/81] added protocol to url_source_demo --- test/sources/url_source.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/sources/url_source.py b/test/sources/url_source.py index 2071322c..a49190d1 100644 --- a/test/sources/url_source.py +++ b/test/sources/url_source.py @@ -1,4 +1,4 @@ -from argschema.sources import ArgSource +from argschema.sources import ArgSource, ArgSink from argschema.schemas import DefaultSchema from argschema.fields import Str,Int from argschema import ArgSchemaParser @@ -9,16 +9,19 @@ class UrlSourceConfig(DefaultSchema): input_host = Str(required=True, description="host of url") input_port = Int(required=False, default=80, description="port of url") input_url = Str(required=True, description="location on host of input") + input_protocol = Str(required=False, default='http') class UrlSource(ArgSource): ConfigSchema = UrlSourceConfig def get_dict(self): - url = "http://{}:{}/{}".format(self.input_host, + url = "{}://{}:{}/{}".format(self.input_protocol, + self.input_host, self.input_port, - self.input_url) + self.input_url) response = requests.get(url) return response.json() + class UrlArgSchemaParser(ArgSchemaParser): default_configurable_sources = [UrlSource] default_schema = MySchema From fd3584fcd4ea5ad2faeab1959f9f7e8f5e041d9b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:25:36 -0800 Subject: [PATCH 36/81] removed None's from argparse when configuring sources via smart_merge --- 
argschema/argschema_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index c6d513df..b0194a3e 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -146,7 +146,7 @@ def __init__(self, input_data = config_data if config_data is not None else input_data # check whether the command line arguments contain an input configuration and use that - config_data = self.__get_input_data_from_config(argsdict) + config_data = self.__get_input_data_from_config(utils.smart_merge({},argsdict)) input_data = config_data if config_data is not None else input_data # merge the command line dictionary into the input json From 7bdd4d2c4db9a4a855b13b6c24c953339f2e46e2 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:25:57 -0800 Subject: [PATCH 37/81] utilized marshmallow validation in Configurable source init --- argschema/sources/source.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index f6e258b9..f3cb6d1a 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -25,11 +25,27 @@ def d_contains_any_fields(schema,d): return True return False +class ConfigSourceSchema(mm.Schema): + pass + class ConfigurableSource(object): - ConfigSchema = None + ConfigSchema = ConfigSourceSchema def __init__(self,**kwargs): - for key,value in kwargs.items(): - self.__dict__[key]=value + """Configurable source + + Parameters + ---------- + **kwargs: dict + a set of keyword arguments which will be validated by this classes ConfigSchema + which will define the set of fields that are allowed (and their defaults) + """ + schema = self.ConfigSchema() + result,errors = schema.load(kwargs) + if len(errors)>0: + raise MisconfiguredSourceError('invalid keyword arguments passed {}'.format(kwargs)) + self.__dict__=result + for field_name, field in 
schema.declared_fields.items(): + self.__dict__[field_name]=result[field_name] @staticmethod def get_config(Schema,d): From 0c5065b240b7ab8e06947ece038319869bf50b63 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:26:15 -0800 Subject: [PATCH 38/81] added optional parameter to url_source test --- test/sources/url_source.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/sources/url_source.py b/test/sources/url_source.py index a49190d1..cc0cb540 100644 --- a/test/sources/url_source.py +++ b/test/sources/url_source.py @@ -9,15 +9,16 @@ class UrlSourceConfig(DefaultSchema): input_host = Str(required=True, description="host of url") input_port = Int(required=False, default=80, description="port of url") input_url = Str(required=True, description="location on host of input") - input_protocol = Str(required=False, default='http') + input_protocol = Str(required=False, default='http', description="url protocol to use") class UrlSource(ArgSource): ConfigSchema = UrlSourceConfig + def get_dict(self): url = "{}://{}:{}/{}".format(self.input_protocol, - self.input_host, - self.input_port, - self.input_url) + self.input_host, + self.input_port, + self.input_url) response = requests.get(url) return response.json() From 1bb697e4028d4ce12d90a9ecde143481e512f2bc Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:26:39 -0800 Subject: [PATCH 39/81] pep8 --- test/sources/test_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sources/test_url.py b/test/sources/test_url.py index ca7e2d28..448a6c7d 100644 --- a/test/sources/test_url.py +++ b/test/sources/test_url.py @@ -27,7 +27,7 @@ def json(self): @mock.patch('requests.get', side_effect=mocked_requests_get) def test_url_parser(mock_get): input_source=UrlSource(input_host='localhost',input_port=88,input_url='test.json') - mod = ArgSchemaParser(schema_type=MySchema, input_source=input_source,args = []) + mod = 
ArgSchemaParser(schema_type=MySchema, input_source=input_source, args = []) assert(mod.args['a']==7) @mock.patch('requests.get', side_effect=mocked_requests_get) From f85c219b3798ab8b60c54b98a3bcd1dd0080946f Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:26:46 -0800 Subject: [PATCH 40/81] documentation update --- docs/user/intro.rst | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 9e2b8982..8a1d648c 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -197,31 +197,41 @@ command line. Alternate Sources/Sinks ----------------------- A json files are just one way that you might decide to store module parameter dictionaries or outputs. -For example, yaml is another perfectly reasonable choice for storing nested key values stores. Argschema by default provides -json support because that is what we use most frequently at the Allen Institute, however we have generalized the concept -to allow ArgSchemaParser to plugin alternative "sources" and "sinks" of parameters. +Argschema by default provides json support because that is what we use most frequently at the Allen Institute, +however we have generalized the concept to allow :class:`argschema.ArgSchemaParser` to plugin alternative +"sources" and "sinks" of parameters. -You can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object which implements a get_dict method, -and :class:`argschema.ArgSchemaParser` will get its input parameters from that dictionary. +For example, yaml is another perfectly reasonable choice for storing nested key values stores. +`argschema.argschema_parser.ArgSchemaYamlParser` demonstrates just that functionality. So now +input_yaml and output_yaml can be specified instead. 
+ +Furthermore, you can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object which +implements a get_dict method, and any :class:`argschema.ArgSchemaParser` will get its input parameters +from that dictionary. Importantly, this is true even when the original module author didn't +explicitly support passing parameters from that mechanism, and the parameters will still be +deserialized and validated in a uniform manner. Similarly you can pass an :class:`argschema.sources.ArgSink` object which implements a put_dict method, -and :class:`argschema.ArgSchemaParser.output` will output the dictionary however that :class:`argschema.sources.ArgSink` specifies it should. +and :class:`argschema.ArgSchemaParser.output` will output the dictionary however that +:class:`argschema.sources.ArgSink` specifies it should. -Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources.ArgSink` have a property called ConfigSchema, -which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. -For example, the default :class:`argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. -This is how :class:`argschema.ArgSchemaParser` is told what keys and values should be read to initialize the :class:`argschema.sources.ArgSource` -or :class:`argschema.sources.ArgSink`. +Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources.ArgSink` +have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize +the kwargs to it's init class. -So for example, if you wanted to define a :class:`argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, -and a module which had a command line interface for setting that host port and url you could do so like this. +For example, the default :class:`argschema.sources.json_source.JsonSource.ConfigSchema` has one string +field of 'input_json'. 
This is how :class:`argschema.ArgSchemaParser` is told what keys and values +should be read to initialize the :class:`argschema.sources.ArgSource` or :class:`argschema.sources.ArgSink`. -.. literalinclude:: ../../test/sources/url_source.py +So for example, if you wanted to define a :class:`argschema.sources.ArgSource` which loaded a dictionary +from a particular host, port and url, and a module which had a command line interface for setting that +host port and url you could do so like this. -so now a UrlArgSchemaParser would expect command line flags of --input_host, --input_port, --input_url, and will look to download the json -from an http location via requests, or an existing ArgSchemaParser module could be simply passed an UrlSource, even though the original module -author didn't explicitly support passing parameters by http location, and the parameters will still be deserialized and validated all the same. +.. literalinclude:: ../../test/sources/url_source.py +so now a UrlArgSchemaParser would expect command line flags of '--input_host', '--input_port', '--input_url' +(or look for them in input_data) and will look to download the json from that http location via requests +or an existing :class:`argschema.ArgSchemaParser` module could be simply passed a configured UrlSource via input_source. 
Sphinx Documentation -------------------- From 504abb995e1c924e5c93f6c07eb3a815be7c1389 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 16:43:54 -0800 Subject: [PATCH 41/81] doc changes --- docs/user/intro.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 8a1d648c..79241820 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -236,10 +236,11 @@ or an existing :class:`argschema.ArgSchemaParser` module could be simply passed Sphinx Documentation -------------------- argschema comes with a autodocumentation feature for Sphnix which will help you automatically -add documentation of your Schemas and :class:`argschema.ArgSchemaParser` classes in your project. This is how the -documentation of the :doc:`../tests/modules` suite included here was generated. +add documentation of your Schemas and :class:`argschema.ArgSchemaParser` classes in your project. +This is how the documentation of the :doc:`../tests/modules` suite included here was generated. -To configure sphinx to use this function, you must be using the sphnix autodoc module and add the following to your conf.py file +To configure sphnix to use this function, you must be using the sphnix autodoc module +and add the following to your conf.py file .. 
code-block:: python From c34407937b3ec2e49f1f886fc63acb2bf32a2e1e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:37:28 -0800 Subject: [PATCH 42/81] pep8 --- test/sources/test_url.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/test/sources/test_url.py b/test/sources/test_url.py index 448a6c7d..f90cf9b3 100644 --- a/test/sources/test_url.py +++ b/test/sources/test_url.py @@ -1,8 +1,10 @@ -import requests +import requests import mock from url_source import UrlArgSchemaParser, UrlSource, MySchema from argschema import ArgSchemaParser # This method will be used by the mock to replace requests.get + + def mocked_requests_get(*args, **kwargs): class MockResponse: def __init__(self, json_data, status_code): @@ -14,23 +16,26 @@ def json(self): if args[0] == 'http://localhost:88/test.json': return MockResponse({ - 'a':7, - 'nest':{ - 'one':7, - 'two':False - } - }, 200) - - + 'a': 7, + 'nest': { + 'one': 7, + 'two': False + } + }, 200) return MockResponse(None, 404) + @mock.patch('requests.get', side_effect=mocked_requests_get) def test_url_parser(mock_get): - input_source=UrlSource(input_host='localhost',input_port=88,input_url='test.json') - mod = ArgSchemaParser(schema_type=MySchema, input_source=input_source, args = []) - assert(mod.args['a']==7) + input_source = UrlSource(input_host='localhost', + input_port=88, input_url='test.json') + mod = ArgSchemaParser(schema_type=MySchema, + input_source=input_source, args=[]) + assert(mod.args['a'] == 7) + @mock.patch('requests.get', side_effect=mocked_requests_get) def test_url_parser_command_line(mock_get): - mod = UrlArgSchemaParser(args = ['--input_host','localhost','--input_port','88','--input_url','test.json']) - assert(mod.args['a']==7) + mod = UrlArgSchemaParser( + args=['--input_host', 'localhost', '--input_port', '88', '--input_url', 'test.json']) + assert(mod.args['a'] == 7) From 6c48ef1552a4556e12e162bfcc0e4ffe8c812522 Mon Sep 17 
00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:37:45 -0800 Subject: [PATCH 43/81] changed to use urllib to construct url --- test/sources/url_source.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/sources/url_source.py b/test/sources/url_source.py index cc0cb540..9d8f0778 100644 --- a/test/sources/url_source.py +++ b/test/sources/url_source.py @@ -4,10 +4,14 @@ from argschema import ArgSchemaParser from test_classes import MySchema import requests +try: + from urllib.parse import urlunparse +except: + from urllib import urlunparse class UrlSourceConfig(DefaultSchema): input_host = Str(required=True, description="host of url") - input_port = Int(required=False, default=80, description="port of url") + input_port = Int(required=False, default=None, description="port of url") input_url = Str(required=True, description="location on host of input") input_protocol = Str(required=False, default='http', description="url protocol to use") @@ -15,10 +19,11 @@ class UrlSource(ArgSource): ConfigSchema = UrlSourceConfig def get_dict(self): - url = "{}://{}:{}/{}".format(self.input_protocol, - self.input_host, - self.input_port, - self.input_url) + if self.input_port is None: + netloc = self.input_host + else: + netloc = "{}:{}".format(self.input_host,self.input_port) + url = urlunparse((self.input_protocol,netloc,self.input_url,None,None,None)) response = requests.get(url) return response.json() From 552c797f3eeb210c772d68249885e1a5a716e106 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:06:07 -0800 Subject: [PATCH 44/81] removed FileSource pattern --- argschema/sources/json_source.py | 24 +++++++++---------- argschema/sources/source.py | 41 ++++++++++++-------------------- argschema/sources/yaml_source.py | 21 +++++++--------- test/sources/test_json.py | 3 ++- test/sources/test_yaml.py | 9 ++++--- 5 files changed, 43 insertions(+), 55 deletions(-) diff --git a/argschema/sources/json_source.py 
b/argschema/sources/json_source.py index b2754362..d87b4ca4 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -1,4 +1,4 @@ -from .source import FileSource, FileSink +from .source import ArgSource, ArgSink import json import marshmallow as mm import argschema @@ -11,19 +11,17 @@ class JsonOutputConfigSchema(mm.Schema): output_json = argschema.fields.OutputFile(required=True, description = 'filepath to save output_json') -class JsonSource(FileSource): +class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema + + def get_dict(self): + with open(self.input_json,'r') as fp: + return json.load(fp) - def __init__(self,input_json=None): - self.filepath = input_json - def read_file(self,fp): - return json.load(fp) - -class JsonSink(FileSink): +class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - def __init__(self,output_json=None): - self.filepath = output_json - - def write_file(self,fp,d): - json.dump(d,fp) + def put_dict(self,d): + with open(self.output_json,'w') as fp: + json.dump(d,fp) + diff --git a/argschema/sources/source.py b/argschema/sources/source.py index f3cb6d1a..42e391b9 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -19,6 +19,8 @@ class MultipleConfiguredSourceError(ConfigurableSourceError): pass def d_contains_any_fields(schema,d): + if len(schema.declared_fields)==0: + return True for field_name, field in schema.declared_fields.items(): if field_name in d.keys(): if d[field_name] is not None: @@ -40,12 +42,8 @@ def __init__(self,**kwargs): which will define the set of fields that are allowed (and their defaults) """ schema = self.ConfigSchema() - result,errors = schema.load(kwargs) - if len(errors)>0: - raise MisconfiguredSourceError('invalid keyword arguments passed {}'.format(kwargs)) - self.__dict__=result - for field_name, field in schema.declared_fields.items(): - self.__dict__[field_name]=result[field_name] + result = self.get_config(self.ConfigSchema,kwargs) + 
self.__dict__.update(result) @staticmethod def get_config(Schema,d): @@ -58,30 +56,21 @@ def get_config(Schema,d): raise MisconfiguredSourceError("Source incorrectly configured\n" + json.dumps(errors, indent=2)) else: return result + class ArgSource(ConfigurableSource): def get_dict(self): pass -class ArgSink(ConfigurableSource): - def put_dict(self,d): - pass - -class FileSource(ArgSource): - - def get_dict(self): - with open(self.filepath,'r') as fp: - d = self.read_file(fp) - return d - - def read_file(self,fp): - pass - -class FileSink(ArgSink): - - def write_file(self,fp,d): - pass +def get_input_from_config(ArgSource, config_d): + if config_d is not None: + input_config_d = ArgSource.get_config(ArgSource.ConfigSchema, config_d) + input_source = ArgSource(**input_config_d) + input_data = input_source.get_dict() + return input_data + else: + raise NotConfiguredSourceError('No dictionary provided') +class ArgSink(ConfigurableSource): def put_dict(self,d): - with open(self.filepath,'w') as fp: - self.write_file(fp,d) \ No newline at end of file + pass \ No newline at end of file diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index 480f4d87..1692b9dd 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -1,5 +1,5 @@ import yaml -from .source import FileSource,FileSink +from .source import ArgSource,ArgSink import argschema import marshmallow as mm @@ -11,20 +11,17 @@ class YamlOutputConfigSchema(mm.Schema): output_yaml = argschema.fields.OutputFile(required=True, description = 'filepath to save output yaml') -class YamlSource(FileSource): +class YamlSource(ArgSource): ConfigSchema = YamlInputConfigSchema - def __init__(self,input_yaml=None): - self.filepath = input_yaml + def get_dict(self): + with open(self.input_yaml,'r') as fp: + return yaml.load(fp) - def read_file(self,fp): - return yaml.load(fp) - -class YamlSink(FileSink): +class YamlSink(ArgSink): ConfigSchema = YamlOutputConfigSchema - 
def __init__(self,output_yaml=None): - self.filepath = output_yaml + def put_dict(self,d): + with open(self.output_yaml,'w') as fp: + yaml.dump(d,fp,default_flow_style=False) - def write_file(self,fp,d): - yaml.dump(d,fp,default_flow_style=False) \ No newline at end of file diff --git a/test/sources/test_json.py b/test/sources/test_json.py index 3055cb20..1acc001b 100644 --- a/test/sources/test_json.py +++ b/test/sources/test_json.py @@ -22,7 +22,8 @@ def test_input_file(tmpdir_factory): return str(file_in) def test_json_source(test_input_file): - mod = MyParser(input_source= JsonSource(test_input_file), args=[]) + source = JsonSource(input_json=test_input_file) + mod = MyParser(input_source= source, args=[]) def test_json_source_command(test_input_file): mod = MyParser(args = ['--input_json',test_input_file]) \ No newline at end of file diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 585a8d29..96c0bce2 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -42,7 +42,8 @@ def test_json_input_file(tmpdir_factory): def test_yaml_source(test_yaml_input_file): - mod = MyParser(input_source=YamlSource(test_yaml_input_file), args=[]) + source = YamlSource(input_yaml=test_yaml_input_file) + mod = MyParser(input_source=source, args=[]) def test_yaml_source_command(test_yaml_input_file): @@ -54,8 +55,10 @@ def test_yaml_sink(test_yaml_input_file, tmpdir): output_data = { 'a': 3 } - mod = MyParser(input_source=YamlSource(test_yaml_input_file), - output_sink=YamlSink(str(outfile))) + source = YamlSource(input_yaml=test_yaml_input_file) + sink = YamlSink(output_yaml = str(outfile)) + mod = MyParser(input_source=source, + output_sink=sink) mod.output(output_data) with open(str(outfile), 'r') as fp: From e5a2c29ee10ef65b249c4f3c2952706be94423a3 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:06:22 -0800 Subject: [PATCH 45/81] moved get_input to source module --- argschema/argschema_parser.py | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index b0194a3e..fed56a51 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -9,7 +9,7 @@ import marshmallow as mm from .sources.json_source import JsonSource, JsonSink from .sources.yaml_source import YamlSource, YamlSink -from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError +from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError, get_input_from_config def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema @@ -222,8 +222,8 @@ def __get_input_data_from_config(self, d): input_set = False input_data = None for InputSource in self.default_configurable_sources: - try: - input_data = get_input(InputSource, d) + try: + input_data = get_input_from_config(InputSource, d) if input_set: raise MultipleConfiguredSourceError( "more then one InputSource configuration present in {}".format(d)) From aa684e7a24e38d390d6efee0e9ea8f1925bd1969 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:07:40 -0800 Subject: [PATCH 46/81] fixed python2 import --- test/sources/url_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sources/url_source.py b/test/sources/url_source.py index 9d8f0778..56e67808 100644 --- a/test/sources/url_source.py +++ b/test/sources/url_source.py @@ -7,7 +7,7 @@ try: from urllib.parse import urlunparse except: - from urllib import urlunparse + from urlparse import urlunparse class UrlSourceConfig(DefaultSchema): input_host = Str(required=True, description="host of url") From 58fa35345bf401cc4a3ad6cce5f79b1969079818 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:18:23 -0800 Subject: [PATCH 47/81] added docstrings --- argschema/sources/source.py | 102 ++++++++++++++++++++++++++++++------ 1 file 
changed, 86 insertions(+), 16 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 42e391b9..3671fd70 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -1,38 +1,61 @@ import json import marshmallow as mm + class ConfigurableSourceError(mm.ValidationError): """Base Exception class for configurable sources""" pass + class MisconfiguredSourceError(ConfigurableSourceError): """Exception when a source configuration was present in part but failed validation""" pass + class NotConfiguredSourceError(ConfigurableSourceError): """Exception when the source configuration is simply completely missing""" pass + class MultipleConfiguredSourceError(ConfigurableSourceError): """Exception when there is more than one validly configured Source configured""" pass -def d_contains_any_fields(schema,d): - if len(schema.declared_fields)==0: + +def d_contains_any_fields(schema, d): + """function to test if a dictionary contains any elements of a schema + + Parameters + ---------- + schema: marshmallow.Schema + a marshmallow schema to test d with + d: dict + the dictionary to test whether it contains any elements of a schema + + Returns + ------- + bool: + True/False whether d contains any elements of a schema. 
If a schema contains no elements, returns True + """ + + if len(schema.declared_fields) == 0: return True for field_name, field in schema.declared_fields.items(): if field_name in d.keys(): if d[field_name] is not None: - return True + return True return False + class ConfigSourceSchema(mm.Schema): pass + class ConfigurableSource(object): ConfigSchema = ConfigSourceSchema - def __init__(self,**kwargs): + + def __init__(self, **kwargs): """Configurable source Parameters @@ -42,27 +65,73 @@ def __init__(self,**kwargs): which will define the set of fields that are allowed (and their defaults) """ schema = self.ConfigSchema() - result = self.get_config(self.ConfigSchema,kwargs) + result = self.get_config(self.ConfigSchema, kwargs) self.__dict__.update(result) - + @staticmethod - def get_config(Schema,d): - schema = Schema() - if not d_contains_any_fields(schema,d): - raise NotConfiguredSourceError("This source is not present in \n" + json.dumps(d, indent=2)) + def get_config(ConfigSchema, d): + """A static method to get the proper validated configuration keyword arguments/dictionary + of a Configurable source from a dictionary + + Parameters + ---------- + ConfigSchema: marshmallow.Schema + a marshmallow schema that defines the configuration schema for this ConfigurableSource + d: dict + a dictionary that might contain a proper configuration of this schema + + Returns + ------- + dict + a dictionary of configuration values that has been properly deserialized and validated by + ConfigSchema + Raises + ------ + NotConfiguredSourceError + if the configation dictionary does not contain a configuration for this source + MisconfiguredSourceError + if the configuration dictionary contains a configuration but it is invalid + """ + schema = ConfigSchema() + if not d_contains_any_fields(schema, d): + raise NotConfiguredSourceError( + "This source is not present in \n" + json.dumps(d, indent=2)) else: - result,errors = schema.load(d) - if len(errors)>0: - raise 
MisconfiguredSourceError("Source incorrectly configured\n" + json.dumps(errors, indent=2)) + result, errors = schema.load(d) + if len(errors) > 0: + raise MisconfiguredSourceError( + "Source incorrectly configured\n" + json.dumps(errors, indent=2)) else: return result - + class ArgSource(ConfigurableSource): def get_dict(self): pass + def get_input_from_config(ArgSource, config_d): + """function to return the input dictionary from an ArgSource, given a configuration dictionary + + Parameters + ---------- + ArgSource: class(ArgSource) + The ArgSource class subclass that you want to get input from + config_d: a dictionary that might contain a configuration for this source + + Returns + ------- + dict + a dictionary returned by ArgSource.get_dict() after validating configuration + and instantiating an ArgSource instance + + Raises + ------ + NotConfiguredSourceError + if the configation dictionary does not contain a configuration for this source + MisconfiguredSourceError + if the configuration dictionary contains a configuration but it is invalid + """ if config_d is not None: input_config_d = ArgSource.get_config(ArgSource.ConfigSchema, config_d) input_source = ArgSource(**input_config_d) @@ -71,6 +140,7 @@ def get_input_from_config(ArgSource, config_d): else: raise NotConfiguredSourceError('No dictionary provided') + class ArgSink(ConfigurableSource): - def put_dict(self,d): - pass \ No newline at end of file + def put_dict(self, d): + pass From ef3114898cf28746e3171a82db8cabbc08d34949 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:21:42 -0800 Subject: [PATCH 48/81] added doc strings to methods that need to be implemented --- argschema/sources/source.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 3671fd70..cf5c064b 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -107,6 +107,7 @@ def get_config(ConfigSchema, d): class 
ArgSource(ConfigurableSource): def get_dict(self): + """method that must be implemented to enable an ArgSource to return a dictionary""" pass @@ -143,4 +144,11 @@ def get_input_from_config(ArgSource, config_d): class ArgSink(ConfigurableSource): def put_dict(self, d): + """method that must be implemented to enable an ArgSink to write a dictionary + + Parameters + ---------- + d: dict + the dictionary to write + """ pass From b31faea868bfc6171b6ea5f14cafdc59c33d10b3 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:22:00 -0800 Subject: [PATCH 49/81] typo --- argschema/sources/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index cf5c064b..d9dd32bc 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -107,7 +107,7 @@ def get_config(ConfigSchema, d): class ArgSource(ConfigurableSource): def get_dict(self): - """method that must be implemented to enable an ArgSource to return a dictionary""" + """method that must be implemented to enable an ArgSource to return a dictionary""" pass From f0a91161faa2c882c242de9b9c854b688a82a1e5 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:27:30 -0800 Subject: [PATCH 50/81] documenation fixes --- argschema/argschema_parser.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index fed56a51..28f99b78 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -78,19 +78,20 @@ class ArgSchemaParser(object): Parameters ---------- input_data : dict or None - dictionary parameters to fall back on if all source aren't present + dictionary parameters to fall back on if not source is given or configured via command line schema_type : schemas.ArgSchema the schema to use to validate the parameters output_schema_type : marshmallow.Schema - the schema to use to validate the 
output_json, used by self.output + the schema to use to validate the output, used by self.output input_source : argschema.sources.source.Source a generic source of a dictionary output_sink : argschema.sources.source.Source - a generic output to put output dictionary + a generic sink to write output dictionary to args : list or None - command line arguments passed to the module, if None use argparse to parse the command line, set to [] if you want to bypass command line parsing + command line arguments passed to the module, if None use argparse to parse the command line, + set to [] if you want to bypass command line parsing logger_name : str - name of logger from the logging module you want to instantiate 'argschema' + name of logger from the logging module you want to instantiate default ('argschema') Raises ------- @@ -287,7 +288,7 @@ def output(self, d, output_path=None, sink=None, **sink_options): output_d = self.get_output_json(d) if output_path is not None: - self.logger.warning('DEPRECATED, pass sink instead') + self.logger.warning('DEPRECATED, pass output_sink instead') sink = JsonSink(output_json=output_path) if sink is not None: sink.put_dict(output_d) From 2bb6f8e9040bcc4c2b14de82bde32c6788719dda Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:52:50 -0800 Subject: [PATCH 51/81] doc fixes --- docs/user/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 79241820..47b49b39 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -202,7 +202,7 @@ however we have generalized the concept to allow :class:`argschema.ArgSchemaPars "sources" and "sinks" of parameters. For example, yaml is another perfectly reasonable choice for storing nested key values stores. -`argschema.argschema_parser.ArgSchemaYamlParser` demonstrates just that functionality. So now +:class:`argschema.argschema_parser.ArgSchemaYamlParser` demonstrates just that functionality. 
So now input_yaml and output_yaml can be specified instead. Furthermore, you can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object which From 5a43bb5f0dbd4a87489ccb3b13f185aa3669ef1b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 15 Jan 2018 08:48:44 -0800 Subject: [PATCH 52/81] doc update --- docs/user/intro.rst | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 47b49b39..87a342e9 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -196,12 +196,12 @@ command line. Alternate Sources/Sinks ----------------------- -A json files are just one way that you might decide to store module parameter dictionaries or outputs. +Json files are just one way that you might decide to serialize module parameters or outputs. Argschema by default provides json support because that is what we use most frequently at the Allen Institute, however we have generalized the concept to allow :class:`argschema.ArgSchemaParser` to plugin alternative -"sources" and "sinks" of parameters. +"sources" and "sinks" of dictionary inputs and outputs. -For example, yaml is another perfectly reasonable choice for storing nested key values stores. +For example, yaml is another reasonable choice for storing nested key-value stores. :class:`argschema.argschema_parser.ArgSchemaYamlParser` demonstrates just that functionality. So now input_yaml and output_yaml can be specified instead. @@ -219,9 +219,10 @@ Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. -For example, the default :class:`argschema.sources.json_source.JsonSource.ConfigSchema` has one string +For example, the default :class:`argschema.sources.json_source.JsonSource` has one string field of 'input_json'. 
This is how :class:`argschema.ArgSchemaParser` is told what keys and values -should be read to initialize the :class:`argschema.sources.ArgSource` or :class:`argschema.sources.ArgSink`. +should be read to initialize a :class:`argschema.sources.ArgSource` or + :class:`argschema.sources.ArgSink` instance. So for example, if you wanted to define a :class:`argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, and a module which had a command line interface for setting that @@ -229,9 +230,11 @@ host port and url you could do so like this. .. literalinclude:: ../../test/sources/url_source.py -so now a UrlArgSchemaParser would expect command line flags of '--input_host', '--input_port', '--input_url' -(or look for them in input_data) and will look to download the json from that http location via requests -or an existing :class:`argschema.ArgSchemaParser` module could be simply passed a configured UrlSource via input_source. +so now a UrlArgSchemaParser would expect command line flags of '--input_host' and '--input_url', and +optionally '--input_port','--input_protocol' (or look for them in input_data) and will look to download +the json from that http location via requests. In addition, an existing :class:`argschema.ArgSchemaParser` +module could be simply passed a configured UrlSource via input_source, +and it would get its parameters from there. 
Sphinx Documentation -------------------- From 88c0c81b29bd993399773e3ae1d8c5df094bc02d Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 18:34:57 -0800 Subject: [PATCH 53/81] wired up options for json sink --- argschema/argschema_parser.py | 4 +-- argschema/sources/json_source.py | 4 +-- argschema/utils.py | 49 +++++++++++++++++++++++++------- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 28f99b78..f9356399 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -136,7 +136,7 @@ def __init__(self, # build a command line parser from the input schemas and configurations p = utils.schema_argparser(self.schema, io_schemas) argsobj = p.parse_args(args) - argsdict = utils.args_to_dict(argsobj, self.schema) + argsdict = utils.args_to_dict(argsobj, [self.schema]+io_schemas) self.logger.debug('argsdict is {}'.format(argsdict)) # if you received an input_source, get the dictionary from there @@ -291,7 +291,7 @@ def output(self, d, output_path=None, sink=None, **sink_options): self.logger.warning('DEPRECATED, pass output_sink instead') sink = JsonSink(output_json=output_path) if sink is not None: - sink.put_dict(output_d) + sink.put_dict(output_d,**sink_options) else: self.output_sink.put_dict(output_d, **sink_options) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index d87b4ca4..c601229e 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -21,7 +21,7 @@ def get_dict(self): class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - def put_dict(self,d): + def put_dict(self,d,**json_options): with open(self.output_json,'w') as fp: - json.dump(d,fp) + json.dump(d,fp,**json_options) diff --git a/argschema/utils.py b/argschema/utils.py index 17d14ba3..eaeaf60e 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -82,16 +82,42 @@ def cli_error_dict(arg_path, 
field_type, index=0): else: return {arg_path[index]: cli_error_dict(arg_path, field_type, index + 1)} +def get_field_def_from_schema(parts,schema): + """function to get a field_definition from a particular key, specified by it's parts list -def args_to_dict(argsobj, schema=None): + Parameters + ---------- + parts : list[str] + the list of keys to get this schema + schema: marshmallow.Schema + the marshmallow schema to look up this key + + Returns + ------- + marshmallow.Field or None + returns the field in the schema if it exists, otherwise returns None + """ + current_schema = schema + for part in parts: + if part not in current_schema.fields.keys(): + return None + else: + if current_schema.only and part not in current_schema.only: + field_def = None + else: + field_def = current_schema.fields[part] + if isinstance(field_def, fields.Nested): + current_schema = field_def.schema + return field_def +def args_to_dict(argsobj, schemas=None): """function to convert namespace returned by argsparse into a nested dictionary Parameters ---------- argsobj : argparse.Namespace Namespace object returned by standard argparse.parse function - schema : marshmallow.Schema - Optional schema which will be used to cast fields via `FIELD_TYPE_MAP` + schemas : list[marshmallow.Schema] + Optional list of schemas which will be used to cast fields via `FIELD_TYPE_MAP` Returns @@ -105,18 +131,19 @@ def args_to_dict(argsobj, schema=None): errors = {} field_def = None for field in argsdict.keys(): - current_schema = schema parts = field.split('.') root = d for i in range(len(parts)): - if current_schema is not None: - if current_schema.only and parts[i] not in current_schema.only: - field_def = None - else: - field_def = current_schema.fields[parts[i]] - if isinstance(field_def, fields.Nested): - current_schema = field_def.schema + if i == (len(parts) - 1): + field_def = None + for schema in schemas: + field_def = get_field_def_from_schema(parts,schema) + if field_def is not None: + break + 
+ #field_def = next(get_field_def(parts,schema) for schema in schemas if field_in_schema(parts,schema)) + value = argsdict.get(field) if value is not None: try: From 67ff09a8daed56f660332ef9f32b92bf3310479e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 18:43:09 -0800 Subject: [PATCH 54/81] fixed docstring --- argschema/argschema_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index f9356399..009a3e31 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -276,10 +276,10 @@ def output(self, d, output_path=None, sink=None, **sink_options): output_sink to output to (optional default to self.output_source) output_path: str path to save to output file, optional (with default to self.mod['output_json'] location) + (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) **sink_options : will be passed through to sink.put_dict - - (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) + Raises ------ marshmallow.ValidationError From 024c5f05f4a2fca1046a8f461c7c7678c1a4ea2e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 19:06:39 -0800 Subject: [PATCH 55/81] wip --- argschema/argschema_parser.py | 9 +++------ argschema/sources/json_source.py | 10 ++++++---- test/test_argschema_parser.py | 1 - test/test_output.py | 4 +++- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 009a3e31..cf2c1efb 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -264,7 +264,7 @@ def get_output_json(self, d): return output_json - def output(self, d, output_path=None, sink=None, **sink_options): + def output(self,d,sink=None): """method for outputing dictionary to the output_json file path after validating it through the 
output_schema_type @@ -287,13 +287,10 @@ def output(self, d, output_path=None, sink=None, **sink_options): """ output_d = self.get_output_json(d) - if output_path is not None: - self.logger.warning('DEPRECATED, pass output_sink instead') - sink = JsonSink(output_json=output_path) if sink is not None: - sink.put_dict(output_d,**sink_options) + sink.put_dict(output_d) else: - self.output_sink.put_dict(output_d, **sink_options) + self.output_sink.put_dict(output_d) def load_schema_with_defaults(self, schema, args): """method for deserializing the arguments dictionary (args) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index c601229e..1e3bbdbc 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -10,18 +10,20 @@ class JsonInputConfigSchema(mm.Schema): class JsonOutputConfigSchema(mm.Schema): output_json = argschema.fields.OutputFile(required=True, description = 'filepath to save output_json') - + output_json_indent = argschema.fields.Int(required=False, + default = mm.missing, + description = 'whether to indent options or not') class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema def get_dict(self): with open(self.input_json,'r') as fp: - return json.load(fp) + return json.load(fp,) class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - def put_dict(self,d,**json_options): + def put_dict(self,d): with open(self.output_json,'w') as fp: - json.dump(d,fp,**json_options) + json.dump(d,fp,indent=self.output_json_indent) diff --git a/test/test_argschema_parser.py b/test/test_argschema_parser.py index c63a85b2..6cb42d0e 100644 --- a/test/test_argschema_parser.py +++ b/test/test_argschema_parser.py @@ -88,7 +88,6 @@ def test_parser_output(tmpdir_factory): } } mod = MyParser(input_data=input_data, args=[]) - mod.output(mod.args, output_path=str(json_path), indent=2) with open(str(json_path), 'r') as jf: obt = json.load(jf) diff --git a/test/test_output.py b/test/test_output.py index 
0678f3a7..5bc8583b 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -1,6 +1,7 @@ from argschema import ArgSchemaParser from argschema.schemas import DefaultSchema from argschema.fields import Str, Int, NumpyArray +from argschema.sources import JsonSink import json import numpy as np import pytest @@ -96,7 +97,8 @@ def test_alt_output(tmpdir): "b": 5, "M": M } - mod.output(output, str(file_out_2)) + sink = JsonSink(output_json=str(file_out)) + mod.output(output, sink=sink) with open(str(file_out_2), 'r') as fp: actual_output = json.load(fp) assert actual_output == expected_output From 7f895af21d515c694dcd51e4bb071df1b9fda104 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 19:18:53 -0800 Subject: [PATCH 56/81] alternative way of handling indent options --- argschema/sources/json_source.py | 7 ++++--- test/test_output.py | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 1e3bbdbc..558ab365 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -11,7 +11,6 @@ class JsonOutputConfigSchema(mm.Schema): output_json = argschema.fields.OutputFile(required=True, description = 'filepath to save output_json') output_json_indent = argschema.fields.Int(required=False, - default = mm.missing, description = 'whether to indent options or not') class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema @@ -22,8 +21,10 @@ def get_dict(self): class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - + def __init__(self,output_json=None,output_json_indent=None): + self.output_json = output_json + self.indent = output_json_indent def put_dict(self,d): with open(self.output_json,'w') as fp: - json.dump(d,fp,indent=self.output_json_indent) + json.dump(d,fp,indent=self.indent) diff --git a/test/test_output.py b/test/test_output.py index 5bc8583b..5878ec7a 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ 
-97,9 +97,9 @@ def test_alt_output(tmpdir): "b": 5, "M": M } - sink = JsonSink(output_json=str(file_out)) - mod.output(output, sink=sink) - with open(str(file_out_2), 'r') as fp: + sink = JsonSink(output_json= str(file_out_2)) + mod.output(output,sink=sink) + with open(str(file_out_2),'r') as fp: actual_output = json.load(fp) assert actual_output == expected_output From 4fadd6d53a33d4413e151ae2dc2a7285d5a89f40 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 28 Apr 2018 08:48:54 -0700 Subject: [PATCH 57/81] style changes --- argschema/sources/source.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index d9dd32bc..a66e92d3 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -1,4 +1,3 @@ -import json import marshmallow as mm @@ -56,7 +55,7 @@ class ConfigurableSource(object): ConfigSchema = ConfigSourceSchema def __init__(self, **kwargs): - """Configurable source + """Configurable source Parameters ---------- @@ -64,7 +63,7 @@ def __init__(self, **kwargs): a set of keyword arguments which will be validated by this classes ConfigSchema which will define the set of fields that are allowed (and their defaults) """ - schema = self.ConfigSchema() + self.schema = self.ConfigSchema() result = self.get_config(self.ConfigSchema, kwargs) self.__dict__.update(result) @@ -95,12 +94,12 @@ def get_config(ConfigSchema, d): schema = ConfigSchema() if not d_contains_any_fields(schema, d): raise NotConfiguredSourceError( - "This source is not present in \n" + json.dumps(d, indent=2)) + "This source is not present in \n {}".format(d)) else: result, errors = schema.load(d) if len(errors) > 0: raise MisconfiguredSourceError( - "Source incorrectly configured\n" + json.dumps(errors, indent=2)) + "Source incorrectly configured\n {}".format(errors)) else: return result @@ -123,7 +122,7 @@ def get_input_from_config(ArgSource, config_d): Returns ------- dict - a 
dictionary returned by ArgSource.get_dict() after validating configuration + a dictionary returned by ArgSource.get_dict() after validating configuration and instantiating an ArgSource instance Raises @@ -145,7 +144,7 @@ def get_input_from_config(ArgSource, config_d): class ArgSink(ConfigurableSource): def put_dict(self, d): """method that must be implemented to enable an ArgSink to write a dictionary - + Parameters ---------- d: dict From 7e0540f9b366b86d333f2a67472d2f164d596563 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 28 Apr 2018 14:46:15 -0700 Subject: [PATCH 58/81] flake8 --- argschema/argschema_parser.py | 16 ++++++++-------- argschema/schemas.py | 2 +- argschema/utils.py | 13 ++++++++----- test/test_output.py | 6 +++--- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index cf2c1efb..adaf9aa0 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -1,16 +1,15 @@ '''Module that contains the base class ArgSchemaParser which should be subclassed when using this library ''' -import json import logging from . import schemas from . import utils -from . 
import fields import marshmallow as mm from .sources.json_source import JsonSource, JsonSink from .sources.yaml_source import YamlSource, YamlSink from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError, get_input_from_config + def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema @@ -88,7 +87,7 @@ class ArgSchemaParser(object): output_sink : argschema.sources.source.Source a generic sink to write output dictionary to args : list or None - command line arguments passed to the module, if None use argparse to parse the command line, + command line arguments passed to the module, if None use argparse to parse the command line, set to [] if you want to bypass command line parsing logger_name : str name of logger from the logging module you want to instantiate default ('argschema') @@ -136,7 +135,7 @@ def __init__(self, # build a command line parser from the input schemas and configurations p = utils.schema_argparser(self.schema, io_schemas) argsobj = p.parse_args(args) - argsdict = utils.args_to_dict(argsobj, [self.schema]+io_schemas) + argsdict = utils.args_to_dict(argsobj, [self.schema] + io_schemas) self.logger.debug('argsdict is {}'.format(argsdict)) # if you received an input_source, get the dictionary from there @@ -147,7 +146,8 @@ def __init__(self, input_data = config_data if config_data is not None else input_data # check whether the command line arguments contain an input configuration and use that - config_data = self.__get_input_data_from_config(utils.smart_merge({},argsdict)) + config_data = self.__get_input_data_from_config( + utils.smart_merge({}, argsdict)) input_data = config_data if config_data is not None else input_data # merge the command line dictionary into the input json @@ -264,14 +264,14 @@ def get_output_json(self, d): return output_json - def output(self,d,sink=None): + def output(self, d, sink=None): """method for 
outputing dictionary to the output_json file path after validating it through the output_schema_type Parameters ---------- d:dict - output dictionary to output + output dictionary to output sink: argschema.sources.source.ArgSink output_sink to output to (optional default to self.output_source) output_path: str @@ -279,7 +279,7 @@ def output(self,d,sink=None): (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) **sink_options : will be passed through to sink.put_dict - + Raises ------ marshmallow.ValidationError diff --git a/argschema/schemas.py b/argschema/schemas.py index 48764c40..9ad7831f 100644 --- a/argschema/schemas.py +++ b/argschema/schemas.py @@ -1,5 +1,5 @@ import marshmallow as mm -from .fields import LogLevel, InputFile, OutputFile +from .fields import LogLevel class DefaultSchema(mm.Schema): diff --git a/argschema/utils.py b/argschema/utils.py index eaeaf60e..f5905300 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -82,7 +82,8 @@ def cli_error_dict(arg_path, field_type, index=0): else: return {arg_path[index]: cli_error_dict(arg_path, field_type, index + 1)} -def get_field_def_from_schema(parts,schema): + +def get_field_def_from_schema(parts, schema): """function to get a field_definition from a particular key, specified by it's parts list Parameters @@ -91,7 +92,7 @@ def get_field_def_from_schema(parts,schema): the list of keys to get this schema schema: marshmallow.Schema the marshmallow schema to look up this key - + Returns ------- marshmallow.Field or None @@ -109,6 +110,8 @@ def get_field_def_from_schema(parts,schema): if isinstance(field_def, fields.Nested): current_schema = field_def.schema return field_def + + def args_to_dict(argsobj, schemas=None): """function to convert namespace returned by argsparse into a nested dictionary @@ -138,11 +141,11 @@ def args_to_dict(argsobj, schemas=None): if i == (len(parts) - 1): field_def = None for schema in schemas: - field_def = 
get_field_def_from_schema(parts,schema) + field_def = get_field_def_from_schema(parts, schema) if field_def is not None: break - - #field_def = next(get_field_def(parts,schema) for schema in schemas if field_in_schema(parts,schema)) + + # field_def = next(get_field_def(parts,schema) for schema in schemas if field_in_schema(parts,schema)) value = argsdict.get(field) if value is not None: diff --git a/test/test_output.py b/test/test_output.py index 5878ec7a..4f24de73 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -97,9 +97,9 @@ def test_alt_output(tmpdir): "b": 5, "M": M } - sink = JsonSink(output_json= str(file_out_2)) - mod.output(output,sink=sink) - with open(str(file_out_2),'r') as fp: + sink = JsonSink(output_json=str(file_out_2)) + mod.output(output, sink=sink) + with open(str(file_out_2), 'r') as fp: actual_output = json.load(fp) assert actual_output == expected_output From 15e4cac094f4169f56f73becb137b0c34238ff5c Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:16:18 -0800 Subject: [PATCH 59/81] added marshmallow 3 compatability --- argschema/sources/source.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index a66e92d3..e1154130 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -96,10 +96,11 @@ def get_config(ConfigSchema, d): raise NotConfiguredSourceError( "This source is not present in \n {}".format(d)) else: - result, errors = schema.load(d) - if len(errors) > 0: + try: + result = schema.load(d, unknown=mm.EXCLUDE) + except mm.ValidationError as e: raise MisconfiguredSourceError( - "Source incorrectly configured\n {}".format(errors)) + "Source incorrectly configured\n {}".format(e)) else: return result From 0b82e2bfd9bd7bf1dee015e49a2c8384a18a749b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:25:57 -0800 Subject: [PATCH 60/81] remove OptionList --- argschema/__init__.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/__init__.py b/argschema/__init__.py index 4693a4b9..2906aa6e 100644 --- a/argschema/__init__.py +++ b/argschema/__init__.py @@ -1,5 +1,5 @@ '''argschema: flexible definition, validation and setting of parameters''' -from .fields import InputFile, InputDir, OutputFile, OptionList # noQA:F401 +from .fields import InputFile, InputDir, OutputFile # noQA:F401 from .schemas import ArgSchema # noQA:F401 from .argschema_parser import ArgSchemaParser # noQA:F401 From f80707214ee7c662d980651086394f87e512a3a0 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:42:02 -0800 Subject: [PATCH 61/81] fixing test --- test/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_utils.py b/test/test_utils.py index 5f440a08..134603ca 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -133,6 +133,7 @@ def test_schema_argparser_with_baseball(): parser = utils.schema_argparser(schema) help = parser.format_help() help = help.replace('\n', '').replace(' ', '') + print(help) assert( '--strikesSTRIKEShowmanystrikes(0-2)(REQUIRED)(validoptionsare[0,1,2])' in help) # in python3.9, the format changed slightly such that From 7c8f869ddeb894732331a13091929a8367342763 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:51:46 -0800 Subject: [PATCH 62/81] flake8 --- argschema/sources/json_source.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 558ab365..1dec3ee5 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -3,28 +3,34 @@ import marshmallow as mm import argschema + class JsonInputConfigSchema(mm.Schema): input_json = argschema.fields.InputFile(required=True, - description = 'filepath to input_json') + description='filepath to input_json') + class JsonOutputConfigSchema(mm.Schema): output_json = 
argschema.fields.OutputFile(required=True, - description = 'filepath to save output_json') + description='filepath to save output_json') output_json_indent = argschema.fields.Int(required=False, - description = 'whether to indent options or not') + description='whether to indent options or not') + + class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema def get_dict(self): - with open(self.input_json,'r') as fp: + with open(self.input_json, 'r') as fp: return json.load(fp,) + class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - def __init__(self,output_json=None,output_json_indent=None): + + def __init__(self, output_json=None, output_json_indent=None): self.output_json = output_json self.indent = output_json_indent - def put_dict(self,d): - with open(self.output_json,'w') as fp: - json.dump(d,fp,indent=self.indent) + def put_dict(self, d): + with open(self.output_json, 'w') as fp: + json.dump(d, fp, indent=self.indent) From a24d0e9a57b42b7ac11a4ac402e8af344868b0fd Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:52:59 -0800 Subject: [PATCH 63/81] flake8 --- argschema/sources/json_source.py | 2 +- argschema/sources/yaml_source.py | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 1dec3ee5..61ef02d0 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -18,7 +18,7 @@ class JsonOutputConfigSchema(mm.Schema): class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema - + def get_dict(self): with open(self.input_json, 'r') as fp: return json.load(fp,) diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index 1692b9dd..571dd0cd 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -1,27 +1,30 @@ import yaml -from .source import ArgSource,ArgSink +from .source import ArgSource, ArgSink import argschema 
import marshmallow as mm + class YamlInputConfigSchema(mm.Schema): - input_yaml = argschema.fields.InputFile(required=True, - description = 'filepath to input yaml') + input_yaml = argschema.fields.InputFile(required=True, + description='filepath to input yaml') + class YamlOutputConfigSchema(mm.Schema): - output_yaml = argschema.fields.OutputFile(required=True, - description = 'filepath to save output yaml') + output_yaml = argschema.fields.OutputFile(required=True, + description='filepath to save output yaml') + class YamlSource(ArgSource): ConfigSchema = YamlInputConfigSchema - + def get_dict(self): - with open(self.input_yaml,'r') as fp: + with open(self.input_yaml, 'r') as fp: return yaml.load(fp) + class YamlSink(ArgSink): ConfigSchema = YamlOutputConfigSchema - def put_dict(self,d): - with open(self.output_yaml,'w') as fp: - yaml.dump(d,fp,default_flow_style=False) - + def put_dict(self, d): + with open(self.output_yaml, 'w') as fp: + yaml.dump(d, fp, default_flow_style=False) From cecaf49aa3d182a11819df3b223e1b18e7421013 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:54:03 -0800 Subject: [PATCH 64/81] flake8 --- argschema/fields/files.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/argschema/fields/files.py b/argschema/fields/files.py index 32f8c71b..1172d1b8 100644 --- a/argschema/fields/files.py +++ b/argschema/fields/files.py @@ -147,12 +147,6 @@ def _validate(self, value): validate_outpath(value) -def validate_input_path(value): - if not os.path.isfile(value): - raise mm.ValidationError("%s is not a file" % value) - elif not os.access(value, os.R_OK): - raise mm.ValidationError("%s is not readable" % value) - def validate_input_path(value): if not os.path.isfile(value): raise mm.ValidationError("%s is not a file" % value) From 7a90fcf93224e1010dbb1c4ee575a8e58029018d Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:55:59 -0800 Subject: [PATCH 65/81] flake8 --- argschema/fields/files.py | 2 +- 
argschema/fields/numpyarrays.py | 4 ++-- argschema/fields/slice.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/argschema/fields/files.py b/argschema/fields/files.py index 1172d1b8..b9503d04 100644 --- a/argschema/fields/files.py +++ b/argschema/fields/files.py @@ -87,7 +87,7 @@ def _validate(self, value): path = os.path.dirname(value) except Exception as e: # pragma: no cover raise mm.ValidationError( - "%s cannot be os.path.dirname-ed" % value) # pragma: no cover + "{} cannot be os.path.dirname-ed: {}".format(value, e)) # pragma: no cover validate_outpath(path) class OutputDirModeException(Exception): diff --git a/argschema/fields/numpyarrays.py b/argschema/fields/numpyarrays.py index 2e956c4a..98e38cc2 100644 --- a/argschema/fields/numpyarrays.py +++ b/argschema/fields/numpyarrays.py @@ -27,8 +27,8 @@ def _deserialize(self, value, attr, obj, **kwargs): return np.array(value, dtype=self.dtype) except ValueError as e: raise mm.ValidationError( - 'Cannot create numpy array with type {} from data.'.format( - self.dtype)) + 'Cannot create numpy array with type {} from data: {}.'.format( + self.dtype, e)) def _serialize(self, value, attr, obj, **kwargs): if value is None: diff --git a/argschema/fields/slice.py b/argschema/fields/slice.py index 1a5d0fa8..232622cc 100644 --- a/argschema/fields/slice.py +++ b/argschema/fields/slice.py @@ -17,7 +17,7 @@ def __init__(self, **kwargs): kwargs['metadata'] = kwargs.get( 'metadata', {'description': 'slice the dataset'}) kwargs['default'] = kwargs.get('default', slice(None)) - super(Slice, self).__init__( **kwargs) + super(Slice, self).__init__(**kwargs) def _deserialize(self, value, attr, obj, **kwargs): try: From 1bf09a7a8254a32f1e99f1b60d7c96622c4aeb2e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 08:00:28 -0800 Subject: [PATCH 66/81] python3 inpsect warning fix --- argschema/autodoc.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/argschema/autodoc.py 
b/argschema/autodoc.py index ee11ab91..c2fd2c79 100644 --- a/argschema/autodoc.py +++ b/argschema/autodoc.py @@ -3,7 +3,10 @@ from argschema.utils import get_description_from_field from argschema.argschema_parser import ArgSchemaParser import inspect - +try: + from inspect import getfullargspec +except ImportError: + from inspect import getargspec as getfullargspec FIELD_TYPE_MAP = {v: k for k, v in mm.Schema.TYPE_MAPPING.items()} From 3646ca9344c79824b6052a668b2d1d4b443d1066 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 09:00:05 -0800 Subject: [PATCH 67/81] removing unused recursive checking --- argschema/argschema_parser.py | 61 ----------------------------------- 1 file changed, 61 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index adaf9aa0..d446d663 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -10,62 +10,6 @@ from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError, get_input_from_config -def contains_non_default_schemas(schema, schema_list=[]): - """returns True if this schema contains a schema which was not an instance of DefaultSchema - - Parameters - ---------- - schema : marshmallow.Schema - schema to check - schema_list : - (Default value = []) - - Returns - ------- - bool - does this schema only contain schemas which are subclassed from schemas.DefaultSchema - - """ - if not isinstance(schema, schemas.DefaultSchema): - return True - for k, v in schema.declared_fields.items(): - if isinstance(v, mm.fields.Nested): - if type(v.schema) in schema_list: - return False - else: - schema_list.append(type(v.schema)) - if contains_non_default_schemas(v.schema, schema_list): - return True - return False - - -def is_recursive_schema(schema, schema_list=[]): - """returns true if this schema contains recursive elements - - Parameters - ---------- - schema : marshmallow.Schema - schema to check - schema_list : - (Default value = []) - - 
Returns - ------- - bool - does this schema contain any recursively defined schemas - - """ - for k, v in schema.declared_fields.items(): - if isinstance(v, mm.fields.Nested): - if type(v.schema) in schema_list: - return True - else: - schema_list.append(type(v.schema)) - if is_recursive_schema(v.schema, schema_list): - return True - return False - - class ArgSchemaParser(object): """The main class you should sub-class to write your own argschema module. Takes input_data, reference to a input_json and the command line inputs and parses out the parameters @@ -274,11 +218,6 @@ def output(self, d, sink=None): output dictionary to output sink: argschema.sources.source.ArgSink output_sink to output to (optional default to self.output_source) - output_path: str - path to save to output file, optional (with default to self.mod['output_json'] location) - (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) - **sink_options : - will be passed through to sink.put_dict Raises ------ From 6471b0e5473433cdd6feb95ba5024f627352e49e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 10:58:36 -0800 Subject: [PATCH 68/81] bumping doc reqs --- doc_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc_requirements.txt b/doc_requirements.txt index 8e00ab27..36ddf95f 100644 --- a/doc_requirements.txt +++ b/doc_requirements.txt @@ -2,6 +2,6 @@ sphinxcontrib-napoleon sphinxcontrib-programoutput sphinxcontrib-inlinesyntaxhighlight numpy -marshmallow==3.0.0rc6 +marshmallow==3.6.1 pytest rstcheck From 7b8ea3f6cdcd0799ba64cff97d6aae8dee5f390b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 4 Feb 2020 17:29:29 -0800 Subject: [PATCH 69/81] merging master --- argschema/fields/files.py | 1 + 1 file changed, 1 insertion(+) diff --git a/argschema/fields/files.py b/argschema/fields/files.py index b9503d04..a27d5851 100644 --- a/argschema/fields/files.py +++ b/argschema/fields/files.py @@ 
-157,6 +157,7 @@ def validate_input_path(value): except Exception as value: raise mm.ValidationError("%s is not readable" % value) + class InputDir(mm.fields.Str): """InputDir is :class:`marshmallow.fields.Str` subclass which is a path to a a directory that exists and that the user can access From 39f35539b4bdf3a19e539fc5c07f852028ad47a7 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Fri, 21 Feb 2020 15:14:18 -0800 Subject: [PATCH 70/81] runtime input source selection --- argschema/__init__.py | 6 +- argschema/argschema_parser.py | 163 ++++++++++++++------- argschema/sources/__init__.py | 4 +- argschema/sources/json_source.py | 6 +- argschema/sources/source.py | 87 +++-------- argschema/sources/yaml_source.py | 6 +- test/_sources/conftest.py | 2 + test/{sources => _sources}/test_classes.py | 0 test/{sources => _sources}/test_json.py | 15 +- test/{sources => _sources}/test_url.py | 0 test/{sources => _sources}/test_yaml.py | 0 test/{sources => _sources}/url_source.py | 0 test/sources/test_parser_integration.py | 100 +++++++++++++ 13 files changed, 253 insertions(+), 136 deletions(-) create mode 100644 test/_sources/conftest.py rename test/{sources => _sources}/test_classes.py (100%) rename test/{sources => _sources}/test_json.py (63%) rename test/{sources => _sources}/test_url.py (100%) rename test/{sources => _sources}/test_yaml.py (100%) rename test/{sources => _sources}/url_source.py (100%) create mode 100644 test/sources/test_parser_integration.py diff --git a/argschema/__init__.py b/argschema/__init__.py index 2906aa6e..64e09c8e 100644 --- a/argschema/__init__.py +++ b/argschema/__init__.py @@ -1,7 +1,7 @@ '''argschema: flexible definition, validation and setting of parameters''' -from .fields import InputFile, InputDir, OutputFile # noQA:F401 -from .schemas import ArgSchema # noQA:F401 -from .argschema_parser import ArgSchemaParser # noQA:F401 +from argschema.fields import InputFile, InputDir, OutputFile # noQA:F401 +from argschema.schemas import ArgSchema 
# noQA:F401 +from argschema.argschema_parser import ArgSchemaParser # noQA:F401 __version__ = "3.0.1" diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index d446d663..b10dca26 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -1,13 +1,19 @@ '''Module that contains the base class ArgSchemaParser which should be subclassed when using this library ''' +from typing import List, Sequence, Dict, Optional, Union import logging from . import schemas from . import utils import marshmallow as mm from .sources.json_source import JsonSource, JsonSink from .sources.yaml_source import YamlSource, YamlSink -from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError, get_input_from_config +from .sources.source import ( + ArgSource, + ArgSink, + NotConfiguredSourceError, + MultipleConfiguredSourceError, +) class ArgSchemaParser(object): @@ -26,9 +32,9 @@ class ArgSchemaParser(object): the schema to use to validate the parameters output_schema_type : marshmallow.Schema the schema to use to validate the output, used by self.output - input_source : argschema.sources.source.Source - a generic source of a dictionary - output_sink : argschema.sources.source.Source + input_sources : Sequence[argschema.sources.source.ConfigurableSource] + Each of these will be considered + output_sinks : Sequence[argschema.sources.source.ConfigurableSource] a generic sink to write output dictionary to args : list or None command line arguments passed to the module, if None use argparse to parse the command line, @@ -45,16 +51,38 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None - default_configurable_sources = [JsonSource] - default_configurable_sinks = [JsonSink] + default_sources = (JsonSource,) + default_sinks = (JsonSink,) + + @property + def input_sources(self) -> List[ArgSource]: + if not hasattr(self, "_input_sources"): + self._input_sources = [] + return 
self._input_sources + + @property + def output_sinks(self) -> List[ArgSource]: + if not hasattr(self, "_output_sinks"): + self._output_sinks = [] + return self._output_sinks + + @property + def io_schemas(self) -> List[mm.Schema]: + if not hasattr(self, "_io_schemas"): + self._io_schemas = [] + return self._io_schemas + + @io_schemas.setter + def io_schemas(self, schemas: List[mm.Schema]): + self._io_schemas = schemas def __init__(self, input_data=None, # dictionary input as option instead of --input_json schema_type=None, # schema for parsing arguments output_schema_type=None, # schema for parsing output_json args=None, - input_source=None, - output_sink=None, + input_sources=None, + output_sinks=None, logger_name=__name__): if schema_type is None: @@ -66,51 +94,85 @@ def __init__(self, self.logger = self.initialize_logger(logger_name, 'WARNING') self.logger.debug('input_data is {}'.format(input_data)) - # convert schema to argparse object + self.register_sources(input_sources) + self.register_sinks(output_sinks) - # consolidate a list of the input and output source - # command line configuration schemas - io_schemas = [] - for in_cfg in self.default_configurable_sources: - io_schemas.append(in_cfg.ConfigSchema()) - for out_cfg in self.default_configurable_sinks: - io_schemas.append(out_cfg.ConfigSchema()) + argsdict = self.parse_command_line(args) + resolved_args = self.resolve_inputs(input_data, argsdict) - # build a command line parser from the input schemas and configurations - p = utils.schema_argparser(self.schema, io_schemas) - argsobj = p.parse_args(args) - argsdict = utils.args_to_dict(argsobj, [self.schema] + io_schemas) + self.output_sink = self.__get_output_sink_from_config(resolved_args) + self.args = self.load_schema_with_defaults(self.schema, resolved_args) + + self.output_schema_type = output_schema_type + self.logger = self.initialize_logger( + logger_name, self.args.get('log_level')) + + def register_sources( + self, + sources: Union[None, 
Sequence[ArgSource], ArgSource] + ): + """consolidate a list of the input and output source command line + configuration schemas + """ + + if isinstance(sources, (ArgSource, type)): + sources = [sources] + elif sources is None: + sources = self.default_sources + + + for source in sources: + if isinstance(source, type): + source = source() + self.io_schemas.append(source.schema) + self.input_sources.append(source) + + def register_sinks( + self, + sinks: Union[None, Sequence[ArgSink], ArgSink] + ): + """ + """ + + if isinstance(sinks, (ArgSink, type)): + sinks = [sinks] + elif sinks is None: + sinks = self.default_sinks + + for sink in sinks: + if isinstance(sink, type): + sink = sink() + self.io_schemas.append(sink.schema) + self.output_sinks.append(sink) + + def parse_command_line(self, args: Optional[List]): + """ build a command line parser from the input schemas and + configurations + """ + parser = utils.schema_argparser(self.schema, self.io_schemas) + argsobj = parser.parse_args(args) + argsdict = utils.args_to_dict(argsobj, [self.schema] + self.io_schemas) self.logger.debug('argsdict is {}'.format(argsdict)) + return argsdict + + def resolve_inputs(self, input_data: Dict, argsdict: Dict) -> Dict: + """ Resolve input source by checking candidate sources against + constructor and command line arguments + """ - # if you received an input_source, get the dictionary from there - if input_source is not None: - input_data = input_source.get_dict() - else: # see if the input_data itself contains an InputSource configuration use that - config_data = self.__get_input_data_from_config(input_data) - input_data = config_data if config_data is not None else input_data + config_data = self.__get_input_data_from_config(input_data) + if config_data is not None: + input_data = config_data - # check whether the command line arguments contain an input configuration and use that config_data = self.__get_input_data_from_config( utils.smart_merge({}, argsdict)) - input_data = 
config_data if config_data is not None else input_data + if config_data is not None: + input_data = config_data - # merge the command line dictionary into the input json args = utils.smart_merge(input_data, argsdict) self.logger.debug('args after merge {}'.format(args)) - # if the output sink was not passed in, see if there is a configuration in the combined args - if output_sink is None: - output_sink = self.__get_output_sink_from_config(args) - # save the output sink for later - self.output_sink = output_sink - - # validate with load! - result = self.load_schema_with_defaults(self.schema, args) - - self.args = result - self.output_schema_type = output_schema_type - self.logger = self.initialize_logger( - logger_name, self.args.get('log_level')) + return args def __get_output_sink_from_config(self, d): """private function to check for ArgSink configuration in a dictionary and return a configured ArgSink @@ -132,14 +194,14 @@ def __get_output_sink_from_config(self, d): """ output_set = False output_sink = None - for OutputSink in self.default_configurable_sinks: + for sink in self.output_sinks: try: - output_config_d = OutputSink.get_config( - OutputSink.ConfigSchema, d) + sink.load_config(d) + if output_set: raise MultipleConfiguredSourceError( "more then one OutputSink configuration present in {}".format(d)) - output_sink = OutputSink(**output_config_d) + output_sink = sink output_set = True except NotConfiguredSourceError: pass @@ -166,9 +228,10 @@ def __get_input_data_from_config(self, d): """ input_set = False input_data = None - for InputSource in self.default_configurable_sources: + for source in self.input_sources: try: - input_data = get_input_from_config(InputSource, d) + source.load_config(d) + input_data = source.get_dict() if input_set: raise MultipleConfiguredSourceError( "more then one InputSource configuration present in {}".format(d)) @@ -286,5 +349,5 @@ def initialize_logger(name, log_level): class ArgSchemaYamlParser(ArgSchemaParser): - 
default_configurable_sources = [YamlSource] - default_configurable_sinks = [YamlSink] + default_sources = [YamlSource] + default_sinks = [YamlSink] diff --git a/argschema/sources/__init__.py b/argschema/sources/__init__.py index 4a732b36..7d604108 100644 --- a/argschema/sources/__init__.py +++ b/argschema/sources/__init__.py @@ -1,2 +1,2 @@ -from .source import ArgSink, ArgSource -from .json_source import JsonSource, JsonSink +from argschema.sources.source import ArgSink, ArgSource +from argschema.sources.json_source import JsonSource, JsonSink diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 61ef02d0..12d517d7 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -20,17 +20,13 @@ class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema def get_dict(self): - with open(self.input_json, 'r') as fp: + with open(self.config["input_json"], 'r') as fp: return json.load(fp,) class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - def __init__(self, output_json=None, output_json_indent=None): - self.output_json = output_json - self.indent = output_json_indent - def put_dict(self, d): with open(self.output_json, 'w') as fp: json.dump(d, fp, indent=self.indent) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index e1154130..6670ecaf 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -1,3 +1,6 @@ +import abc +from typing import Dict + import marshmallow as mm @@ -64,82 +67,29 @@ def __init__(self, **kwargs): which will define the set of fields that are allowed (and their defaults) """ self.schema = self.ConfigSchema() - result = self.get_config(self.ConfigSchema, kwargs) - self.__dict__.update(result) - - @staticmethod - def get_config(ConfigSchema, d): - """A static method to get the proper validated configuration keyword arguments/dictionary - of a Configurable source from a dictionary + self.config = {} - Parameters - ---------- - 
ConfigSchema: marshmallow.Schema - a marshmallow schema that defines the configuration schema for this ConfigurableSource - d: dict - a dictionary that might contain a proper configuration of this schema - - Returns - ------- - dict - a dictionary of configuration values that has been properly deserialized and validated by - ConfigSchema - Raises - ------ - NotConfiguredSourceError - if the configation dictionary does not contain a configuration for this source - MisconfiguredSourceError - if the configuration dictionary contains a configuration but it is invalid + def load_config(self, candidate: Dict): """ - schema = ConfigSchema() - if not d_contains_any_fields(schema, d): + """ + + if candidate is None: + raise NotConfiguredSourceError("No data was provided") + + if not d_contains_any_fields(self.schema, candidate): raise NotConfiguredSourceError( - "This source is not present in \n {}".format(d)) - else: - try: - result = schema.load(d, unknown=mm.EXCLUDE) - except mm.ValidationError as e: - raise MisconfiguredSourceError( - "Source incorrectly configured\n {}".format(e)) - else: - return result + "This source is not present in \n {}".format(candidate)) + + try: + self.config = self.schema.load(candidate, unknown=mm.EXCLUDE) + except mm.ValidationError as e: + raise MisconfiguredSourceError( + "Source incorrectly configured\n {}".format(e)) class ArgSource(ConfigurableSource): def get_dict(self): """method that must be implemented to enable an ArgSource to return a dictionary""" - pass - - -def get_input_from_config(ArgSource, config_d): - """function to return the input dictionary from an ArgSource, given a configuration dictionary - - Parameters - ---------- - ArgSource: class(ArgSource) - The ArgSource class subclass that you want to get input from - config_d: a dictionary that might contain a configuration for this source - - Returns - ------- - dict - a dictionary returned by ArgSource.get_dict() after validating configuration - and instantiating an 
ArgSource instance - - Raises - ------ - NotConfiguredSourceError - if the configation dictionary does not contain a configuration for this source - MisconfiguredSourceError - if the configuration dictionary contains a configuration but it is invalid - """ - if config_d is not None: - input_config_d = ArgSource.get_config(ArgSource.ConfigSchema, config_d) - input_source = ArgSource(**input_config_d) - input_data = input_source.get_dict() - return input_data - else: - raise NotConfiguredSourceError('No dictionary provided') class ArgSink(ConfigurableSource): @@ -151,4 +101,3 @@ def put_dict(self, d): d: dict the dictionary to write """ - pass diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index 571dd0cd..eb8f2dd0 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -1,5 +1,5 @@ import yaml -from .source import ArgSource, ArgSink +from argschema.sources.source import ArgSource, ArgSink import argschema import marshmallow as mm @@ -18,7 +18,7 @@ class YamlSource(ArgSource): ConfigSchema = YamlInputConfigSchema def get_dict(self): - with open(self.input_yaml, 'r') as fp: + with open(self.config["input_yaml"], 'r') as fp: return yaml.load(fp) @@ -26,5 +26,5 @@ class YamlSink(ArgSink): ConfigSchema = YamlOutputConfigSchema def put_dict(self, d): - with open(self.output_yaml, 'w') as fp: + with open(self.config["output_yaml"], 'w') as fp: yaml.dump(d, fp, default_flow_style=False) diff --git a/test/_sources/conftest.py b/test/_sources/conftest.py new file mode 100644 index 00000000..e8d71cba --- /dev/null +++ b/test/_sources/conftest.py @@ -0,0 +1,2 @@ +def pytest_ignore_collect(path, config): + return True \ No newline at end of file diff --git a/test/sources/test_classes.py b/test/_sources/test_classes.py similarity index 100% rename from test/sources/test_classes.py rename to test/_sources/test_classes.py diff --git a/test/sources/test_json.py b/test/_sources/test_json.py similarity index 63% rename 
from test/sources/test_json.py rename to test/_sources/test_json.py index 1acc001b..c9d55d5e 100644 --- a/test/sources/test_json.py +++ b/test/_sources/test_json.py @@ -18,12 +18,19 @@ def test_input_file(tmpdir_factory): } } with open(str(file_in),'w') as fp: - json.dump(input_data,fp) + json.dump(input_data, fp) return str(file_in) -def test_json_source(test_input_file): - source = JsonSource(input_json=test_input_file) - mod = MyParser(input_source= source, args=[]) +def test_json_source_input_data(test_input_file): + mod = MyParser( + input_sources=JsonSource(), + input_data={"input_json": test_input_file}, + args=[] + ) + +# def test_json_source(test_input_file): +# source = JsonSource(input_json=test_input_file) +# mod = MyParser(input_sources= source, args=) def test_json_source_command(test_input_file): mod = MyParser(args = ['--input_json',test_input_file]) \ No newline at end of file diff --git a/test/sources/test_url.py b/test/_sources/test_url.py similarity index 100% rename from test/sources/test_url.py rename to test/_sources/test_url.py diff --git a/test/sources/test_yaml.py b/test/_sources/test_yaml.py similarity index 100% rename from test/sources/test_yaml.py rename to test/_sources/test_yaml.py diff --git a/test/sources/url_source.py b/test/_sources/url_source.py similarity index 100% rename from test/sources/url_source.py rename to test/_sources/url_source.py diff --git a/test/sources/test_parser_integration.py b/test/sources/test_parser_integration.py new file mode 100644 index 00000000..adc2ff24 --- /dev/null +++ b/test/sources/test_parser_integration.py @@ -0,0 +1,100 @@ +import json +import yaml + +import pytest + +import argschema +from argschema.sources.json_source import JsonSource +from argschema.sources.yaml_source import YamlSource +from argschema.sources.source import MultipleConfiguredSourceError + + +class MyNestedSchema(argschema.schemas.DefaultSchema): + one = argschema.fields.Int(required=True,description="nested integer") + two 
= argschema.fields.Boolean(required=True,description="a nested boolean") + +class MySchema(argschema.ArgSchema): + a = argschema.fields.Int(required=True,description="parameter a") + b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") + nest = argschema.fields.Nested(MyNestedSchema,description="a nested schema") + +class MyOutputSchema(argschema.schemas.DefaultSchema): + a = argschema.fields.Int(required=True,description="parameter a") + b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") + +class MyParser(argschema.ArgSchemaParser): + default_schema = MySchema + +@pytest.fixture(scope='module') +def json_inp(tmpdir_factory): + file_in = tmpdir_factory.mktemp('test').join('test_input_json.json') + input_data = { + 'a':5, + 'nest':{ + 'one':7, + 'two':False + } + } + + with open(str(file_in),'w') as fp: + json.dump(input_data, fp) + + return str(file_in) + +@pytest.fixture(scope='module') +def yaml_inp(tmpdir_factory): + file_in = tmpdir_factory.mktemp('test').join('test_input_yaml.yaml') + input_data = { + 'a':6, + 'nest':{ + 'one':8, + 'two':False + } + } + + with open(str(file_in),'w') as fp: + yaml.dump(input_data, fp) + + return str(file_in) + + +@pytest.mark.parametrize("inp_sources", [ + JsonSource(), [JsonSource()], JsonSource, [JsonSource] +]) +def test_json_input_args(json_inp, inp_sources): + parser = MyParser( + input_sources=inp_sources, + args=["--input_json", + json_inp] + ) + + assert parser.args["a"] == 5 + + +@pytest.mark.parametrize("inp_sources", [ + JsonSource(), [JsonSource()], JsonSource, [JsonSource] +]) +def test_json_input_data(json_inp, inp_sources): + parser = MyParser( + input_sources=inp_sources, + input_data={"input_json":json_inp}, + args=[] + ) + + assert parser.args["a"] == 5 + + +def test_multisource_arg(yaml_inp): + parser = MyParser( + input_sources=[JsonSource, YamlSource], + args=["--input_yaml", yaml_inp] + ) + assert 
parser.args["a"] == 6 + + +def test_multisource_arg_conflict(json_inp, yaml_inp): + with pytest.raises(MultipleConfiguredSourceError): + parser = MyParser( + input_sources=[JsonSource, YamlSource], + args=["--input_yaml", yaml_inp, "--input_json", json_inp] + ) From 222322297792f6a28ee1d73d56b17065d5b21610 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Fri, 21 Feb 2020 17:37:42 -0800 Subject: [PATCH 71/81] tests for specific sources and sinks --- argschema/sources/json_source.py | 4 +- .../sources}/url_source.py | 22 ++--- test/_sources/conftest.py | 2 - test/_sources/test_classes.py | 14 --- test/_sources/test_json.py | 36 -------- test/_sources/test_url.py | 41 --------- test/_sources/test_yaml.py | 90 ------------------- test/sources/test_json.py | 27 ++++++ test/sources/test_parser_integration.py | 37 ++++++-- test/sources/test_url.py | 37 ++++++++ test/sources/test_yaml.py | 25 ++++++ 11 files changed, 133 insertions(+), 202 deletions(-) rename {test/_sources => argschema/sources}/url_source.py (64%) delete mode 100644 test/_sources/conftest.py delete mode 100644 test/_sources/test_classes.py delete mode 100644 test/_sources/test_json.py delete mode 100644 test/_sources/test_url.py delete mode 100644 test/_sources/test_yaml.py create mode 100644 test/sources/test_json.py create mode 100644 test/sources/test_url.py create mode 100644 test/sources/test_yaml.py diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 12d517d7..8dab3e95 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -28,5 +28,5 @@ class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema def put_dict(self, d): - with open(self.output_json, 'w') as fp: - json.dump(d, fp, indent=self.indent) + with open(self.config["output_json"], 'w') as fp: + json.dump(d, fp, indent=self.config.get("output_json_indent", None)) diff --git a/test/_sources/url_source.py b/argschema/sources/url_source.py similarity index 64% rename from 
test/_sources/url_source.py rename to argschema/sources/url_source.py index 56e67808..48456d8b 100644 --- a/test/_sources/url_source.py +++ b/argschema/sources/url_source.py @@ -2,7 +2,6 @@ from argschema.schemas import DefaultSchema from argschema.fields import Str,Int from argschema import ArgSchemaParser -from test_classes import MySchema import requests try: from urllib.parse import urlunparse @@ -19,15 +18,16 @@ class UrlSource(ArgSource): ConfigSchema = UrlSourceConfig def get_dict(self): - if self.input_port is None: - netloc = self.input_host - else: - netloc = "{}:{}".format(self.input_host,self.input_port) - url = urlunparse((self.input_protocol,netloc,self.input_url,None,None,None)) + netloc = self.config["input_host"] + if self.config["input_port"] is not None: + netloc = "{}:{}".format(netloc, self.config["input_port"]) + + url = urlunparse(( + self.config["input_protocol"], + netloc, + self.config["input_url"], + None, None, None + )) + response = requests.get(url) return response.json() - - -class UrlArgSchemaParser(ArgSchemaParser): - default_configurable_sources = [UrlSource] - default_schema = MySchema diff --git a/test/_sources/conftest.py b/test/_sources/conftest.py deleted file mode 100644 index e8d71cba..00000000 --- a/test/_sources/conftest.py +++ /dev/null @@ -1,2 +0,0 @@ -def pytest_ignore_collect(path, config): - return True \ No newline at end of file diff --git a/test/_sources/test_classes.py b/test/_sources/test_classes.py deleted file mode 100644 index a9de6944..00000000 --- a/test/_sources/test_classes.py +++ /dev/null @@ -1,14 +0,0 @@ -import argschema - -class MyNestedSchema(argschema.schemas.DefaultSchema): - one = argschema.fields.Int(required=True,description="nested integer") - two = argschema.fields.Boolean(required=True,description="a nested boolean") - -class MySchema(argschema.ArgSchema): - a = argschema.fields.Int(required=True,description="parameter a") - b = argschema.fields.Str(required=False,default="my 
value",description="optional b string parameter") - nest = argschema.fields.Nested(MyNestedSchema,description="a nested schema") - -class MyOutputSchema(argschema.schemas.DefaultSchema): - a = argschema.fields.Int(required=True,description="parameter a") - b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") \ No newline at end of file diff --git a/test/_sources/test_json.py b/test/_sources/test_json.py deleted file mode 100644 index c9d55d5e..00000000 --- a/test/_sources/test_json.py +++ /dev/null @@ -1,36 +0,0 @@ -import argschema -from argschema.sources.json_source import JsonSource -from test_classes import MySchema -import json -import pytest - -class MyParser(argschema.ArgSchemaParser): - default_schema = MySchema - -@pytest.fixture(scope='module') -def test_input_file(tmpdir_factory): - file_in = tmpdir_factory.mktemp('test').join('test_input_json.json') - input_data = { - 'a':5, - 'nest':{ - 'one':7, - 'two':False - } - } - with open(str(file_in),'w') as fp: - json.dump(input_data, fp) - return str(file_in) - -def test_json_source_input_data(test_input_file): - mod = MyParser( - input_sources=JsonSource(), - input_data={"input_json": test_input_file}, - args=[] - ) - -# def test_json_source(test_input_file): -# source = JsonSource(input_json=test_input_file) -# mod = MyParser(input_sources= source, args=) - -def test_json_source_command(test_input_file): - mod = MyParser(args = ['--input_json',test_input_file]) \ No newline at end of file diff --git a/test/_sources/test_url.py b/test/_sources/test_url.py deleted file mode 100644 index f90cf9b3..00000000 --- a/test/_sources/test_url.py +++ /dev/null @@ -1,41 +0,0 @@ -import requests -import mock -from url_source import UrlArgSchemaParser, UrlSource, MySchema -from argschema import ArgSchemaParser -# This method will be used by the mock to replace requests.get - - -def mocked_requests_get(*args, **kwargs): - class MockResponse: - def __init__(self, json_data, 
status_code): - self.json_data = json_data - self.status_code = status_code - - def json(self): - return self.json_data - - if args[0] == 'http://localhost:88/test.json': - return MockResponse({ - 'a': 7, - 'nest': { - 'one': 7, - 'two': False - } - }, 200) - return MockResponse(None, 404) - - -@mock.patch('requests.get', side_effect=mocked_requests_get) -def test_url_parser(mock_get): - input_source = UrlSource(input_host='localhost', - input_port=88, input_url='test.json') - mod = ArgSchemaParser(schema_type=MySchema, - input_source=input_source, args=[]) - assert(mod.args['a'] == 7) - - -@mock.patch('requests.get', side_effect=mocked_requests_get) -def test_url_parser_command_line(mock_get): - mod = UrlArgSchemaParser( - args=['--input_host', 'localhost', '--input_port', '88', '--input_url', 'test.json']) - assert(mod.args['a'] == 7) diff --git a/test/_sources/test_yaml.py b/test/_sources/test_yaml.py deleted file mode 100644 index 96c0bce2..00000000 --- a/test/_sources/test_yaml.py +++ /dev/null @@ -1,90 +0,0 @@ -import argschema -from argschema.sources.yaml_source import YamlSource, YamlSink -from argschema.sources.json_source import JsonSource, JsonSink -from argschema.sources.source import MultipleConfiguredSourceError -from argschema.argschema_parser import ArgSchemaYamlParser -from test_classes import MySchema, MyOutputSchema -import yaml -import pytest -import json - -class MyParser(ArgSchemaYamlParser): - default_schema = MySchema - default_output_schema = MyOutputSchema - -class MyDualParser(MyParser): - default_configurable_sources = [JsonSource, YamlSource] - default_configurable_sinks = [JsonSink, YamlSink] - -input_data = { - 'a': 5, - 'nest': { - 'one': 7, - 'two': False - } -} - -@pytest.fixture(scope='module') -def test_yaml_input_file(tmpdir_factory): - file_in = tmpdir_factory.mktemp('test').join('test_input_yaml.yml') - - with open(str(file_in), 'w') as fp: - yaml.dump(input_data, fp, default_flow_style=False) - return str(file_in) - 
-@pytest.fixture(scope='module') -def test_json_input_file(tmpdir_factory): - file_in = tmpdir_factory.mktemp('test').join('test_input_json.json') - - with open(str(file_in), 'w') as fp: - json.dump(input_data, fp) - return str(file_in) - - -def test_yaml_source(test_yaml_input_file): - source = YamlSource(input_yaml=test_yaml_input_file) - mod = MyParser(input_source=source, args=[]) - - -def test_yaml_source_command(test_yaml_input_file): - mod = MyParser(args=['--input_yaml', test_yaml_input_file]) - - -def test_yaml_sink(test_yaml_input_file, tmpdir): - outfile = tmpdir.join('test_out.yml') - output_data = { - 'a': 3 - } - source = YamlSource(input_yaml=test_yaml_input_file) - sink = YamlSink(output_yaml = str(outfile)) - mod = MyParser(input_source=source, - output_sink=sink) - mod.output(output_data) - - with open(str(outfile), 'r') as fp: - d = yaml.load(fp) - output_data['b'] = "my value" - assert (output_data == d) - -def test_dual_parser(test_json_input_file,test_yaml_input_file): - - mod = MyDualParser(args=['--input_yaml', test_yaml_input_file]) - assert mod.args['a']==5 - assert mod.args['nest']==input_data['nest'] - - mod = MyDualParser(args=['--input_json', test_json_input_file]) - assert mod.args['a']==5 - assert mod.args['nest']==input_data['nest'] - -def test_dual_parser_fail(test_json_input_file,test_yaml_input_file): - with pytest.raises(MultipleConfiguredSourceError): - mod = MyDualParser(args=['--input_yaml', test_yaml_input_file, '--input_json', test_json_input_file]) - -def test_dual_parser_output_fail(test_json_input_file,tmpdir): - test_json_output = str(tmpdir.join('output.yml')) - test_yaml_output = str(tmpdir.join('output.json')) - with pytest.raises(MultipleConfiguredSourceError): - mod = MyDualParser(args=['--input_json', test_json_input_file, - '--output_json',test_json_output, - '--output_yaml',test_yaml_output]) - \ No newline at end of file diff --git a/test/sources/test_json.py b/test/sources/test_json.py new file mode 100644 
index 00000000..08bbb8d0 --- /dev/null +++ b/test/sources/test_json.py @@ -0,0 +1,27 @@ +import json + +import pytest + +from argschema.sources import json_source + + +def test_json_source_get_dict(tmpdir_factory): + path = str(tmpdir_factory.mktemp("test_json_source").join("inp.json")) + + with open(path, "w") as jf: + json.dump({"a": 12}, jf) + + source = json_source.JsonSource() + source.load_config({"input_json": path}) + + assert source.get_dict()["a"] == 12 + +def test_json_sink_put_dict(tmpdir_factory): + path = str(tmpdir_factory.mktemp("test_json_source").join("out.json")) + + sink = json_source.JsonSink() + sink.load_config({"output_json": path}) + sink.put_dict({"a": 13}) + + with open(path, "r") as jf: + assert json.load(jf)["a"] == 13 \ No newline at end of file diff --git a/test/sources/test_parser_integration.py b/test/sources/test_parser_integration.py index adc2ff24..85b82d31 100644 --- a/test/sources/test_parser_integration.py +++ b/test/sources/test_parser_integration.py @@ -1,11 +1,12 @@ import json -import yaml +import os import pytest +import yaml import argschema -from argschema.sources.json_source import JsonSource -from argschema.sources.yaml_source import YamlSource +from argschema.sources.json_source import JsonSource, JsonSink +from argschema.sources.yaml_source import YamlSource, YamlSink from argschema.sources.source import MultipleConfiguredSourceError @@ -70,7 +71,6 @@ def test_json_input_args(json_inp, inp_sources): assert parser.args["a"] == 5 - @pytest.mark.parametrize("inp_sources", [ JsonSource(), [JsonSource()], JsonSource, [JsonSource] ]) @@ -83,7 +83,6 @@ def test_json_input_data(json_inp, inp_sources): assert parser.args["a"] == 5 - def test_multisource_arg(yaml_inp): parser = MyParser( input_sources=[JsonSource, YamlSource], @@ -91,10 +90,36 @@ def test_multisource_arg(yaml_inp): ) assert parser.args["a"] == 6 - def test_multisource_arg_conflict(json_inp, yaml_inp): with pytest.raises(MultipleConfiguredSourceError): parser 
= MyParser( input_sources=[JsonSource, YamlSource], args=["--input_yaml", yaml_inp, "--input_json", json_inp] ) + +def test_multisink(yaml_inp): + out_path = os.path.join(os.path.dirname(yaml_inp), "out.json") + + parser = MyParser( + output_schema_type=MyOutputSchema, + input_sources=YamlSource, + output_sinks=[YamlSink, JsonSink], + args=["--input_yaml", yaml_inp, "--output_json", out_path] + ) + + parser.output({"a": 12, "b": "16"}) + with open(out_path, "r") as out_file: + obt = json.load(out_file) + assert obt["a"] == 12 + +def test_multisink_conflicting(yaml_inp, json_inp): + yaml_out = os.path.join(os.path.dirname(yaml_inp), "out.yaml") + json_out = os.path.join(os.path.dirname(json_inp), "out.json") + + with pytest.raises(MultipleConfiguredSourceError): + parser = MyParser( + output_schema_type=MyOutputSchema, + input_sources=[YamlSource], + output_sinks=[JsonSink, YamlSink], + args=["--output_yaml", yaml_out, "--output_json", json_out] + ) diff --git a/test/sources/test_url.py b/test/sources/test_url.py new file mode 100644 index 00000000..b57b0daa --- /dev/null +++ b/test/sources/test_url.py @@ -0,0 +1,37 @@ +import requests +import mock +from argschema.sources.url_source import UrlSource +from argschema import ArgSchemaParser + + +def mocked_requests_get(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + if args[0] == 'http://localhost:88/test.json': + return MockResponse({ + 'a': 7, + 'nest': { + 'one': 7, + 'two': False + } + }, 200) + return MockResponse(None, 404) + + +@mock.patch('requests.get', side_effect=mocked_requests_get) +def test_url_parser_get_dict(mock_get): + source = UrlSource() + source.load_config({ + "input_host": "localhost", + "input_port": 88, + "input_url": "test.json", + }) + + obtained = source.get_dict() + assert obtained["a"] == 7 \ No newline at end of file diff --git 
a/test/sources/test_yaml.py b/test/sources/test_yaml.py new file mode 100644 index 00000000..bad20703 --- /dev/null +++ b/test/sources/test_yaml.py @@ -0,0 +1,25 @@ +import pytest +import yaml + +from argschema.sources import yaml_source + +def test_json_source_get_dict(tmpdir_factory): + path = str(tmpdir_factory.mktemp("test_yaml_source").join("inp.yaml")) + + with open(path, "w") as jf: + yaml.dump({"a": 12}, jf) + + source = yaml_source.YamlSource() + source.load_config({"input_yaml": path}) + + assert source.get_dict()["a"] == 12 + +def test_json_sink_put_dict(tmpdir_factory): + path = str(tmpdir_factory.mktemp("test_yaml_source").join("out.yaml")) + + sink = yaml_source.YamlSink() + sink.load_config({"output_yaml": path}) + sink.put_dict({"a": 13}) + + with open(path, "r") as jf: + assert yaml.load(jf)["a"] == 13 \ No newline at end of file From fc8f64f97b38a6b63e47f7c1176e02a16509eba1 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Mon, 24 Feb 2020 19:02:24 -0800 Subject: [PATCH 72/81] docstrings for config source; some renaming --- argschema/argschema_parser.py | 157 ++++++++++++++++-------- argschema/sources/__init__.py | 2 +- argschema/sources/json_source.py | 20 ++- argschema/sources/source.py | 118 ++++++++++++------ argschema/sources/url_source.py | 7 +- argschema/sources/yaml_source.py | 18 ++- test/sources/test_parser_integration.py | 6 +- 7 files changed, 224 insertions(+), 104 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index b10dca26..6a96e808 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -1,7 +1,7 @@ '''Module that contains the base class ArgSchemaParser which should be subclassed when using this library ''' -from typing import List, Sequence, Dict, Optional, Union +from typing import List, Sequence, Dict, Optional, Union, Tuple, Type, TypeVar import logging from . import schemas from . 
import utils @@ -9,13 +9,27 @@ from .sources.json_source import JsonSource, JsonSink from .sources.yaml_source import YamlSource, YamlSink from .sources.source import ( - ArgSource, - ArgSink, - NotConfiguredSourceError, - MultipleConfiguredSourceError, + ConfigurableSource, + ConfigurableSink, + NonconfigurationError, + MultipleConfigurationError, ) +SourceType = Union[ConfigurableSource, Type[ConfigurableSource]] +RegistrableSources = Union[ + None, + SourceType, + Sequence[SourceType], +] +SinkType = Union[ConfigurableSink, Type[ConfigurableSink]] +RegistrableSinks = Union[ + None, + SinkType, + Sequence[SinkType], +] + + class ArgSchemaParser(object): """The main class you should sub-class to write your own argschema module. Takes input_data, reference to a input_json and the command line inputs and parses out the parameters @@ -33,9 +47,9 @@ class ArgSchemaParser(object): output_schema_type : marshmallow.Schema the schema to use to validate the output, used by self.output input_sources : Sequence[argschema.sources.source.ConfigurableSource] - Each of these will be considered + each of these will be considered as a potential source of input data output_sinks : Sequence[argschema.sources.source.ConfigurableSource] - a generic sink to write output dictionary to + each of these will be considered as a potential sink for output data args : list or None command line arguments passed to the module, if None use argparse to parse the command line, set to [] if you want to bypass command line parsing @@ -51,25 +65,25 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None - default_sources = (JsonSource,) - default_sinks = (JsonSink,) + default_sources: Tuple[SourceType] = (JsonSource,) + default_sinks: Tuple[SinkType] = (JsonSink,) @property - def input_sources(self) -> List[ArgSource]: + def input_sources(self) -> List[ConfigurableSource]: if not hasattr(self, "_input_sources"): - self._input_sources = [] + 
self._input_sources: List[ConfigurableSource] = [] return self._input_sources @property - def output_sinks(self) -> List[ArgSource]: + def output_sinks(self) -> List[ConfigurableSink]: if not hasattr(self, "_output_sinks"): - self._output_sinks = [] + self._output_sinks: List[ConfigurableSink] = [] return self._output_sinks @property def io_schemas(self) -> List[mm.Schema]: if not hasattr(self, "_io_schemas"): - self._io_schemas = [] + self._io_schemas: List[mm.Schema] = [] return self._io_schemas @io_schemas.setter @@ -105,23 +119,31 @@ def __init__(self, self.output_schema_type = output_schema_type self.logger = self.initialize_logger( - logger_name, self.args.get('log_level')) + logger_name, self.args.get('log_level')) def register_sources( self, - sources: Union[None, Sequence[ArgSource], ArgSource] + sources: RegistrableSources ): - """consolidate a list of the input and output source command line - configuration schemas + """consolidate a list of the input source configuration schemas + + Parameters + ---------- + sources : + Each source will be registered (and may then be configured by data + passed to this parser). If None is argued, the default_sources + associated with this class will be registered. 
+ """ - if isinstance(sources, (ArgSource, type)): - sources = [sources] + if isinstance(sources, (ConfigurableSource, type)): + coerced_sources: Sequence[SourceType] = [sources] elif sources is None: - sources = self.default_sources - + coerced_sources = self.default_sources + else: + coerced_sources = sources - for source in sources: + for source in coerced_sources: if isinstance(source, type): source = source() self.io_schemas.append(source.schema) @@ -129,25 +151,48 @@ def register_sources( def register_sinks( self, - sinks: Union[None, Sequence[ArgSink], ArgSink] + sinks: RegistrableSinks ): - """ + """Consolidate a list of the output sink configuration schemas + + Parameters + ---------- + sinks : + Each sink will be registered (and may then be configured by data + passed to this parser). If None is argued, the default_sinks + associated with this class will be registered. + """ - if isinstance(sinks, (ArgSink, type)): - sinks = [sinks] + if isinstance(sinks, (ConfigurableSink, type)): + coerced_sinks: Sequence[SinkType] = [sinks] elif sinks is None: - sinks = self.default_sinks + coerced_sinks = self.default_sinks + else: + coerced_sinks = sinks - for sink in sinks: + for sink in coerced_sinks: if isinstance(sink, type): sink = sink() self.io_schemas.append(sink.schema) self.output_sinks.append(sink) - def parse_command_line(self, args: Optional[List]): - """ build a command line parser from the input schemas and - configurations + def parse_command_line(self, args: Optional[List[str]]) -> Dict: + """Build a command line parser from the input schemas and + configurations. Parse command line arguments using this parser + + Parameters + ---------- + args : list of str or None + Will be passed directly to argparse's parse_args. If None, sys.argv + will be used. 
If provided, should be formatted like: + ["positional_arg", "--optional_arg", "optional_value"] + + Returns + ------- + argsdict : dict + a (potentially nested) dictionary of parsed command line arguments + """ parser = utils.schema_argparser(self.schema, self.io_schemas) argsobj = parser.parse_args(args) @@ -158,6 +203,25 @@ def parse_command_line(self, args: Optional[List]): def resolve_inputs(self, input_data: Dict, argsdict: Dict) -> Dict: """ Resolve input source by checking candidate sources against constructor and command line arguments + + Parameters + ---------- + input_data : dict + Manually (on ArgschemaParser construction) specified parameters. + Will be overridden if values are successfully extracted from + argsdict. + argsdict : dict + Command line parameters, parsed into a nested dictionary. + + Returns + ------- + args : dict + A fully merged (possibly nested) collection of inputs. May draw from + 1. input data + 2. the argsdict + 3. any configurable sources whose config schemas are satisfied + by values in the above + """ config_data = self.__get_input_data_from_config(input_data) @@ -175,21 +239,21 @@ def resolve_inputs(self, input_data: Dict, argsdict: Dict) -> Dict: return args def __get_output_sink_from_config(self, d): - """private function to check for ArgSink configuration in a dictionary and return a configured ArgSink + """private function to check for ConfigurableSink configuration in a dictionary and return a configured ConfigurableSink Parameters ---------- d : dict - dictionary to look for ArgSink Configuration parameters in + dictionary to look for ConfigurableSink Configuration parameters in Returns ------- - ArgSink - A configured argsink + ConfigurableSink + A configured ConfigurableSink Raises ------ - MultipleConfiguredSourceError + MultipleConfigurationError If more than one Sink is configured """ output_set = False @@ -199,16 +263,16 @@ def __get_output_sink_from_config(self, d): sink.load_config(d) if output_set: - raise 
MultipleConfiguredSourceError( + raise MultipleConfigurationError( "more then one OutputSink configuration present in {}".format(d)) output_sink = sink output_set = True - except NotConfiguredSourceError: + except NonconfigurationError: pass return output_sink def __get_input_data_from_config(self, d): - """private function to check for ArgSource configurations in a dictionary + """private function to check for ConfigurableSource configurations in a dictionary and return the data if it exists Parameters @@ -223,7 +287,7 @@ def __get_input_data_from_config(self, d): Raises ------ - MultipleConfiguredSourceError + MultipleConfigurationError if more than one InputSource is configured """ input_set = False @@ -233,10 +297,10 @@ def __get_input_data_from_config(self, d): source.load_config(d) input_data = source.get_dict() if input_set: - raise MultipleConfiguredSourceError( + raise MultipleConfigurationError( "more then one InputSource configuration present in {}".format(d)) input_set = True - except NotConfiguredSourceError as e: + except NonconfigurationError as e: pass return input_data @@ -279,7 +343,7 @@ def output(self, d, sink=None): ---------- d:dict output dictionary to output - sink: argschema.sources.source.ArgSink + sink: argschema.sources.source.ConfigurableSink output_sink to output to (optional default to self.output_source) Raises @@ -346,8 +410,3 @@ def initialize_logger(name, log_level): logger = logging.getLogger(name) logger.setLevel(level=level) return logger - - -class ArgSchemaYamlParser(ArgSchemaParser): - default_sources = [YamlSource] - default_sinks = [YamlSink] diff --git a/argschema/sources/__init__.py b/argschema/sources/__init__.py index 7d604108..76bfa557 100644 --- a/argschema/sources/__init__.py +++ b/argschema/sources/__init__.py @@ -1,2 +1,2 @@ -from argschema.sources.source import ArgSink, ArgSource +from argschema.sources.source import ConfigurableSource, ConfigurableSink from argschema.sources.json_source import JsonSource, 
JsonSink diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 8dab3e95..22ec191f 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -1,4 +1,4 @@ -from .source import ArgSource, ArgSink +from argschema.sources.source import ConfigurableSource, ConfigurableSink import json import marshmallow as mm import argschema @@ -16,7 +16,12 @@ class JsonOutputConfigSchema(mm.Schema): description='whether to indent options or not') -class JsonSource(ArgSource): +class JsonSource(ConfigurableSource): + """ A configurable source which reads values from a json. Expects + --input_json + to be specified. + """ + ConfigSchema = JsonInputConfigSchema def get_dict(self): @@ -24,9 +29,14 @@ def get_dict(self): return json.load(fp,) -class JsonSink(ArgSink): +class JsonSink(ConfigurableSink): + """ A configurable sink which writes values to a json. Expects + --output_json + to be specified. + """ ConfigSchema = JsonOutputConfigSchema - def put_dict(self, d): + def put_dict(self, data): with open(self.config["output_json"], 'w') as fp: - json.dump(d, fp, indent=self.config.get("output_json_indent", None)) + json.dump( + data, fp, indent=self.config.get("output_json_indent", None)) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 6670ecaf..0d4e49c3 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -1,103 +1,143 @@ import abc -from typing import Dict +from typing import Dict, Type import marshmallow as mm -class ConfigurableSourceError(mm.ValidationError): - """Base Exception class for configurable sources""" +class ConfigurationError(mm.ValidationError): + """Base Exception class for configurations""" pass -class MisconfiguredSourceError(ConfigurableSourceError): - """Exception when a source configuration was present in part but failed +class MisconfigurationError(ConfigurationError): + """Exception when a configuration was present in part but failed validation""" 
pass -class NotConfiguredSourceError(ConfigurableSourceError): - """Exception when the source configuration is simply completely missing""" +class NonconfigurationError(ConfigurationError): + """Exception when a configuration is simply completely missing""" pass -class MultipleConfiguredSourceError(ConfigurableSourceError): - """Exception when there is more than one validly configured Source configured""" +class MultipleConfigurationError(ConfigurationError): + """Exception when there is more than one valid configuration""" pass -def d_contains_any_fields(schema, d): +def d_contains_any_fields(schema: mm.Schema, data: Dict) -> bool: """function to test if a dictionary contains any elements of a schema Parameters ---------- schema: marshmallow.Schema a marshmallow schema to test d with - d: dict + data: dict the dictionary to test whether it contains any elements of a schema Returns ------- bool: - True/False whether d contains any elements of a schema. If a schema contains no elements, returns True + True/False whether d contains any elements of a schema. If a schema + contains no elements, returns True """ if len(schema.declared_fields) == 0: return True + for field_name, field in schema.declared_fields.items(): - if field_name in d.keys(): - if d[field_name] is not None: + if field_name in data.keys(): + if data[field_name] is not None: return True + return False -class ConfigSourceSchema(mm.Schema): - pass +class Configurable(object): + """Base class for sources and sinks of marshmallow-validatable + parameters. + + Parameters + ---------- + **default_config : dict + Optionally, attempt to load a config immediately upon construction + + Attributes + ---------- + ConfigSchema : type(mm.Schema), class attribute + Defines a schema for this Configurable's config. + config : dict + Stores for values loaded according to this instance's schema + schema : mm.Schema + An instance of this class's ConfigSchema. Used to validate potential + configurations. 
+ + """ + + ConfigSchema: Type[mm.Schema] = mm.Schema -class ConfigurableSource(object): - ConfigSchema = ConfigSourceSchema + def __init__(self, **default_config: Dict): - def __init__(self, **kwargs): - """Configurable source + self.schema: mm.Schema = self.ConfigSchema() + self.config: Dict = {} + + if default_config: + self.load_config(default_config) + + def load_config(self, candidate: Dict): + """Attempt to configure this object inplace using values in a candidate + dictionary. Parameters ---------- - **kwargs: dict - a set of keyword arguments which will be validated by this classes ConfigSchema - which will define the set of fields that are allowed (and their defaults) - """ - self.schema = self.ConfigSchema() - self.config = {} + candidate : dict + Might satisfy (and will be loaded using) this object's schema. + + Raises + ------ + NonconfigurationError : Indicates that the candidate was completely + inapplicable. + MisconfigurationError : Indicates that the candidate did not adequetly + satisfy this configurable's schema. - def load_config(self, candidate: Dict): - """ """ if candidate is None: - raise NotConfiguredSourceError("No data was provided") + candidate = {} if not d_contains_any_fields(self.schema, candidate): - raise NotConfiguredSourceError( + raise NonconfigurationError( "This source is not present in \n {}".format(candidate)) try: self.config = self.schema.load(candidate, unknown=mm.EXCLUDE) except mm.ValidationError as e: - raise MisconfiguredSourceError( + raise MisconfigurationError( "Source incorrectly configured\n {}".format(e)) -class ArgSource(ConfigurableSource): - def get_dict(self): - """method that must be implemented to enable an ArgSource to return a dictionary""" +class ConfigurableSource(Configurable): + def get_dict(self) -> Dict: + """Produces a dictionary, potentially using information from this + source's config. + + Returns + ------- + dict : Suitable for validatation by some external marshmallow schema. 
+ """ + raise NotImplementedError() -class ArgSink(ConfigurableSource): - def put_dict(self, d): - """method that must be implemented to enable an ArgSink to write a dictionary + +class ConfigurableSink(Configurable): + def put_dict(self, data: Dict): + """Writes a dictionary, potentially using information from this + sink's config. Parameters ---------- - d: dict - the dictionary to write + dict : Will be written to some external sink. + """ + raise NotImplementedError() diff --git a/argschema/sources/url_source.py b/argschema/sources/url_source.py index 48456d8b..3a270255 100644 --- a/argschema/sources/url_source.py +++ b/argschema/sources/url_source.py @@ -1,4 +1,4 @@ -from argschema.sources import ArgSource, ArgSink +from argschema.sources import ConfigurableSource from argschema.schemas import DefaultSchema from argschema.fields import Str,Int from argschema import ArgSchemaParser @@ -14,7 +14,10 @@ class UrlSourceConfig(DefaultSchema): input_url = Str(required=True, description="location on host of input") input_protocol = Str(required=False, default='http', description="url protocol to use") -class UrlSource(ArgSource): +class UrlSource(ConfigurableSource): + """ A configurable source which obtains values by making a GET request, + expecting a JSON response. + """ ConfigSchema = UrlSourceConfig def get_dict(self): diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index eb8f2dd0..878a6cc2 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -1,5 +1,5 @@ import yaml -from argschema.sources.source import ArgSource, ArgSink +from argschema.sources.source import ConfigurableSource, ConfigurableSink import argschema import marshmallow as mm @@ -14,7 +14,11 @@ class YamlOutputConfigSchema(mm.Schema): description='filepath to save output yaml') -class YamlSource(ArgSource): +class YamlSource(ConfigurableSource): + """ A configurable source which reads values from a yaml. 
Expects + --input_yaml + to be specified. + """ ConfigSchema = YamlInputConfigSchema def get_dict(self): @@ -22,9 +26,13 @@ def get_dict(self): return yaml.load(fp) -class YamlSink(ArgSink): +class YamlSink(ConfigurableSink): + """ A configurable sink which writes values to a yaml. Expects + --output_yaml + to be specified. + """ ConfigSchema = YamlOutputConfigSchema - def put_dict(self, d): + def put_dict(self, data): with open(self.config["output_yaml"], 'w') as fp: - yaml.dump(d, fp, default_flow_style=False) + yaml.dump(data, fp, default_flow_style=False) diff --git a/test/sources/test_parser_integration.py b/test/sources/test_parser_integration.py index 85b82d31..c607d28e 100644 --- a/test/sources/test_parser_integration.py +++ b/test/sources/test_parser_integration.py @@ -7,7 +7,7 @@ import argschema from argschema.sources.json_source import JsonSource, JsonSink from argschema.sources.yaml_source import YamlSource, YamlSink -from argschema.sources.source import MultipleConfiguredSourceError +from argschema.sources.source import MultipleConfigurationError class MyNestedSchema(argschema.schemas.DefaultSchema): @@ -91,7 +91,7 @@ def test_multisource_arg(yaml_inp): assert parser.args["a"] == 6 def test_multisource_arg_conflict(json_inp, yaml_inp): - with pytest.raises(MultipleConfiguredSourceError): + with pytest.raises(MultipleConfigurationError): parser = MyParser( input_sources=[JsonSource, YamlSource], args=["--input_yaml", yaml_inp, "--input_json", json_inp] @@ -116,7 +116,7 @@ def test_multisink_conflicting(yaml_inp, json_inp): yaml_out = os.path.join(os.path.dirname(yaml_inp), "out.yaml") json_out = os.path.join(os.path.dirname(json_inp), "out.json") - with pytest.raises(MultipleConfiguredSourceError): + with pytest.raises(MultipleConfigurationError): parser = MyParser( output_schema_type=MyOutputSchema, input_sources=[YamlSource], From 6ad1f5d0f09c31c61cf8ba0ebe47dc539a06fa43 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Mon, 24 Feb 2020 19:26:22 
-0800 Subject: [PATCH 73/81] avoid security vulnerability in yaml sour e --- argschema/sources/yaml_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index 878a6cc2..3b6a90db 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -23,7 +23,7 @@ class YamlSource(ConfigurableSource): def get_dict(self): with open(self.config["input_yaml"], 'r') as fp: - return yaml.load(fp) + return yaml.load(fp, Loader=yaml.FullLoader) class YamlSink(ConfigurableSink): From 96b28e1c2992ccd441816b6dc7ddf9f0ad8d516d Mon Sep 17 00:00:00 2001 From: nile graddis Date: Mon, 24 Feb 2020 19:26:43 -0800 Subject: [PATCH 74/81] add example of multisource argschema parsing --- examples/multisource_example.json | 6 ++++ examples/multisource_example.py | 48 +++++++++++++++++++++++++++++++ examples/multisource_example.yaml | 3 ++ 3 files changed, 57 insertions(+) create mode 100644 examples/multisource_example.json create mode 100644 examples/multisource_example.py create mode 100644 examples/multisource_example.yaml diff --git a/examples/multisource_example.json b/examples/multisource_example.json new file mode 100644 index 00000000..49ad4497 --- /dev/null +++ b/examples/multisource_example.json @@ -0,0 +1,6 @@ +{ + "a_subschema": { + "an_int": 12 + }, + "a_float": 15.5 +} \ No newline at end of file diff --git a/examples/multisource_example.py b/examples/multisource_example.py new file mode 100644 index 00000000..7ac062cb --- /dev/null +++ b/examples/multisource_example.py @@ -0,0 +1,48 @@ +"""This example shows you how to register multiple input sources for your executable, which users can then select from dynamically when running it. This feature makes your code a bit more flexible about the format of the input parameters. + +There is a similar feature (not shown here) for specifying output sinks. It follows the same pattern. 
+ +Usage +----- +# you can load parameters from a yaml ... +$ python examples/multisource_example.py --input_yaml examples/multisource_example.yaml +{'a_subschema': {'an_int': 13}, 'log_level': 'ERROR', 'a_float': 16.7} + +# ... or from an input json ... +$ python examples/multisource_example.py --input_json examples/multisource_example.json +{'a_float': 15.5, 'a_subschema': {'an_int': 12}, 'log_level': 'ERROR'} + +# ... but not both +$ python examples/multisource_example.py --input_json examples/multisource_example.json --input_yaml examples/multisource_example.yaml +argschema.sources.source.MultipleConfigurationError: more then one InputSource configuration present in {'input_json': 'examples/multisource_example.json', 'input_yaml': 'examples/multisource_example.yaml'} + +# command line parameters still override sourced ones +$ python examples/multisource_example.py --input_json examples/multisource_example.json --a_float 13.1 +{'a_float': 13.1, 'a_subschema': {'an_int': 12}, 'log_level': 'ERROR'} + +""" + +import argschema + +class SubSchema(argschema.schemas.DefaultSchema): + an_int = argschema.fields.Int() + +class MySchema(argschema.ArgSchema): + a_subschema = argschema.fields.Nested(SubSchema) + a_float = argschema.fields.Float() + + +def main(): + + parser = argschema.ArgSchemaParser( + schema_type=MySchema, + input_sources=[ # each source provided here will be checked against command-line arguments + argschema.sources.json_source.JsonSource, # ArgschemaParser includes this source by default + argschema.sources.yaml_source.YamlSource + ] + ) + + print(parser.args) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/multisource_example.yaml b/examples/multisource_example.yaml new file mode 100644 index 00000000..7323832d --- /dev/null +++ b/examples/multisource_example.yaml @@ -0,0 +1,3 @@ +a_subschema : + an_int: 13 +a_float: 16.7 \ No newline at end of file From ec504641b63b60096c829f4fc2fee005e0541d05 Mon Sep 17 
00:00:00 2001 From: nilegraddis Date: Thu, 5 Mar 2020 15:30:44 -0800 Subject: [PATCH 75/81] add missing docstring types; raise if url_source response is bad --- argschema/argschema_parser.py | 4 ++-- argschema/sources/url_source.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 6a96e808..03b1ffce 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -129,7 +129,7 @@ def register_sources( Parameters ---------- - sources : + sources : (sequence of) ConfigurableSource or None Each source will be registered (and may then be configured by data passed to this parser). If None is argued, the default_sources associated with this class will be registered. @@ -157,7 +157,7 @@ def register_sinks( Parameters ---------- - sinks : + sinks : (sequence of) ConfigurableSink or None Each sink will be registered (and may then be configured by data passed to this parser). If None is argued, the default_sinks associated with this class will be registered. 
diff --git a/argschema/sources/url_source.py b/argschema/sources/url_source.py index 3a270255..e7ee6fd1 100644 --- a/argschema/sources/url_source.py +++ b/argschema/sources/url_source.py @@ -33,4 +33,5 @@ def get_dict(self): )) response = requests.get(url) + response.raise_for_status() return response.json() From 23e91648d0aa8cecc5d52506336256ecf58e58cc Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Thu, 5 Mar 2020 15:35:00 -0800 Subject: [PATCH 76/81] update url_source test class to handle raise_for_status --- test/sources/test_url.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/sources/test_url.py b/test/sources/test_url.py index b57b0daa..5215476e 100644 --- a/test/sources/test_url.py +++ b/test/sources/test_url.py @@ -10,6 +10,10 @@ def __init__(self, json_data, status_code): self.json_data = json_data self.status_code = status_code + def raise_for_status(self): + if self.status_code >= 400: + raise requests.exceptions.HTTPError() + def json(self): return self.json_data From e777fc7cc86b2639a86840eb803d292b0cdfbd72 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 13 Apr 2020 09:05:52 -0700 Subject: [PATCH 77/81] initial readme change --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index cce862b6..3262a224 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,10 @@ OR pass a json_dictionary directly into the module with the parameters defined AND/OR pass parameters via the command line, in a way that will override the input_json or the json_dictionary given. +## Upgrading to version 3.0 +The major change in argschema 3.0 is introducing a more generalized interface for reading and writing dictionaries, referred to as ArgSource and ArgSink. 
+ + ## Upgrading to version 2.0 The major change in argschema 2.0 is becoming compatible with marshmallow 3, which changes From 7cd3f38a57b832f3309689fdafbad31e0443f63b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 13 Apr 2020 09:25:14 -0700 Subject: [PATCH 78/81] edit readme --- README.md | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 3262a224..672ad69b 100644 --- a/README.md +++ b/README.md @@ -14,17 +14,6 @@ OR pass a json_dictionary directly into the module with the parameters defined AND/OR pass parameters via the command line, in a way that will override the input_json or the json_dictionary given. -## Upgrading to version 3.0 -The major change in argschema 3.0 is introducing a more generalized interface for reading and writing dictionaries, referred to as ArgSource and ArgSink. - - -## Upgrading to version 2.0 -The major change in argschema 2.0 is becoming -compatible with marshmallow 3, which changes -many of the ways your schemas and schema modifications work. Some noteable differences are that schemas are strict now by default, so tossing keys in your outputs or inputs that were ignored and stripped before now throw errors unless -def -Please read this document for more guidance -https://marshmallow.readthedocs.io/en/stable/upgrading.html ## Level of Support We are planning on occasional updating this tool with no fixed schedule. Community involvement is encouraged through both issues and pull requests. Please make pull requests against the dev branch, as we will test changes there before merging into master. @@ -79,7 +68,7 @@ You start building some code in an ipython notebook to play around with a new id It's a mess, and you know you should migrate your code over to a module that you can call from other programs or notebooks. You start collecting your input variables to the top of the notebook and make yourself a wrapper function that you can call. 
However, now your mistake in filename typing is a disaster because the file doesn't exist, and your code doesn't check for the existence of the file until quite late. You start implementing some input validation checks to avoid this problem. -Now you start wanting to integrate this code with other things, including elements that aren't in python. You decide that you need to have a command line module that executes the code, because then you can use other tools to stitch together your processing, like maybe some shell scripts or docker run commands. You implement an argparse set of inputs and default values that make your python program a self-contained program, with some help documentation. Along the way, you have to refactor the parsed argparse variables into your function and strip out your old hacky validation code to avoid maintaining two versions of validation in the future. +Now you start wanting to integrate this code with other things, including elements that aren't in python. You decide that you need to have a command line module that executes the code, because then you can use other tools to stitch together your processing, like maybe some shell scripts or docker run commands. You implement an argparse set of inputs and default values that make your python program a self-contained program, with some helpful documentation. Along the way, you have to refactor the parsed argparse variables into your function and strip out your old hacky validation code to avoid maintaining two versions of validation in the future. This module starts becoming useful enough that you want to integrate it into more complex modules. You end up copying and pasting various argparse lines over to other modules, and then 5 other modules. Later you decide to change your original module a little bit, and you have a nightmare of code replace to fix up the other modules to mirror this phenomenon.. you kick yourself for not having thought this through more clearly. 
@@ -89,5 +78,25 @@ If you had only designed things from the beginning to allow for each of these us This is what argschema is designed to do. + +## Upgrading to version 3.0 +The major change in argschema 3.0 is introducing a more generalized interface for reading and writing dictionaries, referred to as ArgSource and ArgSink. One can define customized classes that read dictionaries from any source you can code, such as making a database call, reading from a web service, reading a yaml file, etc. Argschema isn't just for json anymore. Similarly you can now dynamically tell your ArgSchemaParser to write output to an Argsink, which might write to a database, a webservice, or a messaging service. This enables those integrating modules into larger workflow management solutions more flexibility in wiring up your python modules to those systems. + +It also removes features that were marked previously as deprecated. + +Notably parsing List arguments with --listarg a b c, which instead should be called as --listarg a,b,c. In other words cli_as_single_argument = False is no longer an option. + +It also removes the old names JsonModule, ModuleParameters, which are now ArgSchemaParser and ArgSchema respectively. + +The field OptionList has been removed. The same functionality can be accomplished with the keyword, validate=mm.validate.OneOf([a,b,c...]) in the field definition. + +## Upgrading to version 2.0 +The major change in argschema 2.0 is becoming +compatible with marshmallow 3, which changes +many of the ways your schemas and schema modifications work. Some noteable differences are that schemas are strict now by default, so tossing keys in your outputs or inputs that were ignored and stripped before now throw errors. 
+ +Please read this document for more guidance +https://marshmallow.readthedocs.io/en/stable/upgrading.html + Copyright 2017 Allen Institute From 1db1619240d1c08b9c9844dd0ac4478f307888fd Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 1 Jul 2020 07:23:39 -0700 Subject: [PATCH 79/81] making compatible with marshmallow 3.6.1 --- argschema/autodoc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/argschema/autodoc.py b/argschema/autodoc.py index c2fd2c79..58db57a8 100644 --- a/argschema/autodoc.py +++ b/argschema/autodoc.py @@ -123,6 +123,7 @@ def setup(app): except Exception as e: # in case this fails for some reason, note it as unknown # TODO handle this more elegantly, identify and patch up such cases + print(e) field_line += "unknown,unknown" lines.append(field_line) # lines.append(table_line) From 9c394782907c6fd4a2a0180f6303bd5d4ce5c502 Mon Sep 17 00:00:00 2001 From: Dan Kapner Date: Thu, 25 Mar 2021 12:07:16 -0700 Subject: [PATCH 80/81] fixes test to work with configurable sink --- test/test_argschema_parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_argschema_parser.py b/test/test_argschema_parser.py index 6cb42d0e..8a9e95d5 100644 --- a/test/test_argschema_parser.py +++ b/test/test_argschema_parser.py @@ -85,10 +85,12 @@ def test_parser_output(tmpdir_factory): 'nest': { 'one': 7, 'two': False - } + }, + 'output_json': str(json_path), + 'output_json_indent': 2 } mod = MyParser(input_data=input_data, args=[]) - mod.output(mod.args, output_path=str(json_path), indent=2) + mod.output(mod.args) with open(str(json_path), 'r') as jf: obt = json.load(jf) assert(obt['nest']['one'] == mod.args['nest']['one']) From 7c363553d5bdcf2c93f8fad685d1ee81ed0063ae Mon Sep 17 00:00:00 2001 From: Dan Kapner Date: Thu, 25 Mar 2021 12:07:43 -0700 Subject: [PATCH 81/81] comments out troublesome deprecated list test --- test/test_cli_overrides.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) 
diff --git a/test/test_cli_overrides.py b/test/test_cli_overrides.py index fd041ccf..8f380b9e 100644 --- a/test/test_cli_overrides.py +++ b/test/test_cli_overrides.py @@ -210,15 +210,28 @@ def test_override_list(test_data): args=["--list", "invalid"]) -def test_override_list_deprecated(deprecated_data): - with pytest.warns(FutureWarning): - mod = ArgSchemaParser(deprecated_data, schema_type=MyDeprecatedSchema, - args=["--list_deprecated", "1000", "3000"]) - assert(mod.args["list_deprecated"] == [1000, 3000]) - with pytest.raises(mm.ValidationError): - mod = ArgSchemaParser(deprecated_data, - schema_type=MyDeprecatedSchema, - args=["--list_deprecated", "[1000,3000]"]) +# @pytest.fixture +# def deprecated_data(): +# data = { +# "list_deprecated": [300, 200, 800, 1000], +# } +# return data +# +# +# class MyDeprecatedSchema(ArgSchema): +# list_deprecated = fields.List(fields.Int, required=True) +# +# +# def test_override_list_deprecated(deprecated_data): +# with pytest.warns(FutureWarning): +# mod = ArgSchemaParser(input_data=deprecated_data, +# schema_type=MyDeprecatedSchema, +# args=["--list_deprecated", "1000", "3000"]) +# assert(mod.args["list_deprecated"] == [1000, 3000]) +# with pytest.raises(mm.ValidationError): +# mod = ArgSchemaParser(deprecated_data, +# schema_type=MyDeprecatedSchema, +# args=["--list_deprecated", "[1000,3000]"]) # def test_override_localdatetime(test_data):