diff --git a/docs/conf.py b/docs/conf.py index 52c13a1a..85e71c10 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ # -- Project information ----------------------------------------------------- project = u'propnet' -copyright = u'2018, The Propnet Development Team' +copyright = u'2019, The Propnet Development Team' author = u'The Propnet Development Team' # The short X.Y version diff --git a/propnet/core/__init__.py b/propnet/core/__init__.py index e69de29b..088a6920 100644 --- a/propnet/core/__init__.py +++ b/propnet/core/__init__.py @@ -0,0 +1,7 @@ +""" +This module contains the core architecture of propnet. These components provide the infrastructure for building +the propnet knowledge graph, populating it with data, and traversing it to uncover new data. It also contains +analysis modules to inspect materials properties. + +This package is in active development, so analysis and core functionality is continually being added! +""" diff --git a/propnet/core/exceptions.py b/propnet/core/exceptions.py index da263ac9..0ca0dda7 100644 --- a/propnet/core/exceptions.py +++ b/propnet/core/exceptions.py @@ -1,6 +1,7 @@ """ Module defining exception and warning classes. """ +# TODO: Maybe expand these? class ModelEvaluationError(RuntimeError): @@ -15,4 +16,4 @@ class IncompleteData(Warning): class SymbolConstraintError(RuntimeError): """Invalid quantity value with respect to symbol constraints""" - pass \ No newline at end of file + pass diff --git a/propnet/core/fitting.py b/propnet/core/fitting.py index 46111ede..50e495b0 100644 --- a/propnet/core/fitting.py +++ b/propnet/core/fitting.py @@ -1,6 +1,21 @@ -""" -This module contains code relevant to using fitting to improve -the aggregation process +"""Benchmarking routine for improved aggregation of properties. + +This module contains routines to benchmark models against experimental values to improve the aggregation scheme +for quantities. By default, when a ``Material`` contains multiple derived quantities for a given property (symbol), +they are aggregated using a simple, unweighted mean. However, depending on the quality of the models used to produce +those quantities, this may not be ideal. + +These routines calculate optimal weights for models given an experimental dataset of materials to match. + +Example: + >>> from propnet.core.fitting import fit_model_scores + >>> from propnet.core.materials import Material + >>> materials = [Material(...), ...] # a list of materials populated with properties + >>> benchmarks = [ + >>> {'symbol_name': ...}, ... # a list of benchmark data as dicts + >>> ] + >>> # select models for which to calculate weights and run + >>> scores = fit_model_scores(materials, benchmarks, models=['model_1', 'model_2', ...]) """ @@ -9,24 +24,25 @@ from collections import OrderedDict import numpy as np - from scipy.optimize import minimize, Bounds, LinearConstraint -from propnet.core.quantity import QuantityFactory +from propnet.core.quantity import QuantityFactory # noinspection PyUnresolvedReferences import propnet.models from propnet.core.registry import Registry + def aggregate_quantities(quantities, model_score_dict=None): """ - Simple method for aggregating a set of quantities + Simple method for aggregating a set of quantities. 
Args: - quantities: - model_score_dict: + quantities (`iterable` of `propnet.core.quantity.NumQuantity`): iterable of Quantity objects to aggregate + model_score_dict (dict): dict of weights to apply to models, keyed + by model name or Model object Returns: - + propnet.core.quantity.NumQuantity: resulting quantity from aggregation """ symbol = next(iter(quantities)).symbol if not all([q.symbol == symbol for q in quantities]): @@ -39,14 +55,15 @@ def aggregate_quantities(quantities, model_score_dict=None): def get_weight(quantity, model_score_dict=None): """ - Gets weight based on scoring scheme + Calculates weight based on scoring scheme and provenance of quantities. Args: - quantity (Quantity): quantity for which to get weight - model_score_dict ({str: float}): dictionary of model names to scores + quantity (propnet.core.quantity.NumQuantity): quantity for which to get weight + model_score_dict (dict): dict of weights as floats to apply to models, keyed + by model name or Model object Returns: - calculated weight for input quantity + float: calculated weight for input quantity """ if quantity.provenance is None or quantity.provenance.inputs is None: return 1 @@ -62,25 +79,23 @@ def get_weight(quantity, model_score_dict=None): def fit_model_scores(materials, benchmarks, models=None, init_scores=None, constrain_sum=False): """ - Fits a set of model scores to a set of benchmark data + Fits a set of model scores/weights to a set of benchmark data by minimizing the sum of squared errors + with the benchmarking data. Args: - materials ([Material]): list of evaluated materials containing + materials (`list` of `propnet.core.materials.Material`): list of evaluated materials containing symbols for benchmarking - benchmarks ([{Symbol or str: float}]): list of dicts, keyed by Symbol - or symbol name containing benchmark data for each material in ``materials``. - models ([Model or str]): list of models which should have their - scores adjusted in the aggregation weighting scheme - init_scores ({str: float}): initial scores for minimization procedure. - If unspecified, all scores are equal. Scores are normalized to sum of - scores. - constrain_sum (bool): True constrains the sum of weights to 1, False - removes this constraint. Default: False (no constraint) + benchmarks (`list` of `dict`): list of dicts, keyed by Symbol or symbol name, containing benchmark data + for each material in ``materials``. + models (`list` of `propnet.core.models.Model` or `list` of `str` or `None`): optional, list of models whose + scores will be adjusted in the aggregation weighting scheme. Default: `None` (all models will be adjusted) + init_scores (dict): optional, dict containing initial scores for minimization procedure, keyed by model name + or Model. Scores are normalized to sum of scores. Default: `None` (all scores are equal) + constrain_sum (bool): optional, ``True`` constrains the sum of scores to 1, ``False`` + removes this constraint. Default: ``False`` (no constraint) Returns: - {str: float} scores corresponding to those which minimize - SSE for the benchmarked dataset - + OrderedDict: dict of scores corresponding to the minimized sum of squared errors, keyed by model. """ # Probably not smart to have ALL available models in the list. That's a lot of DOF. 
# TODO: Perhaps write a method to produce a list of models in the provenance trees @@ -109,18 +124,19 @@ def f(f_scores): def get_sse(materials, benchmarks, model_score_dict=None): """ - Function to get the sum squared error of a set of benchmarks - with aggregated data from the model scoring scheme above + Calculate the sum squared error for aggregated data + weighted by the specified model scoring scheme against a set of benchmarks. Args: - materials ([Material]): list of materials to evaluate - benchmarks ([{Symbol or str: float}]): list of benchmarks - for each material - model_score_dict ({str: float}): model score dictionary - with scores for each model name + materials (`list` of `propnet.core.materials.Material`): list of evaluated materials containing + symbols for benchmarking + benchmarks (`list` of `dict`): list of dicts, keyed by Symbol or symbol name, containing benchmark data + for each material in ``materials``. + model_score_dict (dict): dict of weights as floats to apply to models, keyed + by model name or Model object Returns: - (float): sum squared error over all the benchmarks + float: sum squared error over all the benchmarks """ sse = 0 diff --git a/propnet/core/graph.py b/propnet/core/graph.py index 64120416..be37ea7d 100644 --- a/propnet/core/graph.py +++ b/propnet/core/graph.py @@ -1,17 +1,46 @@ -""" -Module containing classes and methods for graph functionality in Propnet code. +"""Knowledge graph builder and traversal. + +This module contains the class responsible for building the propnet knowledge graph and +traversing it when materials properties are applied to it in order to derive all possible +new materials properties. + +The knowledge graph is built from the contents of the propnet model and symbol ``Registry``. +In order to use models and symbols that are built-in to propnet, be sure to: + +>>> import propnet.models # For symbols and models +>>> import propnet.symbols # For symbols only + +Examples: + To run the knowledge graph on a material, import the models and symbols desired from + the built-in library (see above) or add your own custom models/symbols to the ``Registry``: + + >>> from propnet.core.symbols import Symbol + >>> from propnet.core.models import EquationModel # Or whichever type you are making + >>> from propnet.core.materials import Material + >>> sym1 = Symbol(...) # Register custom symbols (register=True is default) + >>> sym2 = Symbol(...) + >>> model1 = EquationModel(...) # Register custom model (register=True is default) + + Then you can run the graph traversal algorithm using the models chosen. + + >>> from propnet.core.graph import Graph + >>> g = Graph(parallel=True) # Can run in parallel mode + >>> material = Material(...) 
# Create a material with some properties + >>> evaluated_material = g.evaluate(material) # Run graph traversal on material """ import logging from collections import defaultdict from itertools import product, chain, repeat -from chronic import Timer, timings, clear -from pandas import DataFrame from collections import deque import concurrent.futures from functools import partial from multiprocessing import cpu_count import copy +from typing import Set, Dict, Union + +from chronic import Timer, timings, clear as clear_timings +from pandas import DataFrame import numpy as np import networkx as nx @@ -20,49 +49,53 @@ from propnet.core.models import Model, CompositeModel from propnet.core.quantity import QuantityFactory from propnet.core.provenance import SymbolTree, TreeElement -from propnet.symbols import Symbol +from propnet.core.symbols import Symbol from propnet.core.utils import Timeout - from propnet.core.registry import Registry -from typing import Set, Dict, Union - - logger = logging.getLogger(__name__) +"""logging.Logger: Logger for debugging""" -class Graph(object): +class Graph: """ - Class containing methods for creating and interacting with a - Property Network. - The Property Network contains a set of Node namedtuples with - connections stored as directed edges between the nodes. - Upon initialization a base graph is constructed consisting of all - valid SymbolTypes and Models found in surrounding folders. These are - Symbol and Model node_types respectively. Connections are formed - between the nodes based on given inputs and outputs of the models. - At this stage the graph represents a symbolic web of properties - without any actual input values. - Materials and Properties / Conditions can be added at runtime using - appropriate support methods. These methods dynamically create - additional PropnetNodes and edges on the graph of Material and - Quantity node_types respectively. - Given a set of Materials and Properties / Conditions, the symbolic - web of properties can be utilized to predict values of connected - properties on demand. - Attributes: - _symbol_types ({str: Symbol}): data structure mapping Symbol - name to Symbol object. - _models ({str: Model}): data structure mapping Model name to - Model object. - _input_to_model ({Symbol: {Model}}): data structure mapping - Symbol inputs to a set of corresponding Model objects that - take that Symbol as an input. - _output_to_model ({Symbol: {Model}}): data structure mapping - Symbol outputs to a set of corresponding Model objects that - produce that Symbol as an output. - *** Dictionaries can be searched by supplying Symbol objects or - Strings as to their names. + Class containing methods for creating and interacting with the propnet knowledge graph. + This graph accepts a material (or multiple for composite models) and recursively applies + the knowledge graph to derive all possible property values (quantities) available on + the graph. + + Notes: + To use the built-in propnet models, you must explicitly import them using: + + >>> import propnet.models + + This will register the models in the ``Registry`` and allow you to load them. + If you do not import them or supply them yourself at instantiation, the knowledge + graph will contain no models. + + Examples: + The ``evaluate()`` method will be the main entry point for the graph evaluation. + + >>> from propnet.core.graph import Graph + >>> from propnet.core.materials import Material + >>> g = Graph() + >>> material = Material(...) 
+ >>> evaluated_material = g.evaluate(material) + + However, propnet also has limited support for composite materials (materials made up + of more than one material). For those materials, use ``evaluate_composite()``. + + >>> from propnet.core.graph import Graph + >>> from propnet.core.materials import Material, CompositeMaterial + >>> g = Graph() + >>> m1 = Material(...) + >>> m2 = Material(...) + >>> evaluated_material = g.evaluate_composite(CompositeMaterial([m1, m2])) + + The composite evaluation algorithm is somewhat slow and there are not a large number + of composite models available in the built-in library. However, we are always + accepting contributions! + """ def __init__(self, @@ -72,19 +105,20 @@ def __init__(self, parallel: bool = False, max_workers: int = None) -> None: """ - Creates a graph instance. - - Note: models and symbols are selected from the Registry() class unless specified explicitly. - To include all built-in propnet models, import them from propnet.models and propnet.symbols. - Args: - models (dict): models to use for graph evaluation. Default: all registered models. - composite_models (dict): composite models to use for graph evaluation. - Default: all registered composite models. - symbol_types (dict): symbols to use for graph evaluation. Default: all registered symbols - parallel (bool): True creates a pool of workers for parallel graph evaluation. Default: False - max_workers (int): Number of workers for parallel worker pool. Default: None, for serial evaluation, - max number of available CPUs for parallel. + models (`dict` or `None`): optional, dict of models to use for graph evaluation, + keyed by model name. Default: ``None`` (dictionary returned by + ``Registry('models')``) + composite_models (`dict` or `None`): optional, dict of composite models to use for + graph evaluation, keyed by model name. Default: ``None`` (dictionary returned by + ``Registry('composite_models')``) + symbol_types (`dict` or `None`): optional, dict of symbols to use for graph evaluation. + Note all symbols used by the desired models must be included or an error will occur. + Default: ``None`` (dictionary returned by ``Registry('symbols')``) + parallel (bool): ``True`` creates a pool of workers for parallel graph evaluation. + Default: ``False`` (runs serially) + max_workers (int): Number of workers for parallel worker pool. + Default: ``None`` (1 for serial, max number of available CPUs for parallel) """ # set our defaults if no models/symbol types supplied @@ -128,11 +162,12 @@ def __init__(self, def __str__(self): """ - Returns a full summary of the graph in terms of the SymbolTypes, + Returns a full summary of the graph in terms of the Symbols, Materials, and Models that it contains. Connections are shown as nesting within the printout. + Returns: - (str) representation of this Graph object. + str: representation of this Graph object. """ summary = ["Propnet Printout", ""] summary += ["Properties"] @@ -147,26 +182,24 @@ def __str__(self): def update_symbol_types(self, symbol_types): """ - Add / redefine user-defined symbol types to the graph. If the - input, symbol_types, includes keys in self._symbol_types, - they are redefined. + Adds Symbol objects to the graph. If a Symbol with a given name + is already defined on the graph, it will be replaced. 
+ Args: - symbol_types ({name: Symbol}): symbol types to add - Returns: - None + symbol_types (dict): dictionary of ``Symbol`` objects to add, + keyed by symbol name """ - for (k, v) in symbol_types.items(): - self._symbol_types[k] = v + self._symbol_types.update(symbol_types) def remove_symbol_types(self, symbol_types): """ - Removes user-defined Symbol objects to the Graph. Removes + Removes Symbol objects from the Graph. Removes any models that input or output this Symbol because they - are no longer defined without the given symbol_types. + can no longer be defined without the given Symbol. + Args: - symbol_types ({name:Symbol}): symbol types to remove - Returns: - None + symbol_types (dict): dictionary of ``Symbol`` objects to remove, + keyed by symbol name """ models_to_remove = {} for symbol in symbol_types.keys(): @@ -189,27 +222,25 @@ def remove_symbol_types(self, symbol_types): def get_symbol_types(self): """ - Getter method, returns a set of all Symbol objects - present on the graph. - Returns ({Symbol}): - set of symbols present on the graph + Gets a set of all Symbol objects present on the graph. + + Returns: + `set` of `propnet.core.symbols.Symbol`: symbols present on the graph """ - to_return = set() - for s in self._symbol_types.values(): - to_return.add(s) - return to_return + return set(self._symbol_types.values()) def update_models(self, models): """ - Add / redefine user-defined models to the graph. If the input, - models, includes keys in self._models, they are redefined. + Adds Model objects to the graph. If a Model with a given name + is already defined on the graph, it will be replaced. The addition of a model may fail if appropriate Symbol objects are not already on the graph. If any addition operation fails, the entire update is aborted. + Args: - models ({name: Model}): Instances of the model class - Returns: - None + models (dict): dictionary of ``Model`` objects to add, + keyed by model name + """ added = {} for model in models.values(): @@ -235,11 +266,11 @@ def update_models(self, models): def remove_models(self, models): """ - Remove user-defined models from the Graph. + Removes models from the graph. + Args: - models ({name: Model}): Instances of the model class - Returns: - None + models (dict): dictionary of ``Model`` objects to remove, + keyed by model name """ for model in models.keys(): if model not in self._models.keys(): @@ -257,26 +288,24 @@ def remove_models(self, models): def get_models(self) -> Dict[str, Model]: """ - Getter method, returns a set of all model objects present - on the graph. - Returns ({Model}): - set of models in the graph + Gets a set of all Model objects present on the graph. + + Returns: + dict: dictionary of models present on the graph, keyed by name """ - to_return = dict() - for model in self._models.values(): - to_return[model.name] = model - return to_return + return {model.name: model for model in self._models.values()} def update_composite_models(self, composite_models): """ - Add / redefine user-defined composite_models to the graph. - If the input, composite_models, includes keys in self._composite_models, they are redefined. - The addition of a composite_models may fail if appropriate Symbol objects are not already on the graph. - If any addition operation fails, the entire update is aborted. + Adds composite models (CompositeModel objects) to the graph. + If a CompositeModel with a given name is already defined on the graph, + it will be replaced. 
The addition of a model may fail if appropriate Symbol objects + are not already on the graph. If any addition operation fails, + the entire update is aborted. + Args: - composite_models (dict): Instances of the CompositeModel class - Returns: - None + composite_models (dict): dictionary of ``CompositeModel`` objects to add, + keyed by model name """ added = {} for model in composite_models.values(): @@ -286,41 +315,48 @@ def update_composite_models(self, composite_models): for input_ in input_set: input_ = CompositeModel.get_variable(input_) if input_ not in self._symbol_types.keys(): + self.remove_composite_models(added) raise KeyError("Attempted to add a model to the property " "network with an unrecognized Symbol. " "Add {} Symbol to the property network before " "adding this model.".format(input_)) - def remove_composite_models(self, super_models): + def remove_composite_models(self, composite_models): """ - Remove user-defined models from the Graph. + Removes composite models from the graph. + Args: - super_models (dict): Instances of the SuperModel class - Returns: - None + composite_models (dict): dictionary of ``CompositeModel`` objects to remove, + keyed by model name """ - for model in super_models.keys(): + for model in composite_models.keys(): if model not in self._composite_models.keys(): raise Exception("Attempted to remove a model not currently present in the graph.") del self._composite_models[model] def get_composite_models(self): """ - Getter method, returns a set of all model objects present on the graph. + Gets a set of all CompositeModel objects present on the graph. + Returns: - (set) + `set` of `propnet.core.models.CompositeModel`: composite models + present on the graph """ - to_return = set() - for model in self._composite_models.values(): - to_return.add(model) - return to_return + return set(self._composite_models.values()) def get_networkx_graph(self, include_orphans=True): """ - Generates a networkX data structure representing the property - network and returns this object. + Generates a networkX data structure representing the propnet knowledge + graph with Symbol and Model objects as nodes and their input/output as + directed edges. + + Args: + include_orphans (bool): optional, ``True`` adds symbols which are not + connected to any models to the graph object. ``False`` omits them. + Default: ``True`` (include unconnected symbols) + Returns: - (networkX.multidigraph) + networkx.MultiDiGraph: NetworkX representation of knowledge graph """ graph = nx.MultiDiGraph() @@ -341,6 +377,7 @@ def get_networkx_graph(self, include_orphans=True): graph.add_node(symbol) # Format nodes + # TODO: Update nx formatting with current cytoscape style for node in graph: if isinstance(node, Symbol): nx.set_node_attributes(graph, {node: "#43A1F8"}, "fillcolor") @@ -357,15 +394,18 @@ def get_networkx_graph(self, include_orphans=True): def create_file(self, filename='out.dot', draw=False, prog='dot', include_orphans=False, **kwargs): """ - Output the graph to a file + Output the propnet knowledge graph to a file using pygraphviz. + Args: - filename (str): filename for file - draw (bool): whether to draw or write file - include_orphans (bool): whether to include orphan symbols - in graph output - **kwargs (kwargs): kwargs to draw or write - Returns: - None + filename (str): optional, filename for file. Default: ``'out.dot'`` + draw (bool): optional, ``True`` renders positions for the nodes and + edges with ``pygraphviz.AGraph.draw()``. 
``False`` outputs only the
+                abstract node/edge data using ``pygraphviz.AGraph.write()``.
+                Default: ``False`` (write data only)
+            include_orphans (bool): optional, ``True`` adds symbols which are not
+                connected to any models to the graph object. ``False`` omits them.
+                Default: ``False`` (omit unconnected symbols)
+            **kwargs: optional parameters to pygraphviz ``draw()`` or ``write()``.
         """
         nxgraph = self.get_networkx_graph(include_orphans)
         agraph = nx.nx_agraph.to_agraph(nxgraph)
@@ -375,39 +415,40 @@ def create_file(self, filename='out.dot', draw=False, prog='dot',
         else:
             agraph.write(filename, **kwargs)
 
-    # TODO: can we remove this?
-    def calculable_properties(self, property_type_set):
+    # TODO: can we remove this or make it simpler?
+    def calculable_properties(self, input_symbols):
         """
-        Given a set of Symbol objects, returns all new Symbol objects
+        Given a set of input Symbol objects, determines all new Symbol objects
         that may be calculable from the inputs. Resulting set contains
         only those new Symbol objects derivable.
-        The result should be used with caution:
-            1) Models may not produce an output if their input
-            conditions are not met.
-            2) Models may require more than one Quantity of a
-            given Symbol type to generate an output.
+
+        Notes:
+            The result should be used with caution:
+            - Models may not produce an output if their input
+              conditions are not met.
+            - Models may require more than one Quantity of a
+              given Symbol type to generate an output.
+
         Args:
-            property_type_set ({Symbol}): the set of Symbol objects
-                taken as starting properties.
+            input_symbols (`set` of `propnet.core.symbols.Symbol`): the set of
+                Symbol objects taken as starting input properties.
+
         Returns:
-            (({Symbol}, {Model})) the set of all Symbol objects that
-                can be derived from the property_type_set, the set of
-                all Model objects that are used in deriving the new
-                Symbol objects.
+            `set` of `propnet.core.symbols.Symbol`: the set of all Symbol objects that
+                can be derived from the given input Symbols
+
         """
         # Set of theoretically derivable properties.
         derivable = set()
 
         # Set of theoretically available properties.
-        working = set()
-        for property_type in property_type_set:
-            working.add(property_type)
+        working = set(input_symbols)
 
         # Set of all models that could produce output.
         all_models = set()
         c_models = set()
-        for property_type in property_type_set:
-            for model in self._input_to_model[property_type]:
+        for sym in input_symbols:
+            for model in self._input_to_model[sym]:
                 all_models.add(model)
                 c_models.add(model)
 
@@ -470,47 +511,52 @@
 
         return derivable
 
-    def required_inputs_for_property(self, property_):
+    def required_inputs_for_property(self, target):
         """
         Determines all potential paths leading to a given symbol object.
         Answers the question: What sets of properties are required to
         calculate this given property?
+
         Paths are represented as a series of models and required input
         Symbol objects. Paths can be searched to determine specifically
         how to get from one property to another.
-        Warning: Method indicates sets of Symbol objects required
-            to calculate the property. It does not indicate how
+
+        Notes:
+            Warning: Method indicates sets of Symbol objects required
+            to calculate the property. It does not indicate how
             many of each Symbol is required. 
It does not guarantee - that supplying Quantities of these types will result - in a new Symbol output as conditions / assumptions may + that supplying Quantity objects of these types will result + in a new Symbol output as conditions/constraints may not be met. + + Args: + target (Symbol): desired target symbol Returns: - propnet.core.utils.SymbolTree + SymbolTree: pathways to target property represented as a tree """ - head = TreeElement(None, {property_}, None, None) + head = TreeElement(None, {target}, None, None) self._tree_builder(head) return SymbolTree(head) - def _tree_builder(self, to_expand: TreeElement): + def _tree_builder(self, tree_to_expand: TreeElement): """ - Recursive helper method to build a SymbolTree. Fills in + Recursive helper method to build a SymbolTree. Fills in the children of to_expand by all possible model substitutions. + Args: - to_expand: (TreeElement) element that will be expanded - Returns: - None + tree_to_expand (TreeElement): element that will be expanded in place """ # Get set of symbols that no longer need to be replaced and # symbols that are candidates for replacement. replaced_symbols = set() # set of all symbols already replaced. # equal to all parents' minus expand's symbols. - parent = to_expand.parent + parent = tree_to_expand.parent while parent is not None: replaced_symbols.update(parent.inputs) parent = parent.parent - replaced_symbols -= to_expand.inputs - candidate_symbols = to_expand.inputs - replaced_symbols + replaced_symbols -= tree_to_expand.inputs + candidate_symbols = tree_to_expand.inputs - replaced_symbols # Attempt to replace candidate_symbols # Replace them with inputs to models that output the candidate_symbols. @@ -520,7 +566,7 @@ def _tree_builder(self, to_expand: TreeElement): for symbol in candidate_symbols: c_models = self._output_to_model[symbol] for model in c_models: - parent = to_expand.parent + parent = tree_to_expand.parent parent: TreeElement can_continue = True while parent is not None: @@ -539,28 +585,34 @@ def _tree_builder(self, to_expand: TreeElement): if not can_continue: continue input_set = input_set | model.constraint_symbols - new_types = (to_expand.inputs - output_set) + new_types = (tree_to_expand.inputs - output_set) new_types.update(input_set) new_types = {self._symbol_types[x] for x in new_types} if new_types in prev[model]: continue prev[model].append(new_types) - new_element = TreeElement(model, new_types, to_expand, None) + new_element = TreeElement(model, new_types, tree_to_expand, None) self._tree_builder(new_element) outputs.append(new_element) # Add outputs to children and fill in their elements. - to_expand.children = outputs + tree_to_expand.children = outputs # TODO: can we remove this? def get_paths(self, start_property, end_property): """ - Returns all Paths + Returns all paths between two properties. + + Notes: + This method is very computationally expensive in its current implementation. + We are actively seeking more efficient ways of calculating pathways between + properties. Args: - start_property: (Symbol) starting Symbol type - end_property: (Symbol) ending Symbol type + start_property (Symbol): starting Symbol type + end_property (Symbol): ending Symbol type Returns: - (list) list enumerating the features of all paths. 
+ `list` of `propnet.core.provenance.SymbolPath`: list enumerating the features + of all paths """ tree = self.required_inputs_for_property(end_property) return tree.get_paths_from(start_property) @@ -568,10 +620,24 @@ def get_paths(self, start_property, end_property): def get_degree_of_separation(self, start_property: Union[str, Symbol], end_property: Union[str, Symbol]) -> Union[int, None]: """ - Returns the minimum number of models separating two properties. - Returns 0 if the start_property and end_property are equal. - Returns None if the start_property and end_properties are not connected. + Determines the minimum number of models separating two properties (symbols) + on the propnet knowledge graph. + + Notes: + Because the propnet knowledge graph is directed, A->B may have a + valid pathway, but B->A may not. + + Args: + start_property (`str` or `Symbol`): starting/input property + end_property (`str` or `Symbol`): ending/derived property + + Returns: + `int` or `None`: the minimum number of models separating the two properties, + where ``0`` indicates the starting and ending properties are equal and ``None`` + indicates the two properties are not connected by any models. """ + + # TODO: Would it be faster to use networkx? # Ensure we have the properties in the graph. if start_property not in self._symbol_types.keys(): raise ValueError("Symbol not found: " + str(start_property)) @@ -622,35 +688,45 @@ def get_degree_of_separation(self, start_property: Union[str, Symbol], @staticmethod def generate_input_sets(props, this_quantity_pool): """ - Generates all combinatorially-unique sets of input dicts given - a list of property names and a quantity pool + Generates all unique combinations of quantities given a list of needed + symbols/properties names and a pool of quantities to choose from. + Args: - props ([str]): property names - this_quantity_pool ({Symbol: Set(Quantity)}): quantities - keyed by symbols - Returns ([{str: Quantity}]): - list of symbol strings mapped to Quantity values. + props (`list` of `str` or `propnet.core.symbols.Symbol`): desired properties + in input set + this_quantity_pool (dict): quantity pool, as a dictionary of sets of quantities + keyed by their Symbol or symbol name + Yields: + `tuple` of `Quantity`: tuple of length ``len(props)`` containing Quantity objects + corresponding to each symbol in ``props``. """ aggregated_symbols = [] for prop in props: if prop not in this_quantity_pool.keys(): - return [] + return aggregated_symbols.append(this_quantity_pool[prop]) - return product(*aggregated_symbols) + yield from product(*aggregated_symbols) @staticmethod def get_input_sets_for_model(model, new_quantities, old_quantities): """ - Generates all of the valid input sets for a given model, a fixed - quantity, and a quantity pool from which to draw remaining properties + Generates all valid input sets for a given model, containing at least + one Quantity from ``new_quantities`` with the remainder drawn from + ``old_quantities``. 
+
         Args:
             model (Model): model for which to evaluate valid input sets
-            new_quantities ({symbol: [Quantity]}): quantities generated
-                during the most recent iteration of the evaluation loop
-            old_quantities ({symbol: [Quantity]}): quantities generated
-                in previous iterations of the evaluation loop
+            new_quantities (dict): quantities generated
+                during the most recent iteration of the evaluation loop,
+                as lists of Quantity objects keyed by symbol
+            old_quantities (dict): quantities generated
+                in previous iterations of the evaluation loop,
+                as lists of Quantity objects keyed by symbol
         Returns:
-            list of sets of input quantities for the model
+            Tuple[iterator, int]: returns tuple containing:
+
+            - iterator yielding input sets as tuples of Quantity objects
+            - integer corresponding to the number of items in the iterator
         """
 
         all_input_sets = []
@@ -682,17 +758,24 @@
 
     def generate_models_and_input_sets(self, new_quantities, quantity_pool):
         """
-        Helper method to generate input sets for models
+        Produces all input sets for all models on the graph that contain at least
+        one Quantity from ``new_quantities``.
+
         Args:
-            new_quantities ([Quantity]): list of new quantities from which
-                to derive new input sets (these are "fixed" quantities
-                in generate_input_sets_for_model)
-            quantity_pool ({symbol: {Quantity}}): dict of quantity sets
-                keyed by symbol from which to draw additional quantities
+            new_quantities (`list` of `BaseQuantity`): list of new quantities from which
+                to derive new input sets
+            quantity_pool (dict): dict of Quantity sets,
+                keyed by symbol, from which to draw additional quantities
                 for model inputs
         Returns:
-            ([tuple]): list of tuples of models and their associated input
-                sets, uses tuple so duplicate checking can be performed
+            Tuple[iterator, int]: tuple that contains:
+
+            - an iterator containing models and input sets as tuples containing:
+
+                - ``Model`` instance for which the input set is valid
+                - tuple of Quantity objects representing the input set
+
+            - an integer representing the total number of input sets in the iterator
         """
         models_and_input_sets = []
         n_total_input_sets = 0
@@ -717,28 +800,30 @@
 
     def derive_quantities(self, new_quantities, quantity_pool=None,
                           allow_model_failure=True, timeout=None):
         """
-        Derives new quantities using at least one quantity from "new_quantities" and the
-        remainder from either "new_quantities" or the specified "quantity_pool" as model inputs.
+        Derives new quantities using the models on the knowledge graph, with at least one quantity
+        from ``new_quantities`` and the remainder from either ``new_quantities`` or the specified
+        ``quantity_pool`` as inputs to the models.
 
         Args:
-            new_quantities ([Quantity]): list of quantities which to
+            new_quantities (`list` of `BaseQuantity`): list of quantities to
                 consider as new inputs to models
-            quantity_pool ({symbol: [Quantity]}): dict of quantity lists
-                keyed by symbol from which to draw additional quantities
-                for model inputs
-            allow_model_failure (bool): True allows graph evaluation to
+            quantity_pool (`dict` or `None`): optional, dict of lists of ``BaseQuantity`` objects,
+                keyed by their ``Symbol`` from which to draw additional quantities
+                for model inputs. 
Default: ``None`` (no pool) + allow_model_failure (bool): ``True`` allows graph evaluation to continue if an Exception is thrown during model evaluation for - violation of constraints or any other reason. False will + violation of constraints or any other reason. ``False`` will throw any Exception encountered during model evaluation. - Default: True - timeout (int): number of seconds to allow for a model to evaluate. + Default: ``True`` (ignore exceptions) + timeout (`int` or `None`): number of seconds to allow for a model to evaluate. After that time, model evaluation will be canceled and deemed - failed. "None" allows for infinite evaluation time. Default: None + failed. ``None`` allows for infinite evaluation time. Default: ``None`` (no limit) Returns: - additional_quantities ([Quantity]): new derived quantities - quantity_pool ({symbol: {Quantity}}): augmented version of - quantity pool + Tuple[list, dict]: returns a list and dict in a tuple: + + - derived quantities as `list` of `BaseQuantity` + - quantity pool augmented with quantities from ``new_quantities`` """ # Update quantity pool quantity_pool = quantity_pool or defaultdict(list) @@ -750,12 +835,13 @@ def derive_quantities(self, new_quantities, quantity_pool=None, models_and_input_sets, n_input_sets = self.generate_models_and_input_sets( new_quantities, quantity_pool) + # TODO: Maybe we should do this in evaluate() instead of here for quantity in new_quantities: quantity_pool[quantity.symbol].append(quantity) # input_tuples = [(v[0], v[1:]) for v in models_and_input_sets] - # This doesn't eliminate many and will be caught by cyclic filter + # The code below doesn't eliminate many and will be caught by cyclic filter # after evaluation. This is usually only important if the model we'd be # re-evaluating takes a long time, which the majority of our models do not # take a long time...can we move this into the generation step or just before @@ -789,15 +875,15 @@ def derive_quantities(self, new_quantities, quantity_pool=None, @staticmethod def _generates_noncyclic_output(input_set): """ - Helper function to determine if an input set of model and input quantities will - generate at least one output that is non-cyclic, meaning the output does not have - itself as an input in its provenance. + Determines if an input set of model and input quantities will generate at least one output that + is non-cyclic, meaning the output does not have itself as an input in its provenance. Args: - input_set (tuple(Model, list[Quantity]): input set to evaluate for cyclic outputs + input_set (tuple): input set to evaluate for cyclic outputs as a tuple of Model + and list of BaseQuantity objects. Returns: - (bool) True if at least one output of the model is non-cyclic. False if all outputs + bool: ``True`` if at least one output of the model is non-cyclic. ``False`` if all outputs are cyclic. """ model, inputs = input_set @@ -820,15 +906,18 @@ def _generates_noncyclic_output(input_set): @staticmethod def _run_serial(models_and_input_sets, allow_model_failure=True, timeout=None): """ - Evaluate a list of input sets serially. + Evaluates a list of input sets serially. Args: - models_and_input_sets (list[tuple(model, list[Quantity])]): input sets to evaluate - allow_model_failure (bool): True suppresses exceptions raised during model evaluation. Default: True - timeout (int): number of seconds after which to timeout model evaluation. 
Default: None (infinite) + models_and_input_sets (`list` of `tuple`): input sets to evaluate as a list of tuples containing + a Model and a list of BaseQuantity objects. + allow_model_failure (bool): optional, ``True`` suppresses exceptions raised during model evaluation. + ``False`` throws them as they are raised. Default: ``True`` (no exceptions raised) + timeout (int): optional, number of seconds after which to timeout model evaluation. + Default: ``None`` (no limit) Returns: - (list[Quantity]) output quantities from input set evaluation. + `list` of `BaseQuantity`: output quantities from input set evaluation. """ outputs = [] model_timings = [] @@ -856,17 +945,20 @@ def _run_parallel(executor, n_workers, models_and_input_sets, allow_model_failure=True, timeout=None): """ - Evaluate a list of input sets in parallel. + Evaluate a list of input sets in parallel. Args: executor (concurrent.futures.ProcessPoolExecutor): executor for input sets n_workers (int): number of processes used by executor - models_and_input_sets (list[tuple(model, list[Quantity])]): input sets to evaluate - allow_model_failure (bool): True suppresses exceptions raised during model evaluation. Default: True - timeout (int): number of seconds after which to timeout model evaluation. Default: None (infinite) + models_and_input_sets (`list` of `tuple`): input sets to evaluate as a list of tuples containing + a Model and a list of BaseQuantity objects. + allow_model_failure (bool): optional, ``True`` suppresses exceptions raised during model evaluation. + ``False`` throws them as they are raised. Default: ``True`` (no exceptions raised) + timeout (int): optional, number of seconds after which to timeout model evaluation. + Default: ``None`` (no limit) Returns: - (list[Quantity]) output quantities from input set evaluation. + `list` of `BaseQuantity`: output quantities from input set evaluation. """ func = partial(Graph._evaluate_model, allow_failure=allow_model_failure, @@ -892,20 +984,27 @@ def _run_parallel(executor, n_workers, models_and_input_sets, @staticmethod def _evaluate_model(model_and_input_set, allow_failure=True, timeout=None): """ - Workhorse function to evaluate an input set. + Evaluates an input set. + + Notes: + The exception is returned instead of thrown because in parallel, the exception will be suppressed + by the map() function used to execute the model evaluation in parallel. Additionally, the timings + are returned because they do not sum when run in parallel because timing is executed on different + processors. Args: - model_and_input_set (tuple(Model, list[Quantity])): input set to evaluate - allow_failure (bool): True suppresses exceptions raised during model evaluation. Default: True - timeout: number of seconds after which to timeout model evaluation. Default: None (infinite) + model_and_input_set (tuple): input set to evaluate as a tuple containing + a Model and a list of BaseQuantity objects. + allow_failure (bool): optional, ``True`` suppresses exceptions raised during model evaluation. + ``False`` throws them as they are raised. Default: ``True`` (no exceptions raised) Returns: - (list): List of quantities calculated from model. If model failed and allow_failure = True, will - return an empty list. If allow_failure = False, will return a list with a tuple - (Exception, input_set) as its only element. 
+ Tuple[list, dict]: tuple containing the following data: - Note: The exception is returned instead of thrown because in parallel, the exception will be suppressed - by the map() function used to execute the model evaluation in parallel. + - list of quantities calculated from model. If model failed and allow_failure = True, will + return an empty list. If allow_failure = False, will return a list with a tuple + (Exception, input_set) as its only element. + - dictionary of timing data for this model """ # from chronic import Timer as Timer_ # from chronic import timings as timings_ @@ -945,21 +1044,22 @@ def _evaluate_model(model_and_input_set, allow_failure=True, timeout=None): def evaluate(self, material, allow_model_failure=True, timeout=None): """ Given a Material object as input, creates a new Material object - to include all derivable properties. Optional argument limits the - scope of which models or properties are tested. Returns a - reference to the new, augmented Material object. + to include all derivable properties for that material. + + Notes: + Model timeout does not work on non-Unix machines based on the implementation. ``timeout`` will + be ignored on these machines. Args: - material (Material): which material's properties will be expanded. - allow_model_failure (bool): whether to continue with graph evaluation - if a model fails. - timeout (int): number of seconds after which model evaluation should + material (Material): a material whose properties will be expanded + allow_model_failure (bool): optional, ``True`` continues with graph evaluation + if a model fails. ``False`` throws the exception. + Default: ``True`` (ignore failed models) + timeout (`int` or `None`): optional, number of seconds after which model evaluation should quit. This is to cut off long-running models. - Default: None (infinite evaluation time) + Default: ``None`` (infinite evaluation time) Returns: - (Material) reference to the newly derived material object. - - Note: Model timeout does not work on non-Unix machines based on the implementation. + Material: material object containing all properties, derived + original inputs """ logger.debug("Beginning evaluation") @@ -985,21 +1085,23 @@ def evaluate_composite(self, material, allow_model_failure=True, timeout=None): """ Given a CompositeMaterial object as input, creates a new CompositeMaterial - object to include all derivable properties. Returns a reference to - the new, augmented CompositeMaterial object. + object to include all derivable properties for that material. Args: - material (CompositeMaterial): material for which properties - will be expanded. - allow_model_failure (bool): True allows non-composite models to fail - during graph evaluation of a material. Default: True - allow_composite_model_failure (bool): True allows composite model evaluation - to fail during evaluation. Default: True - timeout (int): number of seconds after which to terminate non-composite - model evaluation. Default: None (infinite evaluation time) + material (CompositeMaterial): material whose properties + will be expanded + allow_model_failure (bool): optional, ``True`` continues with graph evaluation + if a non-CompositeModel fails. ``False`` throws the exception. + Default: ``True`` (ignore failed non-composite models) + allow_composite_model_failure (bool): ``True`` continues with graph evaluation + if a CompositeModel fails. ``False`` throws the exception. 
+ Default: ``True`` (ignore failed composite models) + timeout (`int` or `None`): optional, number of seconds after which non-CompositeModel + evaluation should quit. This is to cut off long-running models. + Default: ``None`` (infinite evaluation time) Returns: - (Material) reference to the newly derived material object. + CompositeMaterial: composite material object containing all properties, derived + original inputs """ # TODO: Let's parallelize this eventually. It's not immediately obvious to me @@ -1110,21 +1212,25 @@ def clear_statistics(self): """ Clears model evaluation timings. - Note: if you are using chronic.Timer for timing outside this Graph object, - this function will clear your timers causing an error if the Timer objects - are currently running. + Notes: + If you are using the ``chronic.Timer`` module for timing outside this Graph object, + this function will clear your timers causing an error if the Timer objects + are currently running. """ self._graph_timings = None self._model_timings = None - clear() + clear_timings() @property def model_evaluation_statistics(self): """ - :return: A Pandas DataFrame containing statistics on how + Compiles a pandas DataFrame containing statistics on how many times each model was evaluated, average time per model, and the total time taken for that model. + + Returns: + pandas.DataFrame: model calculation statistics """ rows = [{'Model Name': model, @@ -1141,11 +1247,11 @@ def model_evaluation_statistics(self): @staticmethod def _append_timing_result(model_timings): """ - Helper function to append model timings collected from parallel processes to + Adds model timings collected from parallel processes to the timings module in this thread/process. Args: - model_timings (list[dict]): list of model timings returned from evaluation + model_timings (`list` of `dict`): list of model timings returned from evaluation """ if 'timings' not in timings['_graph_evaluation']: timings['_graph_evaluation']['timings'] = dict() diff --git a/propnet/core/materials.py b/propnet/core/materials.py index e0ebd9e8..9adb41f3 100644 --- a/propnet/core/materials.py +++ b/propnet/core/materials.py @@ -1,48 +1,62 @@ -""" -Module containing classes and methods for Material functionality in propnet code. +"""Materials objects to hold properties of materials for evaluation. + +This module establishes objects to represent single (Material) and mixed (CompositeMaterial) materials. +They are effectively containers for their properties (and processing conditions). The material objects +are the expected inputs for propnet's Graph class. + +Example: + Material objects can be instantiated empty or with a list of Quantity objects representing the + material's properties: + + >>> from propnet.core.materials import Material + >>> from propnet.core.quantity import QuantityFactory as QF + >>> band_gap = QF.create_quantity('band_gap', 5, 'eV') + >>> bulk_modulus = QF.create_quantity('bulk_modulus', 100, 'GPa') + >>> m = Material([band_gap, bulk_modulus]) # Initialize with list, or... 
+ >>> m = Material() # Initialize empty and add properties + >>> m.add_quantity(band_gap) + >>> m.add_quantity(bulk_modulus) + """ +import logging from collections import defaultdict from itertools import chain -from propnet.core.quantity import QuantityFactory, NumQuantity +from propnet.core.quantity import QuantityFactory, NumQuantity, BaseQuantity from propnet.core.symbols import Symbol - -# noinspection PyUnresolvedReferences -import propnet.symbols from propnet.core.registry import Registry -import logging logger = logging.getLogger(__name__) +"""logging.Logger: Logger for debugging""" -class Material(object): +class Material: """ - Class containing methods for creating and interacting with Material objects. + Class containing methods to interact with materials with a single composition. This class is intended to + be a container for materials properties. - Under the Propnet infrastructure, Materials are the medium through which properties are - communicated. While Model and Symbol nodes create a web of interconnected properties, - Materials, as collections of Quantity nodes, provide concrete numbers to those properties. - At runtime, a Material can be constructed and added to a Graph instance, merging the two - graphs and allowing for propagation of concrete numbers through the property web. + Examples: + The example shown above largely demonstrates the utility of this class. However, it is worth noting + that a Material object can be accessed like a dictionary keyed by Symbol objects to retrieve the + set of quantities that correspond to that symbol. - A unique hashcode is stored with each Material upon instantiation. This is used to - differentiate between different materials at runtime. - - Attributes: - symbol_quantities_dict (dict>): data structure mapping Symbols to a list of corresponding - Quantity objects of that type. + >>> m = Material([...]) + >>> quantities = m['band_gap'] + >>> print(quantities) + {, ...} """ + def __init__(self, quantities=None, add_default_quantities=False): """ - Creates a Material instance, instantiating a trivial graph of one node. - Args: - quantities ([Quantity]): list of quantities to add to - the material - add_default_quantities (bool): whether to add default - quantities (e. g. room temperature) to the graph + quantities (`list` of `BaseQuantity` or `None`): optional, list of quantities to add to + the material. Default: ``None`` (no properties added) + add_default_quantities (bool): ``True`` adds default quantities (e.g. room temperature) + to the graph. ``False`` omits them. Default quantities are defined as Symbols who have + a default value specified and are registered in ``Registry('symbol_values')``. + Default: ``False`` (omit default quantities) """ self._quantities_by_symbol = defaultdict(set) if quantities is not None: @@ -54,38 +68,29 @@ def __init__(self, quantities=None, add_default_quantities=False): def add_quantity(self, quantity): """ - Adds a property to this property collection. + Adds a property to this material. Args: - quantity (Quantity): property to be bound to the material. - - Returns: - None + quantity (BaseQuantity): property to be bound to the material. """ self._quantities_by_symbol[quantity.symbol].add(quantity) def remove_quantity(self, quantity): """ - Removes the Quantity object attached to this Material. + Removes a quantity attached to this Material. Args: - quantity (Quantity): Quantity object reference indicating - which property is to be removed from this Material. 
- - Returns: - None + quantity (BaseQuantity): reference to quantity object to be removed """ if quantity.symbol not in self._quantities_by_symbol: - raise Exception("Attempting to remove quantity not present in " - "the material.") + raise KeyError("Attempting to remove quantity not present in " + "the material.") self._quantities_by_symbol[quantity.symbol].remove(quantity) def add_default_quantities(self): """ - Adds any default symbols which are not present in the graph - - Returns: - None + Adds any default symbols which are not present in the graph. Default symbols + are sourced from ``Registry('symbol_values')``. """ new_syms = set(Registry("symbol_values").keys()) new_syms -= set(self._quantities_by_symbol.keys()) @@ -97,47 +102,48 @@ def add_default_quantities(self): def remove_symbol(self, symbol): """ - Removes all Quantity Nodes attached to this Material of type symbol. + Removes all quantities attached to this material of a particular Symbol type. Args: - symbol (Symbol): object indicating which property type - is to be removed from this material. - - Returns: - None + symbol (Symbol): symbol to be removed from the material """ if symbol not in self._quantities_by_symbol: - raise Exception("Attempting to remove Symbol not present in the material.") + raise KeyError("Attempting to remove Symbol not present in the material.") del self._quantities_by_symbol[symbol] def get_symbols(self): """ - Obtains all Symbol objects bound to this Material. + Obtains all Symbol types bound to this material. Returns: - (set) set containing all symbols bound to this Material. + `set` of `propnet.core.symbols.Symbol`: set containing all symbols bound to this material """ return set(self._quantities_by_symbol.keys()) def get_quantities(self): """ - Method obtains all Quantity objects bound to this Material. + Obtains all quantity objects bound to this material. + Returns: - (list) list of all Quantity objects bound to this Material. + `list` of `propnet.core.quantity.BaseQuantity`: list of all quantity objects bound to this material """ return list(chain.from_iterable(self._quantities_by_symbol.values())) @property def symbol_quantities_dict(self): + """ + dict: mapping of Symbols to the set of quantities of that Symbol type attached to the material + """ + # TODO: This may not be safe enough. Might need deep copy. return self._quantities_by_symbol.copy() def get_aggregated_quantities(self): """ - Return mean values for all quantities for each symbol. + Aggregates multiple quantities of the same symbol by calculating their mean. Does not mutate this + Material object. Returns: - (dict>): data-structure - storing all properties / descriptors that arise from the - joining of multiple materials - materials (list): set of materials contained in the Composite + materials (`list` of `Material`): list of materials contained in the CompositeMaterial """ - def __init__(self, materials_list): + def __init__(self, materials): """ - Creates a Composite Material instance. - Args: - materials_list (list): list of materials contained - in the Composite + materials (`list` of `Material`): list of materials contained + in the CompositeMaterial """ - self.materials = materials_list + self.materials = materials super(CompositeMaterial, self).__init__() diff --git a/propnet/core/models.py b/propnet/core/models.py index d32d9e18..a5dc71eb 100644 --- a/propnet/core/models.py +++ b/propnet/core/models.py @@ -1,5 +1,74 @@ -""" -Module containing classes and methods for Model functionality in propnet code. 
+"""Models representing connections between materials properties. + +This module contains classes that represent models, or the way that materials properties +are connected to one another. propnet features several kinds of models: + +- *EquationModel*: used for relationships between properties of a single material easily expressed as a + simple mathematical equation +- *PyModel*: custom Python modules used for more complex relationships between properties of a single material + not easily expressed with a simple mathematical equation +- *CompositeModel*: Python-based models for calculating properties for mixed/composite materials + +These three classes are the main model types used. `PyModuleModel` and `PyModuleCompositeModel` are not intended +for direct instantiation by the user, but are helper classes for constructing `PyModel` objects from the templated +Python modules in propnet's core model library. + +The `Constraint` class is meant to function similar to an `EquationModel` and constrains the values of properties +used in a model. They are initialized with an equality or inequality statement that must be satisfied for the input +or output of the model to be considered valid. These `Constraint` objects can be passed to a model in the +``'constraints'`` keyword. + +The recommended approach to creating models is by following the template approach, as described in the demo +iPython notebook of the repository (``/demo/Getting Started.ipynb``). Once a YAML (for equation-based models) +or a Python module template (for more complex models or composite material models) is completed, place them in the +correct directory under ``/models/`` and they will be imported with ``import propnet.models``. +However, each model type can be constructed manually. See the individual classes for examples. + +Examples: + There are two methods to run a model on some input data: ``plug_in()`` and ``evaluate()``. The distinction + between the two methods is in the format of the inputs and how that data is handled. Both methods take dictionaries + as input arguments. + + As an example, say we have an equation-based model ``model`` which relates some property `symbol_a` to some + property `symbol_b` by ``B = 2 * A**3`` where `A` is the variable representing the value of `symbol_a` in units + `unit_of_a` and `B` represents the value of `symbol_b` in `unit_of_b` where ``unit_of_b = unit_of_a**3``. + + For ``plug_in()``, the method expects the input dict's keys to be the variable names in the + equation or the expected variable names in the Python module. The values are expected to be the raw data, ready + to be used by the Python module or plugged into an equation without change. So, they must be of the correct Python + type and unit/dimensionality (for numerical inputs). For our example model: + + >>> model.plug_in({'A': 5}) + {'B': 250} + + The output will be variable keyed with raw values as well. Raw in, raw out. + + For ``evaluate()``, the method expects the input dict's keys to be the symbol names the model requires and the + values are expected to be Quantity objects of those symbol types. ``evaluate()`` or the Quantity class will take + care of any data type or unit conversions necessary to run the model. 
For our example model: + + >>> from propnet.core.quantity import QuantityFactory as QF + >>> output = model.evaluate({'symbol_a': QF.create_quantity('symbol_a', 5, 'unit_of_a')}) + >>> print(output) + {'symbol_b': <symbol_b, 250 unit_of_b>, 'successful': True} + >>> type(output['symbol_b']) + propnet.core.quantity.NumQuantity + + The output will be symbol indexed with Quantity outputs and a flag to indicate if the model was successfully + evaluated. ``evaluate()`` also has more sophisticated checks to ensure that the input and output are valid and + do not violate any constraints placed on the model or the symbols that it is using. + + For example, if `A` could have different units `other_a_unit` where ``unit_of_a = 100 * other_a_unit``, + ``evaluate()`` would know how to handle the unit conversion, if needed, whereas ``plug_in()`` does not. + + >>> model.evaluate({'symbol_a': QF.create_quantity('symbol_a', 0.05, 'other_a_unit')}) + {'symbol_b': <symbol_b, 2.5e-10 unit_of_b>, 'successful': True} + >>> model.plug_in({'A': 0.05}) + {'B': 0.00025} + + If the model requires the inputs be in certain units to be evaluated correctly (i.e. if your model is empirical), + models can be designed to convert units automatically upon calling ``evaluate()`` by invoking the + ``units_for_evaluation`` keyword upon instantiation. """ import os @@ -9,6 +78,7 @@ from itertools import chain import warnings import six +from copy import deepcopy from monty.serialization import loadfn from monty.json import MSONable, MontyDecoder @@ -33,11 +103,25 @@ class Model(ABC): """ - Abstract model class for all models appearing in propnet - + Abstract model class for all models appearing in propnet. All models will have the attributes and + functions described here, although some will behave differently based on implementation. + + Attributes: + name (str): unique name for the model + description (str): information about the model and its origin, constraints, etc. This information + is displayed on the website. + display_names (`list` of `str`): nicely formatted name(s) of the model + categories (`list` of `str`): metadata categorizing the model ("empirical", "electronic", + "mechanical", etc.) + implemented_by (`list` of `str`): list of authors (GitHub usernames, preferably) who implemented + the model + references (`list` of `str`): BibTeX strings of scholarly references for the model + constraints (`list` of `propnet.core.models.Constraint`): list of Constraint objects representing + requirements that must be met for inputs/outputs to the model """ _registry_name = "models" + """str: name of the Registry in which to register instances of this class""" def __init__(self, name, connections, constraints=None, display_names=None, description=None, categories=None, references=None, implemented_by=None, @@ -45,58 +129,75 @@ def __init__(self, name, connections, constraints=None, display_names=None, is_builtin=False, register=True, overwrite_registry=True): """ - Abstract base class for model implementation. - Args: - name (str): title of the model + name (str): unique name for the model connections (`list` of `dict`): list of connections dictionaries, which take - the form ``{"inputs": [variables], "outputs": [variables]}``, e. g.:
+ the form ``{"inputs": [variables], "outputs": [variables]}``, e.g.: ``connections = [{"inputs": ["p", "T"], "outputs": ["V"]},`` ``{"inputs": ["T", "V"], "outputs": ["p"]}]`` - constraints (str, Constraint): string expressions or + constraints (str, Constraint, None): optional, string expressions or Constraint objects of some condition on which the model is valid, e. g. ``"n > 0"``, note that this must include variables if - there is a variable_symbol_map - display_names (`list` of `str`): optional, list of alternative names to use for - display - description (str): long form description of the model - categories (`list` of `str`): list of categories applicable to + there is a ``variable_symbol_map``. Default: ``None`` (no constraints) + display_names (`list` of `str`, `None`): optional, list of formatted names to use for + display. Default: ``None`` (sets ``display_name`` property to a list with one item, + which is the name in title case with underscores replaced with spaces, + e.g. "name_of_a_model" would become "Name of a Model") + description (str, None): optional, long form description of the model + categories (`list` of `str`, `None`): optional, list of categories applicable to the model - references (`list` of `str`): list of the informational links - explaining / supporting the model - implemented_by (`list` of `str`): list of authors of the model by their - github usernames - variable_symbol_map (dict): mapping of variable strings enumerated - in the plug-in method to canonical symbols, e. g. - ``{"n": "index_of_refraction"}`` etc. - units_for_evaluation (`str`, `dict`): if specified, coerces the units of + references (`list` of `str`, `None`): list of the informational links + explaining / supporting the model. Each string should be a BibTeX string or + in the form ``"type:info"`` where ``type`` is one of "url", "doi", or "isbn" + and ``info`` contains the relevant data. These types will be automatically converted + to BibTeX strings by lookup using ``propnet.core.utils.references_to_bib()``. + Users should verify the accuracy of the generated BibTeX strings. + Default: ``None`` (no references...please add them if contributing!) + implemented_by (`list` of `str`, `None`): optional, list of authors of the model by their + GitHub usernames. Default: ``None`` (no authors...please attribute authorship if + contributing!) + variable_symbol_map (dict, None): optional, mapping of variables used as inputs/outputs to symbol names + (e.g. ``{"n": "index_of_refraction"}`` etc.). Default: ``None`` (all inputs/outputs are + named exactly the same as the symbols they represent) + units_for_evaluation (str, dict, None): optional, if specified, coerces the units of inputs prior to evaluation and outputs post-evaluation to the units - specified. If not specified, the inputs/outputs are not used as is. - If ``units_for_evaluation = 'default'``, all inputs/outputs will be - converted to the unit specified by the associated Symbol object. If + specified. If ``units_for_evaluation = 'default'``, all inputs/outputs will be + converted to the unit specified by their associated Symbol object. If ``units_for_evaluation`` is a variable-keyed dict, the inputs/outputs - will be converted to the units specified in the dict. If a variable is + will be converted to the units specified as values in the dict. If a variable is missing, it will be converted to the unit of the associated Symbol object. - test_data (`list` of `dict`): test data with - which to evaluate the model.
Format: + Default: ``'default'`` (all inputs/outputs are converted to canonical units for evaluation) + test_data (`list` of `dict`, `None`): optional, test data with + which to evaluate the model to verify correct function. Format: ``{'input': {variable: value}, 'output': {variable: value}}`` where `value` - can be a string with unit ('1.0 kg'), BaseQuantity object, or bare number. + can be a string with unit (``'1.0 kg'``), BaseQuantity object, or bare number. Bare numbers will be assumed to be the units specified by ``units_for_evaluation``. If ``units_for_evaluation`` is not specified, units will be assumed from the - associated Symbol object. - is_builtin (bool): True if the model is a default model included with propnet - (this option not intended to be set by users) - register (bool): True registers the model with the model registry named by - ``self._registry_name`` - overwrite_registry (bool): True overwrites the model registry if a model with - the same name exists. False throws an error if a model with the same name - exists in the registry. + associated Symbol object. Default: ``None`` (no test data) + is_builtin (bool): **This option not intended to be set by users.** ``True`` if the model + is included in propnet's core model library. + Default: ``False`` (model is not in core library) + register (bool): ``True`` registers the model with the model registry named by + ``self._registry_name``. ``False`` instantiates the object without registering it. + Registration fails if the name exists in the registry and ``overwrite_registry=False``. + Default: ``True`` (register the model) + overwrite_registry (bool): ``True`` overwrites the model registry if a model with + the same name exists. ``False`` throws an error if a model with the same name + exists in the registry. Default: ``True`` (replace same-named models) """ self.name = name self._connections = connections self.description = description - self.display_names = display_names + if display_names: + if isinstance(display_names, str): + self.display_names = [display_names] + else: + self.display_names = display_names + elif self.name: + self.display_names = [self.name.replace("_", " ").title()] + else: + self.display_names = [] if isinstance(categories, str): categories = [categories] self.categories = categories or [] @@ -106,11 +207,20 @@ def __init__(self, name, connections, constraints=None, display_names=None, self.references = references_to_bib(references or []) self._is_builtin = is_builtin - # variable symbol map initialized as symbol name->symbol, then updated + # TODO: Should probably make the variable_symbol_map always be keyed + # by a string and valued by a symbol object for consistency + # TODO: The creation of the VSM is weirdly cyclic but it works. Is there + # a better way to do this? 
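+        # In summary, the map is constructed in three steps: (1) every input/output variable is first
+        # mapped to its own name, (2) any user-supplied variable -> symbol-name entries overwrite those
+        # defaults and the referenced symbols are verified against the registry, and (3) the symbol names
+        # are then swapped for the registered Symbol objects (falling back to the bare name if a symbol
+        # is not found in the registry).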
+ # variable symbol map initialized as symbol name->symbol name, then updated # with any customization of variable to symbol mapping - self._variable_symbol_map = {k: k for k in self.all_symbols} - self._variable_symbol_map.update(variable_symbol_map or {}) + self._variable_symbol_map = {k: k for k in self.all_input_variables | self.all_output_variables} + if variable_symbol_map: + model_variable_symbol_map = {v: s for v, s in variable_symbol_map.items() + if v in self._variable_symbol_map} + self._variable_symbol_map.update(model_variable_symbol_map) self._verify_symbols_are_registered() + self._variable_symbol_map = {v: Registry("symbols").get(s) or s + for v, s in self._variable_symbol_map.items()} if units_for_evaluation or 'empirical' in self.categories: self._variable_unit_map = {prop_name: Registry("units").get(prop_name) @@ -131,7 +241,7 @@ def __init__(self, name, connections, constraints=None, display_names=None, self.constraints.append(constraint) else: self.constraints.append(Constraint(constraint, - variable_symbol_map=self._variable_symbol_map)) + variable_symbol_map=variable_symbol_map)) # Ensures our test data is variable-keyed and in the correct format test_data = test_data or self.load_test_data() @@ -155,7 +265,6 @@ def register(self, overwrite_registry=True): Raises: KeyError: if `overwrite_registry=False` and a model with the same name is already registered, this error is raised. - """ if not overwrite_registry and self.name in Registry(self._registry_name).keys(): raise KeyError("Model '{}' already exists in the registry '{}'".format(self.name, @@ -164,7 +273,7 @@ def register(self, overwrite_registry=True): def unregister(self): """ - Removes the symbol from all applicable registries. + Removes the model from its registry. """ Registry(self._registry_name).pop(self.name, None) @@ -175,14 +284,14 @@ def registered(self): Indicates if a model is registered with the model registry. Returns: - bool: True if the model is registered. False otherwise. + bool: ``True`` if the model is registered. ``False`` otherwise. """ return self.name in Registry(self._registry_name) def _clean_test_data(self, test_data): """ - Coerces test data into a value-unit format. + Coerces test data into a value-unit string format (e.g. "5.0 kg"). Args: test_data (`list` of `dict`): structured test data (see ``__init__()``) @@ -235,84 +344,145 @@ def _verify_symbols_are_registered(self): @property def is_builtin(self): """ - Indicates whether the model is a propnet built-in. + Indicates whether the model is in the propnet core library. Returns: - bool: ``True`` if the model is a built-in, ``False`` + bool: ``True`` if the model is in the core library, ``False`` if it is a custom-created model """ return self._is_builtin @property def connections(self): + """`list` of `dict`: list of connections dictionaries, which take + the form: + + >>> {"inputs": [variables], "outputs": [variables]} + + For example: + + >>> connections = [ + >>> {"inputs": ["p", "T"], "outputs": ["V"]}, + >>> {"inputs": ["T", "V"], "outputs": ["p"]} + >>> ] + + """ return self._connections @property def variable_unit_map(self): + """dict: variables mapped to the units, as ``pint`` Unit objects, + representing the units used for the variables in the model. If a + variable does not have a unit, the value is ``None``. 
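+
+        For example, for a hypothetical model with a temperature variable ``T`` tracked in kelvin and a
+        non-numerical ``structure`` variable, the map might look like:
+
+        >>> model.variable_unit_map
+        {'T': <Unit('kelvin')>, 'structure': None}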
+ """ return {k: ureg.Unit(v) if v is not None else None for k, v in self._variable_unit_map.items()} @property def variable_symbol_map(self): + """dict: variables mapped to the Symbol types that those variables represent + """ return self._variable_symbol_map @abstractmethod def plug_in(self, variable_value_dict): """ - Plugs in a variable to quantity dictionary + Evaluates the model by plugging values in directly from + a variable to value dictionary. The dictionary is keyed with + variable names expected by the model, valued by Python + data types expected by the model. + + **This function is abstract and must be implemented by subclasses.** Args: variable_value_dict (dict): a mapping - of variables to values to be substituted - into the model to yield output + of variable names as keys to raw data values + to be substituted into the model to yield output, + e.g. ``{'A': 5}`` Returns: dict: output variables with associated - values generated from the input + raw data values generated from the input """ return def map_symbols_to_variables(self, symbols): """ - Helper method to convert symbol-keyed dictionary or list to - variable-keyed dictionary or list + Converts a symbol-keyed dictionary or list of symbols to + variable-keyed dictionary or list of variables as defined + for this model. Args: symbols (list or dict): list of symbols or symbol- keyed dictionary - Returns (list or dict): - list of variables or variable-keyed dict + Returns: + `list` or `dict`: same object type as ``symbols`` with symbols + replaced with variables """ rev_map = {v: k for k, v in getattr(self, "variable_symbol_map", {}).items()} return remap(symbols, rev_map) def map_variables_to_symbols(self, variables): """ - Helper method to convert variable-keyed dictionary or list to - symbol-keyed dictionary or list + Converts a variable-keyed dictionary or a list/set of variable names to + symbol-keyed dictionary or list/set of symbols as defined + for this model. If two dictionary keys or items in the set correspond + to the same Symbol, only one instance will be returned. Args: variables (`list`, `dict`, `set`): list of variables or variable-keyed dictionary Returns: - `list` or `dict` or `set: list of symbols or symbol-keyed dict + `list` or `dict` or `set`: same object type as ``variables`` with variables + replaced with Symbols. """ return remap(variables, getattr(self, "variable_symbol_map", {})) def _convert_inputs_for_plugin(self, inputs): + """ + Converts input BaseQuantity objects to the format required by ``plug_in()`` + including unit conversion. + + **This function is not abstract, but may require different behavior for subclasses.** + + In this implementation, the function converts numerical quantities to the units specified + in the ``variable_unit_map`` and returns the raw numerical value only. For other objects, + it returns the object stored in the BaseQuantity object's "value" field. 
+ + Args: + inputs (dict): variable-keyed dictionary with values as BaseQuantity objects + representing inputs to be converted for model evaluation + + Returns: + dict: dictionary with values converted to the required raw data format + """ converted_inputs = {} for var, quantity in inputs.items(): converted_inputs[var] = quantity.value if self.variable_unit_map.get(var) is not None: - # Units are being assumed by equation and we need to strip them - # or pint might get angry if it has to add or subtract quantities - # with unmatched dimensions + # Convert units and return only the magnitude as to not + # have issues with dimension mismatch converted_inputs[var] = quantity.to(self.variable_unit_map[var]).magnitude return converted_inputs def _convert_outputs_from_plugin(self, outputs): + """ + Converts output raw data from ``plug_in()`` to the correct type and assigns units as needed. + + **This function is not abstract, but may require different behavior for subclasses.** + + In this implementation, raw numerical outputs are converted to pint Quantity objects with the + units specified in the ``variable_unit_map``. Other outputs are returned as is. + + Args: + outputs (dict): variable-keyed dictionary with values as raw data + representing outputs to be converted to BaseQuantity objects + + Returns: + dict: dictionary with values converted to objects ready to be converted to BaseQuantity objects + """ converted_outputs = {} for var, quantity in outputs.items(): symbol = self._variable_symbol_map[var] @@ -336,31 +506,40 @@ def _convert_outputs_from_plugin(self, outputs): def evaluate(self, symbol_quantity_dict, allow_failure=True, raise_timeout_errors=True): """ - Given a set of symbol values, performs error checking to see - if the corresponding input variable values represents a valid - input set based on the self.connections() method. If so, returns - a dictionary representing the value of plug_in() applied to the - input. The dictionary contains a "successful" key - representing if plug_in() was successful. + Evaluates the model given Quantity objects corresponding to a + valid input set to the model. If the input set is invalid, the + model will fail to evaluate, throwing an error or more gracefully + exiting, depending on the setting for ``allow_failure``. The key distinction between evaluate() and plug_in() is symbols in, symbols out vs. variables in, variables out. In addition, - evaluate also handles any requisite unit mapping. + evaluate also handles any required unit conversion and data type + coercion. Args: symbol_quantity_dict (dict): a mapping of - symbol names (str) to quantities (BaseQuantity) to be substituted + symbol names (str) to quantities (BaseQuantity) to be evaluated allow_failure (bool): whether or not to catch - errors in model evaluation - raise_timeout_errors (bool): True ignores the value of "allow_failure" + errors in model evaluation. ``True`` catches errors and exits + gracefully, returning ``'successful' = False`` in the output with + a descriptive error message in ``'message'``. The behavior of ``True`` + can be modified for TimeoutError with ``raise_timeout_errors``. + ``False`` raises exceptions as they occur during model evaluation. + Default: ``True`` (exit gracefully) + raise_timeout_errors (bool): ``True`` ignores the value of ``allow_failure`` and forces TimeoutError exceptions to be raised. This is so they - can be caught and handled by Graph, as timeouts are implemented there - and not in Model. 
+ can be caught and handled by ``Graph`` or another wrapping function + which forcibly terminates models after a certain time. (Note: this + functionality may be moved within Model to make this kwarg obsolete) Returns: - dict: dictionary of output symbols with associated values - generated from the input, along with "successful" if the - substitution succeeds + dict: dictionary of output from the model: + + - Symbol name (str, Symbol): output values generated from the input + as BaseQuantity objects, keyed by symbol name + - ``'successful'`` (bool): ``True`` if the model evaluation succeeds, ``False`` otherwise + - ``'message'`` (str): contains an error message if ``'successful'=False`` + and ``allow_failure=True`` """ # Remap symbols and units if symbol map isn't none @@ -403,7 +582,9 @@ def evaluate(self, symbol_quantity_dict, allow_failure=True, raise_timeout_error # at the time, which is usually in "plug_in". We want to optionally raise # TimeoutErrors so they are caught by Graph() or an error handler otherwise # outside of this class. + # TODO: Move timeout functionality here so we don't need the raise_timeout_errors kwarg if not allow_failure or (isinstance(err, TimeoutError) and raise_timeout_errors): + # TODO: Maybe write custom error handler so it cleans up all these return statements raise err else: return {"successful": False, @@ -446,6 +627,8 @@ def evaluate(self, symbol_quantity_dict, allow_failure=True, raise_timeout_error out[symbol] = quantity + # TODO: Change schema for output so results are under a key like "result/output". That way + # we won't accidentally have any key clashes with symbols out['successful'] = True return out @@ -645,6 +828,8 @@ def check_constraints(self, input_symbols): return False return True + # TODO: Can we roll this into the model module itself rather than hard coding + # this file path and loading from there? def load_test_data(self, test_data_path=None, deserialize=True): """ Loads test data from preset or specified directory. @@ -760,8 +945,24 @@ def __eq__(self, rhs): class EquationModel(Model, MSONable): """ - Equation model is a Model subclass which is invoked - from a list of equations + For models which are simple equations, use the ``EquationModel`` class. To construct a relationship between + some property `symbol_a` and some other property `symbol_b`, both of which are registered in the propnet + Registry, you can do the following: + + >>> from propnet.core.models import EquationModel + >>> em = EquationModel('my_model', ['symbol_b = 2*symbol_a**3']) + >>> em.plug_in({'symbol_a': 5}) + {'symbol_b': 250} + + Equations must be strings parsable by the ``sympy`` package. Note that the variable names in the equation are + the same names as the symbols. 
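+
+    Since ``symbol_a`` and ``symbol_b`` are assumed to be registered symbols, the same hypothetical model
+    can also be run through ``evaluate()`` with Quantity objects rather than raw values (an illustrative
+    sketch; the printed form of the returned quantity depends on the Quantity class):
+
+    >>> from propnet.core.quantity import QuantityFactory as QF
+    >>> result = em.evaluate({'symbol_a': QF.create_quantity('symbol_a', 5)})
+    >>> result['successful']
+    True
+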
If you would rather use a symbolic representation of the symbol names, + specify the ``variable_symbol_map`` upon creation: + + >>> from propnet.core.quantity import QuantityFactory as QF + >>> em = EquationModel('my_model', ['B = 2*A**3'], + >>> variable_symbol_map={'A': 'symbol_a', 'B': 'symbol_b'}) + >>> em.plug_in({'A': 5}) + {'B': 250} """ def __init__(self, name, equations, connections=None, constraints=None, @@ -853,7 +1054,10 @@ def __init__(self, name, equations, connections=None, constraints=None, def as_dict(self): d = {k if not k.startswith("_") else k.split('_', 1)[1]: v for k, v in self.__getstate__().items()} - d['units_for_evaluation'] = d.pop('unit_map') + d['units_for_evaluation'] = d.pop('variable_unit_map') + del d['is_builtin'] + for connection in d['connections']: + del connection['_sympy_exprs'] return d @classmethod @@ -877,7 +1081,7 @@ def _generate_lambdas(self): connection['_lambdas'][output_var] = sp_lambda def __getstate__(self): - d = self.__dict__.copy() + d = deepcopy(self.__dict__) for connection in d['_connections']: if '_lambdas' in connection.keys(): del connection['_lambdas'] diff --git a/propnet/data/reference_cache.json b/propnet/data/reference_cache.json index b5896458..a19e9938 100644 --- a/propnet/data/reference_cache.json +++ b/propnet/data/reference_cache.json @@ -1 +1 @@ -{"url:http://hyperphysics.phy-astr.gsu.edu/hbase/thermo/thercond.html#c2": "@misc{url:249720,\n url = {http://hyperphysics.phy-astr.gsu.edu/hbase/thermo/thercond.html#c2}\n }", "doi:10.1039/C4EE03157A": "@article{Yan_2015,\n\tdoi = {10.1039/c4ee03157a},\n\turl = {https://doi.org/10.1039%2Fc4ee03157a},\n\tyear = 2015,\n\tpublisher = {Royal Society of Chemistry ({RSC})},\n\tvolume = {8},\n\tnumber = {3},\n\tpages = {983--994},\n\tauthor = {Jun Yan and Prashun Gorai and Brenden Ortiz and Sam Miller and Scott A. Barnett and Thomas Mason and Vladan Stevanovi{\\'{c}} and Eric S. Toberer},\n\ttitle = {Material descriptors for predicting thermoelectric performance},\n\tjournal = {Energy {\\&} Environmental Science}\n}", "url:https://en.wikipedia.org/wiki/Refractive_index#Complex_refractive_indexf": "@misc{url:103965,\n url = {https://en.wikipedia.org/wiki/Refractive_index#Complex_refractive_indexf}\n }", "url:https://en.wikipedia.org/wiki/Refractive_index#Relative_permittivity_and_permeability": "@misc{url:153771,\n url = {https://en.wikipedia.org/wiki/Refractive_index#Relative_permittivity_and_permeability}\n }", "url:https://en.wikipedia.org/wiki/Refractive_index#Relative_permittivity_and_permeability}": "@misc{url:368625,\n url = {https://en.wikipedia.org/wiki/Refractive_index#Relative_permittivity_and_permeability}}\n }", "doi:10.1021/co200012w": "@article{Setyawan_2011,\n\tdoi = {10.1021/co200012w},\n\turl = {https://doi.org/10.1021%2Fco200012w},\n\tyear = 2011,\n\tmonth = {jun},\n\tpublisher = {American Chemical Society ({ACS})},\n\tvolume = {13},\n\tnumber = {4},\n\tpages = {382--390},\n\tauthor = {Wahyu Setyawan and Romain M. Gaume and Stephanie Lam and Robert S. 
Feigelson and Stefano Curtarolo},\n\ttitle = {High-Throughput Combinatorial Database of Electronic Band Structures for Inorganic Scintillator Materials},\n\tjournal = {{ACS} Combinatorial Science}\n}", "url:https://en.wikipedia.org/wiki/Schmid%27s_law": "@misc{url:475884,\n url = {https://en.wikipedia.org/wiki/Schmid%27s_law}\n }", "doi:10.1016/j.infrared.2006.04.001": "@article{Ravindra_2007,\n\tdoi = {10.1016/j.infrared.2006.04.001},\n\turl = {https://doi.org/10.1016%2Fj.infrared.2006.04.001},\n\tyear = 2007,\n\tmonth = {mar},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {50},\n\tnumber = {1},\n\tpages = {21--29},\n\tauthor = {N.M. Ravindra and Preethi Ganapathy and Jinsoo Choi},\n\ttitle = {Energy gap{\\textendash}refractive index relations in semiconductors {\\textendash} An overview},\n\tjournal = {Infrared Physics {\\&} Technology}\n}", "doi:10.1002/pssb.2221310202": "@article{Moss_1985,\n\tdoi = {10.1002/pssb.2221310202},\n\turl = {https://doi.org/10.1002%2Fpssb.2221310202},\n\tyear = 1985,\n\tmonth = {oct},\n\tpublisher = {Wiley},\n\tvolume = {131},\n\tnumber = {2},\n\tpages = {415--427},\n\tauthor = {T. S. Moss},\n\ttitle = {Relations between the Refractive Index and Energy Gap of Semiconductors},\n\tjournal = {physica status solidi (b)}\n}", "doi:10.1002/pssb.2221000240": "@article{Gupta_1980,\n\tdoi = {10.1002/pssb.2221000240},\n\turl = {https://doi.org/10.1002%2Fpssb.2221000240},\n\tyear = 1980,\n\tmonth = {aug},\n\tpublisher = {Wiley},\n\tvolume = {100},\n\tnumber = {2},\n\tpages = {715--719},\n\tauthor = {V. P. Gupta and N. M. Ravindra},\n\ttitle = {Comments on the Moss Formula},\n\tjournal = {physica status solidi (b)}\n}", "doi:10.1016/j.commatsci.2012.10.028": "@article{Ong_2013,\n\tdoi = {10.1016/j.commatsci.2012.10.028},\n\turl = {https://doi.org/10.1016%2Fj.commatsci.2012.10.028},\n\tyear = 2013,\n\tmonth = {feb},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {68},\n\tpages = {314--319},\n\tauthor = {Shyue Ping Ong and William Davidson Richards and Anubhav Jain and Geoffroy Hautier and Michael Kocher and Shreyas Cholia and Dan Gunter and Vincent L. Chevrier and Kristin A. 
Persson and Gerbrand Ceder},\n\ttitle = {Python Materials Genomics (pymatgen): A robust, open-source python library for materials analysis},\n\tjournal = {Computational Materials Science}\n}", "url:https://onlinelibrary.wiley.com/doi/abs/10.1002/pssb.2221630145": "@misc{url:502475,\n url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/pssb.2221630145}\n }", "url:https://en.wikipedia.org/wiki/Peierls_stress}": "@misc{url:886830,\n url = {https://en.wikipedia.org/wiki/Peierls_stress}}\n }", "url:https://en.wikipedia.org/wiki/Refractive_index#Complex_refractive_index": "@misc{url:715955,\n url = {https://en.wikipedia.org/wiki/Refractive_index#Complex_refractive_index}\n }", "url:https://en.wikipedia.org/wiki/Electrical_resistivity_and_conductivity": "@misc{url:805288,\n url = {https://en.wikipedia.org/wiki/Electrical_resistivity_and_conductivity}\n }", "doi:10.1021/acs.jpcc.7b07421": "@article{Morales_Garc_a_2017,\n\tdoi = {10.1021/acs.jpcc.7b07421},\n\turl = {https://doi.org/10.1021%2Facs.jpcc.7b07421},\n\tyear = 2017,\n\tmonth = {aug},\n\tpublisher = {American Chemical Society ({ACS})},\n\tvolume = {121},\n\tnumber = {34},\n\tpages = {18862--18866},\n\tauthor = {{\\'{A}}ngel Morales-Garc{\\'{\\i}}a and Rosendo Valero and Francesc Illas},\n\ttitle = {An Empirical, yet Practical Way To Predict the Band Gap in Solids by Using Density Functional Band Structure Calculations},\n\tjournal = {The Journal of Physical Chemistry C}\n}", "url:https://unlcms.unl.edu/cas/physics/tsymbal/teaching/SSP-927/Section%2013_Optical_Properties_of_Solids.pdf": "@misc{url:121737,\n url = {https://unlcms.unl.edu/cas/physics/tsymbal/teaching/SSP-927/Section%2013_Optical_Properties_of_Solids.pdf}\n }", "doi:10.1016/S0257-8972(02)00593-5": "@article{Clarke_2003,\n\tdoi = {10.1016/s0257-8972(02)00593-5},\n\turl = {https://doi.org/10.1016%2Fs0257-8972%2802%2900593-5},\n\tyear = 2003,\n\tmonth = {jan},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {163-164},\n\tpages = {67--74},\n\tauthor = {David R. Clarke},\n\ttitle = {Materials selection guidelines for low thermal conductivity thermal barrier coatings},\n\tjournal = {Surface and Coatings Technology}\n}", "doi:10.1039/C6TA04121C": "@article{Gorai_2016,\n\tdoi = {10.1039/c6ta04121c},\n\turl = {https://doi.org/10.1039%2Fc6ta04121c},\n\tyear = 2016,\n\tpublisher = {Royal Society of Chemistry ({RSC})},\n\tvolume = {4},\n\tnumber = {28},\n\tpages = {11110--11116},\n\tauthor = {Prashun Gorai and Eric S. Toberer and Vladan Stevanovi{\\'{c}}},\n\ttitle = {Computational identification of promising thermoelectric materials among known quasi-2D binary compounds},\n\tjournal = {Journal of Materials Chemistry A}\n}", "doi:10.1021/acs.nanolett.6b05229": "@article{Cheon_2017,\n\tdoi = {10.1021/acs.nanolett.6b05229},\n\turl = {https://doi.org/10.1021%2Facs.nanolett.6b05229},\n\tyear = 2017,\n\tmonth = {feb},\n\tpublisher = {American Chemical Society ({ACS})},\n\tvolume = {17},\n\tnumber = {3},\n\tpages = {1915--1923},\n\tauthor = {Gowoon Cheon and Karel-Alexander N. Duerloo and Austin D. Sendek and Chase Porter and Yuan Chen and Evan J. 
Reed},\n\ttitle = {Data Mining for New Two- and One-Dimensional Weakly Bonded Solids and Lattice-Commensurate Heterostructures},\n\tjournal = {Nano Letters}\n}", "doi:10.1021/cm400893e": "@article{Gaultois_2013,\n\tdoi = {10.1021/cm400893e},\n\turl = {https://doi.org/10.1021%2Fcm400893e},\n\tyear = 2013,\n\tmonth = {may},\n\tpublisher = {American Chemical Society ({ACS})},\n\tvolume = {25},\n\tnumber = {15},\n\tpages = {2911--2920},\n\tauthor = {Michael W. Gaultois and Taylor D. Sparks and Christopher K. H. Borg and Ram Seshadri and William D. Bonificio and David R. Clarke},\n\ttitle = {Data-Driven Review of Thermoelectric Materials: Performance and Resource Considerations},\n\tjournal = {Chemistry of Materials}\n}", "url:https://en.wikipedia.org/wiki/Elastic_modulus": "@misc{url:169345,\n url = {https://en.wikipedia.org/wiki/Elastic_modulus}\n }", "url:https://link.springer.com/article/10.1134/1.1666949": "@misc{url:560228,\n url = {https://link.springer.com/article/10.1134/1.1666949}\n }", "doi:10.1007/0-387-25100-6_2": "@incollection{Morelli,\n\tdoi = {10.1007/0-387-25100-6_2},\n\turl = {https://doi.org/10.1007%2F0-387-25100-6_2},\n\tpublisher = {Springer-Verlag},\n\tpages = {37--68},\n\tauthor = {Donald T. Morelli and Glen A. Slack},\n\ttitle = {High Lattice Thermal Conductivity Solids},\n\tbooktitle = {High Thermal Conductivity Materials}\n}", "doi:10.1002/adfm.201600718": "@article{Wei_2016,\n\tdoi = {10.1002/adfm.201600718},\n\turl = {https://doi.org/10.1002%2Fadfm.201600718},\n\tyear = 2016,\n\tmonth = {may},\n\tpublisher = {Wiley},\n\tvolume = {26},\n\tnumber = {29},\n\tpages = {5360--5367},\n\tauthor = {Ping Wei and Jiong Yang and Liang Guo and Shanyu Wang and Lihua Wu and Xianfan Xu and Wenyu Zhao and Qingjie Zhang and Wenqing Zhang and Mildred S. Dresselhaus and Jihui Yang},\n\ttitle = {Minimum Thermal Conductivity in Weak Topological Insulators with Bismuth-Based Stack Structure},\n\tjournal = {Advanced Functional Materials}\n}", "url:https://en.wikipedia.org/wiki/Gr%C3%BCneisen_parameter": "@misc{url:418561,\n url = {https://en.wikipedia.org/wiki/Gr%C3%BCneisen_parameter}\n }", "doi:10.1063/1.2737054": "@article{Chakravarty_2007,\n\tdoi = {10.1063/1.2737054},\n\turl = {https://doi.org/10.1063%2F1.2737054},\n\tyear = 2007,\n\tmonth = {may},\n\tpublisher = {{AIP} Publishing},\n\tvolume = {126},\n\tnumber = {20},\n\tpages = {204508},\n\tauthor = {Charusita Chakravarty and Pablo G. Debenedetti and Frank H. Stillinger},\n\ttitle = {Lindemann measures for the solid-liquid phase transition},\n\tjournal = {The Journal of Chemical Physics}\n}", "doi:10.1103/PhysRevB.22.3790": "@article{Stillinger_1980,\n\tdoi = {10.1103/physrevb.22.3790},\n\turl = {https://doi.org/10.1103%2Fphysrevb.22.3790},\n\tyear = 1980,\n\tmonth = {oct},\n\tpublisher = {American Physical Society ({APS})},\n\tvolume = {22},\n\tnumber = {8},\n\tpages = {3790--3794},\n\tauthor = {F. H. Stillinger and T. A. 
Weber},\n\ttitle = {Lindemann melting criterion and the Gaussian core model},\n\tjournal = {Physical Review B}\n}", "url:https://en.m.wikipedia.org/wiki/Melting_point": "@misc{url:637536,\n url = {https://en.m.wikipedia.org/wiki/Melting_point}\n }", "url:https://en.wikipedia.org/wiki/Electron_mobility": "@misc{url:685331,\n url = {https://en.wikipedia.org/wiki/Electron_mobility}\n }", "@article{kasap2001hall,title={Hall effect in semiconductors},author={Kasap, Safa}}": "@article{kasap2001hall,title={Hall effect in semiconductors},author={Kasap, Safa}}", "url:https://en.wikipedia.org/wiki/Drude_model": "@misc{url:692259,\n url = {https://en.wikipedia.org/wiki/Drude_model}\n }", "url:https://link.aps.org/doi/10.1103/PhysRev.98.1751": "@misc{url:844896,\n url = {https://link.aps.org/doi/10.1103/PhysRev.98.1751}\n }", "url:https://www.sciencedirect.com/science/article/pii/S0257897202005935": "@misc{url:198185,\n url = {https://www.sciencedirect.com/science/article/pii/S0257897202005935}\n }", "url:https://journals.aps.org/prb/abstract/10.1103/PhysRevB.46.6131": "@misc{url:835839,\n url = {https://journals.aps.org/prb/abstract/10.1103/PhysRevB.46.6131}\n }", "doi:10.1134/1.1666949": "@article{Belomestnykh_2004,\n\tdoi = {10.1134/1.1666949},\n\turl = {https://doi.org/10.1134%2F1.1666949},\n\tyear = 2004,\n\tmonth = {feb},\n\tpublisher = {Pleiades Publishing Ltd},\n\tvolume = {30},\n\tnumber = {2},\n\tpages = {91--93},\n\tauthor = {V. N. Belomestnykh},\n\ttitle = {The acoustical Gr\u00c3\u00bcneisen constants of solids},\n\tjournal = {Technical Physics Letters}\n}", "isbn:0471847488": "@book{9780471847489,\n title = {The Art Of Experimental Physics},\n author = {Daryl W. Preston and Eric R. Dietz},\n isbn = {9780471847489},\n year = {1991},\n publisher = {John Wiley & Sons Inc}\n}", "@incollection{Morelli,\n\tyear = {2006},\n\tdoi = {10.1007/0-387-25100-6_2},\n\turl = {https://doi.org/10.1007%2F0-387-25100-6_2},\n\tpublisher = {Springer-Verlag},\n\tpages = {37--68},\n\tauthor = {Donald T. Morelli and Glen A. Slack},\n\ttitle = {High Lattice Thermal Conductivity Solids},\n\tbooktitle = {High Thermal Conductivity Materials}\n}": "@incollection{Morelli,\n\tyear = {2006},\n\tdoi = {10.1007/0-387-25100-6_2},\n\turl = {https://doi.org/10.1007%2F0-387-25100-6_2},\n\tpublisher = {Springer-Verlag},\n\tpages = {37--68},\n\tauthor = {Donald T. Morelli and Glen A. Slack},\n\ttitle = {High Lattice Thermal Conductivity Solids},\n\tbooktitle = {High Thermal Conductivity Materials}\n}", "@incollection{Morelli, \n\tdoi = {10.1007/0-387-25100-6_2}, \n\turl = {https://doi.org/10.1007%2F0-387-25100-6_2}, \n\tpublisher = {Springer-Verlag}, \n\tpages = {37--68}, \n\tyear = {2006}, \n\tauthor = {Donald T. Morelli and Glen A. Slack}, \n\ttitle = {High Lattice Thermal Conductivity Solids}, \n\tbooktitle = {High Thermal Conductivity Materials}\n}": "@incollection{Morelli, \n\tdoi = {10.1007/0-387-25100-6_2}, \n\turl = {https://doi.org/10.1007%2F0-387-25100-6_2}, \n\tpublisher = {Springer-Verlag}, \n\tpages = {37--68}, \n\tyear = {2006}, \n\tauthor = {Donald T. Morelli and Glen A. Slack}, \n\ttitle = {High Lattice Thermal Conductivity Solids}, \n\tbooktitle = {High Thermal Conductivity Materials}\n}", "@incollection{Morelli, doi = {10.1007/0-387-25100-6_2}, turl = {https://doi.org/10.1007%2F0-387-25100-6_2}, publisher = {Springer-Verlag}, pages = {37--68}, year = {2006}, author = {Donald T. Morelli and Glen A. 
Slack}, title = {High Lattice Thermal Conductivity Solids}, booktitle = {High Thermal Conductivity Materials}}": "@incollection{Morelli, doi = {10.1007/0-387-25100-6_2}, turl = {https://doi.org/10.1007%2F0-387-25100-6_2}, publisher = {Springer-Verlag}, pages = {37--68}, year = {2006}, author = {Donald T. Morelli and Glen A. Slack}, title = {High Lattice Thermal Conductivity Solids}, booktitle = {High Thermal Conductivity Materials}}", "@incollection{Morelli, doi = {10.1007/0-387-25100-6_2}, url = {https://doi.org/10.1007%2F0-387-25100-6_2}, publisher = {Springer-Verlag}, pages = {37--68}, year = {2006}, author = {Donald T. Morelli and Glen A. Slack}, title = {High Lattice Thermal Conductivity Solids}, booktitle = {High Thermal Conductivity Materials}}": "@incollection{Morelli, doi = {10.1007/0-387-25100-6_2}, url = {https://doi.org/10.1007%2F0-387-25100-6_2}, publisher = {Springer-Verlag}, pages = {37--68}, year = {2006}, author = {Donald T. Morelli and Glen A. Slack}, title = {High Lattice Thermal Conductivity Solids}, booktitle = {High Thermal Conductivity Materials}}", "doi:10.1016/j.commatsci.2012.02.005": "@article{Curtarolo_2012,\n\tdoi = {10.1016/j.commatsci.2012.02.005},\n\turl = {https://doi.org/10.1016%2Fj.commatsci.2012.02.005},\n\tyear = 2012,\n\tmonth = {jun},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {58},\n\tpages = {218--226},\n\tauthor = {Stefano Curtarolo and Wahyu Setyawan and Gus L.W. Hart and Michal Jahnatek and Roman V. Chepulskii and Richard H. Taylor and Shidong Wang and Junkai Xue and Kesong Yang and Ohad Levy and Michael J. Mehl and Harold T. Stokes and Denis O. Demchenko and Dane Morgan},\n\ttitle = {{AFLOW}: An automatic framework for high-throughput materials discovery},\n\tjournal = {Computational Materials Science}\n}", "doi:10.1016/j.commatsci.2014.05.014": "@article{Taylor_2014,\n\tdoi = {10.1016/j.commatsci.2014.05.014},\n\turl = {https://doi.org/10.1016%2Fj.commatsci.2014.05.014},\n\tyear = 2014,\n\tmonth = {oct},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {93},\n\tpages = {178--192},\n\tauthor = {Richard H. Taylor and Frisco Rose and Cormac Toher and Ohad Levy and Kesong Yang and Marco Buongiorno Nardelli and Stefano Curtarolo},\n\ttitle = {A {RESTful} {API} for exchanging materials data in the {AFLOWLIB}.org consortium},\n\tjournal = {Computational Materials Science}\n}", "doi:10.1016/j.commatsci.2017.04.036": "@article{Rose_2017,\n\tdoi = {10.1016/j.commatsci.2017.04.036},\n\turl = {https://doi.org/10.1016%2Fj.commatsci.2017.04.036},\n\tyear = 2017,\n\tmonth = {sep},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {137},\n\tpages = {362--370},\n\tauthor = {Frisco Rose and Cormac Toher and Eric Gossett and Corey Oses and Marco Buongiorno Nardelli and Marco Fornari and Stefano Curtarolo},\n\ttitle = {{AFLUX}: The {LUX} materials search {API} for the {AFLOW} data repositories},\n\tjournal = {Computational Materials Science}\n}", "url:https://en.wikipedia.org/wiki/Debye_model": "@misc{url:338569,\n url = {https://en.wikipedia.org/wiki/Debye_model}\n }", "doi:10.1002/andp.19123441404": "@article{Debye_1912,\n\tdoi = {10.1002/andp.19123441404},\n\turl = {https://doi.org/10.1002%2Fandp.19123441404},\n\tyear = 1912,\n\tpublisher = {Wiley},\n\tvolume = {344},\n\tnumber = {14},\n\tpages = {789--839},\n\tauthor = {P. 
Debye},\n\ttitle = {Zur Theorie der spezifischen W\u00c3\u00a4rmen},\n\tjournal = {Annalen der Physik}\n}", "url:https://eng.libretexts.org/Bookshelves/Materials_Science/Supplemental_Modules_(Materials_Science)/Electronic_Properties/Debye_Model_For_Specific_Heat": "@misc{url:422223,\n url = {https://eng.libretexts.org/Bookshelves/Materials_Science/Supplemental_Modules_(Materials_Science)/Electronic_Properties/Debye_Model_For_Specific_Heat}\n }"} \ No newline at end of file +{"url:http://hyperphysics.phy-astr.gsu.edu/hbase/thermo/thercond.html#c2": "@misc{url:249720,\n url = {http://hyperphysics.phy-astr.gsu.edu/hbase/thermo/thercond.html#c2}\n }", "doi:10.1039/C4EE03157A": "@article{Yan_2015,\n\tdoi = {10.1039/c4ee03157a},\n\turl = {https://doi.org/10.1039%2Fc4ee03157a},\n\tyear = 2015,\n\tpublisher = {Royal Society of Chemistry ({RSC})},\n\tvolume = {8},\n\tnumber = {3},\n\tpages = {983--994},\n\tauthor = {Jun Yan and Prashun Gorai and Brenden Ortiz and Sam Miller and Scott A. Barnett and Thomas Mason and Vladan Stevanovi{\\'{c}} and Eric S. Toberer},\n\ttitle = {Material descriptors for predicting thermoelectric performance},\n\tjournal = {Energy {\\&} Environmental Science}\n}", "url:https://en.wikipedia.org/wiki/Refractive_index#Complex_refractive_indexf": "@misc{url:103965,\n url = {https://en.wikipedia.org/wiki/Refractive_index#Complex_refractive_indexf}\n }", "url:https://en.wikipedia.org/wiki/Refractive_index#Relative_permittivity_and_permeability": "@misc{url:153771,\n url = {https://en.wikipedia.org/wiki/Refractive_index#Relative_permittivity_and_permeability}\n }", "url:https://en.wikipedia.org/wiki/Refractive_index#Relative_permittivity_and_permeability}": "@misc{url:368625,\n url = {https://en.wikipedia.org/wiki/Refractive_index#Relative_permittivity_and_permeability}}\n }", "doi:10.1021/co200012w": "@article{Setyawan_2011,\n\tdoi = {10.1021/co200012w},\n\turl = {https://doi.org/10.1021%2Fco200012w},\n\tyear = 2011,\n\tmonth = {jun},\n\tpublisher = {American Chemical Society ({ACS})},\n\tvolume = {13},\n\tnumber = {4},\n\tpages = {382--390},\n\tauthor = {Wahyu Setyawan and Romain M. Gaume and Stephanie Lam and Robert S. Feigelson and Stefano Curtarolo},\n\ttitle = {High-Throughput Combinatorial Database of Electronic Band Structures for Inorganic Scintillator Materials},\n\tjournal = {{ACS} Combinatorial Science}\n}", "url:https://en.wikipedia.org/wiki/Schmid%27s_law": "@misc{url:475884,\n url = {https://en.wikipedia.org/wiki/Schmid%27s_law}\n }", "doi:10.1016/j.infrared.2006.04.001": "@article{Ravindra_2007,\n\tdoi = {10.1016/j.infrared.2006.04.001},\n\turl = {https://doi.org/10.1016%2Fj.infrared.2006.04.001},\n\tyear = 2007,\n\tmonth = {mar},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {50},\n\tnumber = {1},\n\tpages = {21--29},\n\tauthor = {N.M. Ravindra and Preethi Ganapathy and Jinsoo Choi},\n\ttitle = {Energy gap{\\textendash}refractive index relations in semiconductors {\\textendash} An overview},\n\tjournal = {Infrared Physics {\\&} Technology}\n}", "doi:10.1002/pssb.2221310202": "@article{Moss_1985,\n\tdoi = {10.1002/pssb.2221310202},\n\turl = {https://doi.org/10.1002%2Fpssb.2221310202},\n\tyear = 1985,\n\tmonth = {oct},\n\tpublisher = {Wiley},\n\tvolume = {131},\n\tnumber = {2},\n\tpages = {415--427},\n\tauthor = {T. S. 
Moss},\n\ttitle = {Relations between the Refractive Index and Energy Gap of Semiconductors},\n\tjournal = {physica status solidi (b)}\n}", "doi:10.1002/pssb.2221000240": "@article{Gupta_1980,\n\tdoi = {10.1002/pssb.2221000240},\n\turl = {https://doi.org/10.1002%2Fpssb.2221000240},\n\tyear = 1980,\n\tmonth = {aug},\n\tpublisher = {Wiley},\n\tvolume = {100},\n\tnumber = {2},\n\tpages = {715--719},\n\tauthor = {V. P. Gupta and N. M. Ravindra},\n\ttitle = {Comments on the Moss Formula},\n\tjournal = {physica status solidi (b)}\n}", "doi:10.1016/j.commatsci.2012.10.028": "@article{Ong_2013,\n\tdoi = {10.1016/j.commatsci.2012.10.028},\n\turl = {https://doi.org/10.1016%2Fj.commatsci.2012.10.028},\n\tyear = 2013,\n\tmonth = {feb},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {68},\n\tpages = {314--319},\n\tauthor = {Shyue Ping Ong and William Davidson Richards and Anubhav Jain and Geoffroy Hautier and Michael Kocher and Shreyas Cholia and Dan Gunter and Vincent L. Chevrier and Kristin A. Persson and Gerbrand Ceder},\n\ttitle = {Python Materials Genomics (pymatgen): A robust, open-source python library for materials analysis},\n\tjournal = {Computational Materials Science}\n}", "url:https://onlinelibrary.wiley.com/doi/abs/10.1002/pssb.2221630145": "@misc{url:502475,\n url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/pssb.2221630145}\n }", "url:https://en.wikipedia.org/wiki/Peierls_stress}": "@misc{url:886830,\n url = {https://en.wikipedia.org/wiki/Peierls_stress}}\n }", "url:https://en.wikipedia.org/wiki/Refractive_index#Complex_refractive_index": "@misc{url:715955,\n url = {https://en.wikipedia.org/wiki/Refractive_index#Complex_refractive_index}\n }", "url:https://en.wikipedia.org/wiki/Electrical_resistivity_and_conductivity": "@misc{url:805288,\n url = {https://en.wikipedia.org/wiki/Electrical_resistivity_and_conductivity}\n }", "doi:10.1021/acs.jpcc.7b07421": "@article{Morales_Garc_a_2017,\n\tdoi = {10.1021/acs.jpcc.7b07421},\n\turl = {https://doi.org/10.1021%2Facs.jpcc.7b07421},\n\tyear = 2017,\n\tmonth = {aug},\n\tpublisher = {American Chemical Society ({ACS})},\n\tvolume = {121},\n\tnumber = {34},\n\tpages = {18862--18866},\n\tauthor = {{\\'{A}}ngel Morales-Garc{\\'{\\i}}a and Rosendo Valero and Francesc Illas},\n\ttitle = {An Empirical, yet Practical Way To Predict the Band Gap in Solids by Using Density Functional Band Structure Calculations},\n\tjournal = {The Journal of Physical Chemistry C}\n}", "url:https://unlcms.unl.edu/cas/physics/tsymbal/teaching/SSP-927/Section%2013_Optical_Properties_of_Solids.pdf": "@misc{url:121737,\n url = {https://unlcms.unl.edu/cas/physics/tsymbal/teaching/SSP-927/Section%2013_Optical_Properties_of_Solids.pdf}\n }", "doi:10.1016/S0257-8972(02)00593-5": "@article{Clarke_2003,\n\tdoi = {10.1016/s0257-8972(02)00593-5},\n\turl = {https://doi.org/10.1016%2Fs0257-8972%2802%2900593-5},\n\tyear = 2003,\n\tmonth = {jan},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {163-164},\n\tpages = {67--74},\n\tauthor = {David R. Clarke},\n\ttitle = {Materials selection guidelines for low thermal conductivity thermal barrier coatings},\n\tjournal = {Surface and Coatings Technology}\n}", "doi:10.1039/C6TA04121C": "@article{Gorai_2016,\n\tdoi = {10.1039/c6ta04121c},\n\turl = {https://doi.org/10.1039%2Fc6ta04121c},\n\tyear = 2016,\n\tpublisher = {Royal Society of Chemistry ({RSC})},\n\tvolume = {4},\n\tnumber = {28},\n\tpages = {11110--11116},\n\tauthor = {Prashun Gorai and Eric S. 
Toberer and Vladan Stevanovi{\\'{c}}},\n\ttitle = {Computational identification of promising thermoelectric materials among known quasi-2D binary compounds},\n\tjournal = {Journal of Materials Chemistry A}\n}", "doi:10.1021/acs.nanolett.6b05229": "@article{Cheon_2017,\n\tdoi = {10.1021/acs.nanolett.6b05229},\n\turl = {https://doi.org/10.1021%2Facs.nanolett.6b05229},\n\tyear = 2017,\n\tmonth = {feb},\n\tpublisher = {American Chemical Society ({ACS})},\n\tvolume = {17},\n\tnumber = {3},\n\tpages = {1915--1923},\n\tauthor = {Gowoon Cheon and Karel-Alexander N. Duerloo and Austin D. Sendek and Chase Porter and Yuan Chen and Evan J. Reed},\n\ttitle = {Data Mining for New Two- and One-Dimensional Weakly Bonded Solids and Lattice-Commensurate Heterostructures},\n\tjournal = {Nano Letters}\n}", "doi:10.1021/cm400893e": "@article{Gaultois_2013,\n\tdoi = {10.1021/cm400893e},\n\turl = {https://doi.org/10.1021%2Fcm400893e},\n\tyear = 2013,\n\tmonth = {may},\n\tpublisher = {American Chemical Society ({ACS})},\n\tvolume = {25},\n\tnumber = {15},\n\tpages = {2911--2920},\n\tauthor = {Michael W. Gaultois and Taylor D. Sparks and Christopher K. H. Borg and Ram Seshadri and William D. Bonificio and David R. Clarke},\n\ttitle = {Data-Driven Review of Thermoelectric Materials: Performance and Resource Considerations},\n\tjournal = {Chemistry of Materials}\n}", "url:https://en.wikipedia.org/wiki/Elastic_modulus": "@misc{url:169345,\n url = {https://en.wikipedia.org/wiki/Elastic_modulus}\n }", "url:https://link.springer.com/article/10.1134/1.1666949": "@misc{url:560228,\n url = {https://link.springer.com/article/10.1134/1.1666949}\n }", "doi:10.1007/0-387-25100-6_2": "@incollection{Morelli,\n\tdoi = {10.1007/0-387-25100-6_2},\n\turl = {https://doi.org/10.1007%2F0-387-25100-6_2},\n\tpublisher = {Springer-Verlag},\n\tpages = {37--68},\n\tauthor = {Donald T. Morelli and Glen A. Slack},\n\ttitle = {High Lattice Thermal Conductivity Solids},\n\tbooktitle = {High Thermal Conductivity Materials}\n}", "doi:10.1002/adfm.201600718": "@article{Wei_2016,\n\tdoi = {10.1002/adfm.201600718},\n\turl = {https://doi.org/10.1002%2Fadfm.201600718},\n\tyear = 2016,\n\tmonth = {may},\n\tpublisher = {Wiley},\n\tvolume = {26},\n\tnumber = {29},\n\tpages = {5360--5367},\n\tauthor = {Ping Wei and Jiong Yang and Liang Guo and Shanyu Wang and Lihua Wu and Xianfan Xu and Wenyu Zhao and Qingjie Zhang and Wenqing Zhang and Mildred S. Dresselhaus and Jihui Yang},\n\ttitle = {Minimum Thermal Conductivity in Weak Topological Insulators with Bismuth-Based Stack Structure},\n\tjournal = {Advanced Functional Materials}\n}", "url:https://en.wikipedia.org/wiki/Gr%C3%BCneisen_parameter": "@misc{url:418561,\n url = {https://en.wikipedia.org/wiki/Gr%C3%BCneisen_parameter}\n }", "doi:10.1063/1.2737054": "@article{Chakravarty_2007,\n\tdoi = {10.1063/1.2737054},\n\turl = {https://doi.org/10.1063%2F1.2737054},\n\tyear = 2007,\n\tmonth = {may},\n\tpublisher = {{AIP} Publishing},\n\tvolume = {126},\n\tnumber = {20},\n\tpages = {204508},\n\tauthor = {Charusita Chakravarty and Pablo G. Debenedetti and Frank H. Stillinger},\n\ttitle = {Lindemann measures for the solid-liquid phase transition},\n\tjournal = {The Journal of Chemical Physics}\n}", "doi:10.1103/PhysRevB.22.3790": "@article{Stillinger_1980,\n\tdoi = {10.1103/physrevb.22.3790},\n\turl = {https://doi.org/10.1103%2Fphysrevb.22.3790},\n\tyear = 1980,\n\tmonth = {oct},\n\tpublisher = {American Physical Society ({APS})},\n\tvolume = {22},\n\tnumber = {8},\n\tpages = {3790--3794},\n\tauthor = {F. H. 
Stillinger and T. A. Weber},\n\ttitle = {Lindemann melting criterion and the Gaussian core model},\n\tjournal = {Physical Review B}\n}", "url:https://en.m.wikipedia.org/wiki/Melting_point": "@misc{url:637536,\n url = {https://en.m.wikipedia.org/wiki/Melting_point}\n }", "url:https://en.wikipedia.org/wiki/Electron_mobility": "@misc{url:685331,\n url = {https://en.wikipedia.org/wiki/Electron_mobility}\n }", "@article{kasap2001hall,title={Hall effect in semiconductors},author={Kasap, Safa}}": "@article{kasap2001hall,title={Hall effect in semiconductors},author={Kasap, Safa}}", "url:https://en.wikipedia.org/wiki/Drude_model": "@misc{url:692259,\n url = {https://en.wikipedia.org/wiki/Drude_model}\n }", "url:https://link.aps.org/doi/10.1103/PhysRev.98.1751": "@misc{url:844896,\n url = {https://link.aps.org/doi/10.1103/PhysRev.98.1751}\n }", "url:https://www.sciencedirect.com/science/article/pii/S0257897202005935": "@misc{url:198185,\n url = {https://www.sciencedirect.com/science/article/pii/S0257897202005935}\n }", "url:https://journals.aps.org/prb/abstract/10.1103/PhysRevB.46.6131": "@misc{url:835839,\n url = {https://journals.aps.org/prb/abstract/10.1103/PhysRevB.46.6131}\n }", "doi:10.1134/1.1666949": "@article{Belomestnykh_2004,\n\tdoi = {10.1134/1.1666949},\n\turl = {https://doi.org/10.1134%2F1.1666949},\n\tyear = 2004,\n\tmonth = {feb},\n\tpublisher = {Pleiades Publishing Ltd},\n\tvolume = {30},\n\tnumber = {2},\n\tpages = {91--93},\n\tauthor = {V. N. Belomestnykh},\n\ttitle = {The acoustical Gr\u00c3\u00bcneisen constants of solids},\n\tjournal = {Technical Physics Letters}\n}", "isbn:0471847488": "@book{9780471847489,\n title = {The Art Of Experimental Physics},\n author = {Daryl W. Preston and Eric R. Dietz},\n isbn = {9780471847489},\n year = {1991},\n publisher = {John Wiley & Sons Inc}\n}", "@incollection{Morelli,\n\tyear = {2006},\n\tdoi = {10.1007/0-387-25100-6_2},\n\turl = {https://doi.org/10.1007%2F0-387-25100-6_2},\n\tpublisher = {Springer-Verlag},\n\tpages = {37--68},\n\tauthor = {Donald T. Morelli and Glen A. Slack},\n\ttitle = {High Lattice Thermal Conductivity Solids},\n\tbooktitle = {High Thermal Conductivity Materials}\n}": "@incollection{Morelli,\n\tyear = {2006},\n\tdoi = {10.1007/0-387-25100-6_2},\n\turl = {https://doi.org/10.1007%2F0-387-25100-6_2},\n\tpublisher = {Springer-Verlag},\n\tpages = {37--68},\n\tauthor = {Donald T. Morelli and Glen A. Slack},\n\ttitle = {High Lattice Thermal Conductivity Solids},\n\tbooktitle = {High Thermal Conductivity Materials}\n}", "@incollection{Morelli, \n\tdoi = {10.1007/0-387-25100-6_2}, \n\turl = {https://doi.org/10.1007%2F0-387-25100-6_2}, \n\tpublisher = {Springer-Verlag}, \n\tpages = {37--68}, \n\tyear = {2006}, \n\tauthor = {Donald T. Morelli and Glen A. Slack}, \n\ttitle = {High Lattice Thermal Conductivity Solids}, \n\tbooktitle = {High Thermal Conductivity Materials}\n}": "@incollection{Morelli, \n\tdoi = {10.1007/0-387-25100-6_2}, \n\turl = {https://doi.org/10.1007%2F0-387-25100-6_2}, \n\tpublisher = {Springer-Verlag}, \n\tpages = {37--68}, \n\tyear = {2006}, \n\tauthor = {Donald T. Morelli and Glen A. Slack}, \n\ttitle = {High Lattice Thermal Conductivity Solids}, \n\tbooktitle = {High Thermal Conductivity Materials}\n}", "@incollection{Morelli, doi = {10.1007/0-387-25100-6_2}, turl = {https://doi.org/10.1007%2F0-387-25100-6_2}, publisher = {Springer-Verlag}, pages = {37--68}, year = {2006}, author = {Donald T. Morelli and Glen A. 
Slack}, title = {High Lattice Thermal Conductivity Solids}, booktitle = {High Thermal Conductivity Materials}}": "@incollection{Morelli, doi = {10.1007/0-387-25100-6_2}, turl = {https://doi.org/10.1007%2F0-387-25100-6_2}, publisher = {Springer-Verlag}, pages = {37--68}, year = {2006}, author = {Donald T. Morelli and Glen A. Slack}, title = {High Lattice Thermal Conductivity Solids}, booktitle = {High Thermal Conductivity Materials}}", "@incollection{Morelli, doi = {10.1007/0-387-25100-6_2}, url = {https://doi.org/10.1007%2F0-387-25100-6_2}, publisher = {Springer-Verlag}, pages = {37--68}, year = {2006}, author = {Donald T. Morelli and Glen A. Slack}, title = {High Lattice Thermal Conductivity Solids}, booktitle = {High Thermal Conductivity Materials}}": "@incollection{Morelli, doi = {10.1007/0-387-25100-6_2}, url = {https://doi.org/10.1007%2F0-387-25100-6_2}, publisher = {Springer-Verlag}, pages = {37--68}, year = {2006}, author = {Donald T. Morelli and Glen A. Slack}, title = {High Lattice Thermal Conductivity Solids}, booktitle = {High Thermal Conductivity Materials}}", "doi:10.1016/j.commatsci.2012.02.005": "@article{Curtarolo_2012,\n\tdoi = {10.1016/j.commatsci.2012.02.005},\n\turl = {https://doi.org/10.1016%2Fj.commatsci.2012.02.005},\n\tyear = 2012,\n\tmonth = {jun},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {58},\n\tpages = {218--226},\n\tauthor = {Stefano Curtarolo and Wahyu Setyawan and Gus L.W. Hart and Michal Jahnatek and Roman V. Chepulskii and Richard H. Taylor and Shidong Wang and Junkai Xue and Kesong Yang and Ohad Levy and Michael J. Mehl and Harold T. Stokes and Denis O. Demchenko and Dane Morgan},\n\ttitle = {{AFLOW}: An automatic framework for high-throughput materials discovery},\n\tjournal = {Computational Materials Science}\n}", "doi:10.1016/j.commatsci.2014.05.014": "@article{Taylor_2014,\n\tdoi = {10.1016/j.commatsci.2014.05.014},\n\turl = {https://doi.org/10.1016%2Fj.commatsci.2014.05.014},\n\tyear = 2014,\n\tmonth = {oct},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {93},\n\tpages = {178--192},\n\tauthor = {Richard H. Taylor and Frisco Rose and Cormac Toher and Ohad Levy and Kesong Yang and Marco Buongiorno Nardelli and Stefano Curtarolo},\n\ttitle = {A {RESTful} {API} for exchanging materials data in the {AFLOWLIB}.org consortium},\n\tjournal = {Computational Materials Science}\n}", "doi:10.1016/j.commatsci.2017.04.036": "@article{Rose_2017,\n\tdoi = {10.1016/j.commatsci.2017.04.036},\n\turl = {https://doi.org/10.1016%2Fj.commatsci.2017.04.036},\n\tyear = 2017,\n\tmonth = {sep},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {137},\n\tpages = {362--370},\n\tauthor = {Frisco Rose and Cormac Toher and Eric Gossett and Corey Oses and Marco Buongiorno Nardelli and Marco Fornari and Stefano Curtarolo},\n\ttitle = {{AFLUX}: The {LUX} materials search {API} for the {AFLOW} data repositories},\n\tjournal = {Computational Materials Science}\n}", "url:https://en.wikipedia.org/wiki/Debye_model": "@misc{url:338569,\n url = {https://en.wikipedia.org/wiki/Debye_model}\n }", "doi:10.1002/andp.19123441404": "@article{Debye_1912,\n\tdoi = {10.1002/andp.19123441404},\n\turl = {https://doi.org/10.1002%2Fandp.19123441404},\n\tyear = 1912,\n\tpublisher = {Wiley},\n\tvolume = {344},\n\tnumber = {14},\n\tpages = {789--839},\n\tauthor = {P. 
Debye},\n\ttitle = {Zur Theorie der spezifischen W\u00c3\u00a4rmen},\n\tjournal = {Annalen der Physik}\n}", "url:https://eng.libretexts.org/Bookshelves/Materials_Science/Supplemental_Modules_(Materials_Science)/Electronic_Properties/Debye_Model_For_Specific_Heat": "@misc{url:422223,\n url = {https://eng.libretexts.org/Bookshelves/Materials_Science/Supplemental_Modules_(Materials_Science)/Electronic_Properties/Debye_Model_For_Specific_Heat}\n }", "@article{Ravindra_2007,\n\tdoi = {10.1016/j.infrared.2006.04.001},\n\turl = {https://doi.org/10.1016%2Fj.infrared.2006.04.001},\n\tyear = 2007,\n\tmonth = {mar},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {50},\n\tnumber = {1},\n\tpages = {21--29},\n\tauthor = {N.M. Ravindra and Preethi Ganapathy and Jinsoo Choi},\n\ttitle = {Energy gap{\\textendash}refractive index relations in semiconductors {\\textendash} An overview},\n\tjournal = {Infrared Physics {\\&} Technology}\n}": "@article{Ravindra_2007,\n\tdoi = {10.1016/j.infrared.2006.04.001},\n\turl = {https://doi.org/10.1016%2Fj.infrared.2006.04.001},\n\tyear = 2007,\n\tmonth = {mar},\n\tpublisher = {Elsevier {BV}},\n\tvolume = {50},\n\tnumber = {1},\n\tpages = {21--29},\n\tauthor = {N.M. Ravindra and Preethi Ganapathy and Jinsoo Choi},\n\ttitle = {Energy gap{\\textendash}refractive index relations in semiconductors {\\textendash} An overview},\n\tjournal = {Infrared Physics {\\&} Technology}\n}", "@article{Moss_1985,\n\tdoi = {10.1002/pssb.2221310202},\n\turl = {https://doi.org/10.1002%2Fpssb.2221310202},\n\tyear = 1985,\n\tmonth = {oct},\n\tpublisher = {Wiley},\n\tvolume = {131},\n\tnumber = {2},\n\tpages = {415--427},\n\tauthor = {T. S. Moss},\n\ttitle = {Relations between the Refractive Index and Energy Gap of Semiconductors},\n\tjournal = {physica status solidi (b)}\n}": "@article{Moss_1985,\n\tdoi = {10.1002/pssb.2221310202},\n\turl = {https://doi.org/10.1002%2Fpssb.2221310202},\n\tyear = 1985,\n\tmonth = {oct},\n\tpublisher = {Wiley},\n\tvolume = {131},\n\tnumber = {2},\n\tpages = {415--427},\n\tauthor = {T. S. Moss},\n\ttitle = {Relations between the Refractive Index and Energy Gap of Semiconductors},\n\tjournal = {physica status solidi (b)}\n}", "@article{Gupta_1980,\n\tdoi = {10.1002/pssb.2221000240},\n\turl = {https://doi.org/10.1002%2Fpssb.2221000240},\n\tyear = 1980,\n\tmonth = {aug},\n\tpublisher = {Wiley},\n\tvolume = {100},\n\tnumber = {2},\n\tpages = {715--719},\n\tauthor = {V. P. Gupta and N. M. Ravindra},\n\ttitle = {Comments on the Moss Formula},\n\tjournal = {physica status solidi (b)}\n}": "@article{Gupta_1980,\n\tdoi = {10.1002/pssb.2221000240},\n\turl = {https://doi.org/10.1002%2Fpssb.2221000240},\n\tyear = 1980,\n\tmonth = {aug},\n\tpublisher = {Wiley},\n\tvolume = {100},\n\tnumber = {2},\n\tpages = {715--719},\n\tauthor = {V. P. Gupta and N. M. Ravindra},\n\ttitle = {Comments on the Moss Formula},\n\tjournal = {physica status solidi (b)}\n}"} \ No newline at end of file diff --git a/propnet/dbtools/correlation.py b/propnet/dbtools/correlation.py index 2accc6cd..3295f63c 100644 --- a/propnet/dbtools/correlation.py +++ b/propnet/dbtools/correlation.py @@ -1,18 +1,55 @@ -from maggma.builders import Builder -from itertools import combinations_with_replacement -import numpy as np +"""Database builder for correlation calculations. + +This module calculates correlation scores between scalar materials properties +to elucidate relationships between properties and writes the results to a MongoDB +collection with optional output to a file. 
The builder requires a propnet +quantity database (preferred) or a full materials database (slower) as input. + +Example: + The builder can be executed using a ``maggma`` Runner in a Python script: + + >>> from propnet.dbtools.correlation import CorrelationBuilder + >>> from maggma.stores import MongoStore + >>> from maggma.runner import Runner + >>> pn_quantity_db = MongoStore(...) # read access + >>> pn_correlation_db = MongoStore(...) # write access + >>> cb = CorrelationBuilder(pn_quantity_db, pn_correlation_db, + >>> from_quantity_db=True) + >>> runner = Runner([cb]) + >>> runner.run() + + It can also be run using the ``maggma`` command line tool. In Python, build + the Runner as above, and then: + + >>> from monty.serialization import dumpfn + >>> dumpfn(runner, 'correlation_runner.json') + + And then in the terminal:: + + $ mrun -n NUM_PROCS correlation_runner.json + + If running on a large data set (>100k materials), ``NUM_PROCS`` should be small + to ensure the system RAM is not used up, especially if using ``'mic'`` correlation. + +""" + +import logging +import re import json from collections import defaultdict +from itertools import combinations_with_replacement + +import numpy as np +from maggma.builders import Builder + from propnet.core.graph import Graph from propnet import ureg -import logging -import re - # noinspection PyUnresolvedReferences import propnet.models from propnet.core.registry import Registry logger = logging.getLogger(__name__) +"""logging.Logger: Logger for debugging""" class CorrelationBuilder(Builder): @@ -21,12 +58,25 @@ class CorrelationBuilder(Builder): using a suite of regression tools. Uses the Builder architecture for optional parallel processing of data. - Note: serialization of builder does not work with custom correlation functions, although - interactive use does support them. - + Notes: + - Serialization of builder using ``as_dict()`` does not work with custom correlation + functions, although interactive use does support them. + + Attributes: + propnet_store (maggma.stores.Store): MongoDB collection containing quantity or material + data to be queried for correlation calculation. + correlation_store (maggma.stores.Store): MongoDB collection to which correlation calculation + data will be written. + from_quantity_db (bool): ``True`` if ``propnet_store`` follows the quantity-based schema created + with ``propnet.dbtools.separation.SeparationBuilder``. ``False`` if ``propnet_store`` follows + the material-based schema which is default output from ``propnet.dbtools.mp_builder.PropnetBuilder``. + out_file (`str` or `None`): file name to output correlation data after builder is complete. + sample_size (`int` or `None`): maximum number of randomly-sampled data points to include in + correlation calculations. If ``None``, no limit is imposed. """ - PROPNET_PROPS = [v.name for v in Registry("symbols").values() + _SCALAR_PROPS = [v.name for v in Registry("symbols").values() if (v.category == 'property' and v.shape == 1)] + """List of the names of all scalar properties contained in propnet.""" def __init__(self, propnet_store, correlation_store, out_file=None, @@ -34,33 +84,35 @@ def __init__(self, propnet_store, sample_size=None, from_quantity_db=True, **kwargs): """ - Constructor for the correlation builder. 
- Args: - propnet_store (Mongolike Store): store instance pointing to propnet collection + propnet_store (maggma.stores.Store): store instance pointing to propnet collection with read access - correlation_store (Mongolike Store): store instance pointing to collection with write access - out_file (str): optional, filename to output data in JSON format (useful if using a MemoryStore - for correlation_store) - funcs (`str`, `callable`, list of `str` or `callable`) functions to use for correlation. + correlation_store (maggma.stores.Store): store instance pointing to collection with write access + out_file (str): optional, filename to output data in JSON format (useful if using a ``MemoryStore`` + for ``correlation_store``). Default: None (no file output) + funcs (`str`, `callable`, `list` of `str` and/or `callable`): functions to use for correlation. Custom + functions can be passed into this argument but are not JSON-serializable. Built-in functions can be specified by the following strings: - linlsq (default): linear least-squares, reports R^2 - pearson: Pearson r-correlation, reports r - spearman: Spearman rank correlation, reports r - mic: maximal-information non-parametric exploration, reports maximal information coefficient - ransac: random sample consensus (RANSAC) regression, reports score - theilsen: Theil-Sen regression, reports score - all: runs all correlation functions above + - ``'linlsq'``: linear least-squares, reports R^2 (default) + - ``'pearson'``: Pearson r-correlation, reports r + - ``'spearman'``: Spearman rank correlation, reports r + - ``'mic'``: maximal-information non-parametric exploration, reports maximal information coefficient + - ``'ransac'``: random sample consensus (RANSAC) regression, reports score + - ``'theilsen'``: Theil-Sen regression, reports score + - ``'all'``: runs all correlation functions above + + Default: ``'linlsq'`` props (`list` of `str`): optional, list of properties for which to calculate the correlation. - Default is to calculate for all possible pairs (props=None) + Default: ``None`` (calculate for all possible pairs) sample_size (int): optional, limits correlation calculation data to a random sample of size - `sample_size`. Default: None (no limit) - from_quantity_db (bool): True means propnet_store follows the quantity-indexed database - schema, False means the full, material-indexed database schema. Note: querying quantity-indexed + `sample_size`. Default: ``None`` (no limit) + from_quantity_db (bool): ``True`` means ``propnet_store`` follows the quantity-indexed database + schema, ``False`` means the full, material-indexed database schema. Note: querying quantity-indexed databases is considerably faster than material-indexed. 
- Default: True (quantity schema) - **kwargs: arguments to the Builder superclass + Default: ``True`` (quantity schema) + **kwargs: arguments to the ``Builder`` superclass + """ self.propnet_store = propnet_store @@ -89,24 +141,30 @@ def __init__(self, propnet_store, if not self._funcs: raise ValueError("No valid correlation functions selected") - self._props = props or self.PROPNET_PROPS + self._props = props or self._SCALAR_PROPS if sample_size is not None and sample_size < 2: raise ValueError("Sample size must be greater than 1") self.sample_size = sample_size - self.total = None super(CorrelationBuilder, self).__init__(sources=[propnet_store], targets=[correlation_store], **kwargs) + @property + def total(self): + """ + int: total number of calculations that will be performed during build + """ + return len(self._props) ** 2 * len(self._funcs) + @classmethod def get_correlation_funcs(cls): """ Gets built-in correlation functions and their names. Returns: - dict: dict of function handles keyed by name + dict: dictionary of function handles keyed by name """ return {f.replace('_cfunc_', ''): getattr(cls, f) @@ -118,10 +176,15 @@ def get_items(self): Accumulates data and generates data sets for pairs of properties coupled with correlation functions. - Returns: - (generator): yields dicts of data (see _make_data_combinations()) + Yields: + dict: yields dictionaries of data with the following structure: + + - ``'x_data'`` (`list` of `float`): data for independent property (x-axis) + - ``'x_name'`` (str): name of independent property + - ``'y_data'`` (`list` of `float`): data for dependent property (y-axis) + - ``'y_name'`` (str): name of dependent property + - ``'func'`` (Tuple[str, callable]): name and function handle for correlation function """ - self.total = len(self._props) ** 2 * len(self._funcs) # combinations_with_replacement() produces all possible pairs of properties # without repeating, i.e. will give AB but not BA. Code below manually @@ -139,17 +202,17 @@ def get_items(self): @staticmethod def get_data_from_quantity_db(store, *props, sample_size=None, include_id=False): """ - Collects scalar data from the quantity-onlu propnet database, + Collects scalar data from the quantity-only propnet database, aggregates it by material and property, and samples it if desired. Args: - store (maggma.stores.Store): MongoDB store instance for quantity databse - *props (str): property names as strings - sample_size (int): If specified, limits the number of returned records - to sample_size, randomly selected. If total of records is less than - sample_size, only those records are returned. Default: None (all records) - include_id (bool): True includes the '_id' field, which contains the material - key for the record. Default: False (do not include the field) + store (maggma.stores.Store): MongoDB store instance for quantity database + *props (str): one or more property names as strings + sample_size (int): if specified, limits the number of returned records + to ``sample_size``, randomly selected. If total of records is less than + ``sample_size``, only those records are returned. Default: ``None`` (all records) + include_id (bool): ``True`` includes the ``_id`` field, which contains the material + key for the record. 
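# A minimal, hypothetical usage sketch based on the constructor docstring above.
# The store parameters are elided (as in the module-level example), and the property
# names, output filename, and custom correlation function are placeholders, not part
# of the diff.
from maggma.stores import MongoStore
from propnet.dbtools.correlation import CorrelationBuilder

def abs_spearman(x, y):
    # custom correlation function: absolute Spearman rank coefficient
    # (usable interactively, but, as noted above, not JSON-serializable)
    from scipy import stats
    return abs(stats.spearmanr(x, y)[0])

quantity_db = MongoStore(...)     # read access, quantity-indexed schema
correlation_db = MongoStore(...)  # write access

builder = CorrelationBuilder(
    quantity_db, correlation_db,
    funcs=['linlsq', 'mic', abs_spearman],  # built-in names mixed with a custom callable
    props=['band_gap', 'bulk_modulus'],     # hypothetical subset; None means all scalar property pairs
    sample_size=5000,                       # cap each pairwise data set at 5000 random records
    from_quantity_db=True,
    out_file='correlation_out.json')        # placeholder filename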
Default: ``False`` (do not include the field) Returns: dict: dictionary of data keyed by property name @@ -201,6 +264,7 @@ def get_data_from_quantity_db(store, *props, sample_size=None, include_id=False) return dict(data) def get_data_from_full_db(self, prop_x, prop_y): + # TODO: Extract this and the quantity db equivalent as propnet db API class """ Collects scalar data from full propnet database, aggregates it by property, and samples it if desired. @@ -277,13 +341,14 @@ def _make_data_combinations(self, prop_x, prop_y, data): prop_y (str): name of property y data (dict): dictionary of data keyed by property name - Returns: (generator) a generator providing a dictionary with the data for correlation: - {'x_data': (list) data for independent property (x-axis), - 'x_name': (str) name of independent property, - 'y_data': (list) data for dependent property (y-axis), - 'y_name': (str) name of dependent property, - 'func': (tuple) name and function handle for correlation function - } + Yields: + dict: a dictionary with the data for correlation with the following structure: + + - ``'x_data'`` (`list` of `float`): data for independent property (x-axis), + - ``'x_name'`` (str): name of independent property, + - ``'y_data'`` (`list` of `float`): data for dependent property (y-axis), + - ``'y_name'`` (str): name of dependent property, + - ``'func'`` (Tuple[str, callable]): name and function handle for correlation function """ # So we get AB and BA without re-querying, but not two AA @@ -302,23 +367,24 @@ def _make_data_combinations(self, prop_x, prop_y, data): def process_item(self, item): """ - Run correlation calculation on a pair of properties using the specified function. + Runs correlation calculation on a pair of properties using the specified function. Args: - item: (dict) input provided by get_items() (see get_items() for structure) - - Returns: (tuple) output of calculation with necessary - information about calculation included. Format in tuple: - independent property (x-axis) name, - dependent property (y-axis) name, - correlation value, - correlation function name, - number of data points used for correlation - length of shortest path between properties on propnet graph where x-axis property - is starting property and y-axis property is ending property. - Note: if no (forward) connection exists, the path length will be None. This does - not preclude y->x having a forward path. + item (dict): input provided by ``get_items()`` (see definition for structure) Returns: + Tuple[str, str, float `or` Exception, str, int, int `or` None]: output of calculation with necessary + information about calculation included. Ordering of elements in tuple is: + + - independent property (x-axis) name + - dependent property (y-axis) name + - correlation value or exception if an error occurs + - correlation function name + - number of data points used for correlation + - length of shortest path between properties on propnet graph where x-axis property + is starting property and y-axis property is ending property. + Note: if no (forward) connection exists, the path length will be None. This does + not preclude y->x having a forward path. """ prop_x, prop_y = item['x_name'], item['y_name'] data_x, data_y = item['x_data'], item['y_data'] @@ -352,13 +418,14 @@ def process_item(self, item): @staticmethod def _cfunc_mic(x, y): """ - Get maximal information coefficient for data set. + Gets maximal information coefficient for data set.
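# A hypothetical illustration of the data flow documented above: the dict shape yielded
# by get_items()/_make_data_combinations() and the tuple returned by process_item().
# Property names and numeric values are invented placeholders.
from propnet.dbtools.correlation import CorrelationBuilder

item = {
    'x_data': [1.1, 2.0, 3.2],    # independent property values
    'x_name': 'band_gap',         # hypothetical property name
    'y_data': [2.3, 2.0, 1.7],    # dependent property values
    'y_name': 'refractive_index', # hypothetical property name
    'func': ('linlsq', CorrelationBuilder._cfunc_linlsq),
}
# result = builder.process_item(item)   # using a CorrelationBuilder instance
# result would then be a 6-element tuple roughly like:
# ('band_gap', 'refractive_index', 0.98, 'linlsq', 3, 2)
#  x name      y name              value  func name  n  graph path length (None if no forward path)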
Args: - x: (list) independent property (x-axis) - y: (list) dependent property (y-axis) + x (`list` of `float`): independent property (x-axis) + y (`list` of `float`): dependent property (y-axis) - Returns: (float) maximal information coefficient + Returns: + float: maximal information coefficient """ from minepy import MINE @@ -369,13 +436,14 @@ def _cfunc_mic(x, y): @staticmethod def _cfunc_linlsq(x, y): """ - Get R^2 value for linear least-squares fit of a data set. + Gets R^2 value for linear least-squares fit of a data set. Args: - x: (list) independent property (x-axis) - y: (list) dependent property (y-axis) + x (`list` of `float`): independent property (x-axis) + y (`list` of `float`): dependent property (y-axis) - Returns: (float) R^2 value + Returns: + float: R^2 value """ from scipy import stats @@ -385,13 +453,14 @@ def _cfunc_linlsq(x, y): @staticmethod def _cfunc_pearson(x, y): """ - Get R value for Pearson fit of a data set. + Gets R value for Pearson fit of a data set. Args: - x: (list) independent property (x-axis) - y: (list) dependent property (y-axis) + x (`list` of `float`): independent property (x-axis) + y (`list` of `float`): dependent property (y-axis) - Returns: (float) Pearson R value + Returns: + float: Pearson R value """ from scipy import stats @@ -401,13 +470,14 @@ def _cfunc_pearson(x, y): @staticmethod def _cfunc_spearman(x, y): """ - Get R value for Spearman fit of a data set. + Gets R value for Spearman fit of a data set. Args: - x: (list) independent property (x-axis) - y: (list) dependent property (y-axis) + x (`list` of `float`): independent property (x-axis) + y (`list` of `float`): dependent property (y-axis) - Returns: (float) Spearman R value + Returns: + float: Spearman R value """ from scipy import stats @@ -417,13 +487,14 @@ def _cfunc_spearman(x, y): @staticmethod def _cfunc_ransac(x, y): """ - Get random sample consensus (RANSAC) regression score for data set. + Gets random sample consensus (RANSAC) regression score for data set. Args: - x: (list) independent property (x-axis) - y: (list) dependent property (y-axis) + x (`list` of `float`): independent property (x-axis) + y (`list` of `float`): dependent property (y-axis) - Returns: (float) RANSAC score + Returns: + float: RANSAC score """ from sklearn.linear_model import RANSACRegressor @@ -435,13 +506,14 @@ def _cfunc_ransac(x, y): @staticmethod def _cfunc_theilsen(x, y): """ - Get Theil-Sen regression score for data set. + Gets Theil-Sen regression score for data set. Args: - x: (list) independent property (x-axis) - y: (list) dependent property (y-axis) + x (`list` of `float`): independent property (x-axis) + y (`list` of `float`): dependent property (y-axis) - Returns: (float) Theil-Sen score + Returns: + float: Theil-Sen score """ from sklearn.linear_model import TheilSenRegressor @@ -452,10 +524,10 @@ def _cfunc_theilsen(x, y): def update_targets(self, items): """ - Write correlation data to Mongo store. + Writes correlation data to Mongo store. Args: - items: (list) list of results output by process_item() + items (`list` of `tuple`): list of results output by ``process_item()`` """ data = [] @@ -482,7 +554,8 @@ def finalize(self, cursor=None): clean-up function for Builder. Args: - cursor: (Mongo Store cursor) optional, cursor to close if not automatically closed. + cursor (pymongo.cursor.Cursor): optional, cursor to close if not automatically closed. 
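# A small sanity-check sketch of the _cfunc_* contract shown above: each static method
# takes two equal-length sequences of floats and returns a single float score. The toy
# data is invented; a perfectly linear pair should score 1.0 for these three functions.
from propnet.dbtools.correlation import CorrelationBuilder

x = [1.0, 2.0, 3.0, 4.0]
y = [2.0, 4.0, 6.0, 8.0]
print(CorrelationBuilder._cfunc_linlsq(x, y))    # 1.0 (R^2 of linear least-squares fit)
print(CorrelationBuilder._cfunc_pearson(x, y))   # 1.0 (Pearson r)
print(CorrelationBuilder._cfunc_spearman(x, y))  # 1.0 (Spearman rank r)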
+ Default: ``None`` (no cursor to be closed) """ @@ -502,10 +575,10 @@ def finalize(self, cursor=None): def write_correlation_data_file(self, out_file): """ - Gets data dictionary containing correlation matrices and outputs to a file. + Writes data from ``get_correlation_matrices()`` to a JSON file. Args: - out_file: (str) file path and name for output to JSON file + out_file (str): file path and name of JSON file to write """ matrix = self.get_correlation_matrices() with open(out_file, 'w') as f: @@ -517,18 +590,18 @@ def get_correlation_matrices(self, func_name=None): correlation algorithm and properties of the data set. Args: - func_name: (str) optional, name of the correlation functions to include in the document - default: None, which is to include all that were run by this builder. + func_name (str): optional, name of the correlation functions to include in the document. + Default: ``None`` (include all that were run by this builder) - Returns: (dict) document containing correlation data. Format: - {'properties': (list) names of properties calculated in order of how they are indexed - in the matrices - 'n_points': (list>) list of lists (i.e. matrix) containing the number of data - points evaluated during the fitting procedure - 'correlation': (dict>>) dictionary of matrices containing correlation - results, keyed by correlation function name - } + Returns: + dict: dictionary containing correlation data with the following structure: + - ``'properties'`` (`list` of `str`) - names of properties calculated in order of how + they are indexed in the matrices + - ``'n_points'`` (`list` of `list` of `int`) - list of lists (i.e. matrix) containing + the number of data points evaluated during the fitting procedure + - ``'correlation'`` (dict) - dictionary of matrices (list of list of floats) containing + correlation results, keyed by correlation function name """ prop_data = self.correlation_store.query(criteria={'property_x': {'$exists': True}}, @@ -577,10 +650,12 @@ def get_correlation_matrices(self, func_name=None): def as_dict(self): """ Returns the representation of the builder as a dictionary in JSON serializable format. + Note: because functions are not JSON serializable, custom functions are omitted when - serializing the object. + serializing the object. - Returns: (dict) representation of this builder as a JSON-serializable dictionary + Returns: + dict: representation of this builder as a JSON-serializable dictionary """ d = super(CorrelationBuilder, self).as_dict() diff --git a/propnet/dbtools/correlation_with_mp.py b/propnet/dbtools/correlation_with_mp.py index b65d77b6..447d8dc4 100644 --- a/propnet/dbtools/correlation_with_mp.py +++ b/propnet/dbtools/correlation_with_mp.py @@ -1,25 +1,27 @@ -from maggma.builders import Builder -from itertools import product -import numpy as np +""" +WARNING: THIS MODULE IS DEPRECATED AND WILL BE REMOVED IN THE NEAR FUTURE. +This module is only here for backwards compatibility. Use ``propnet.dbtools.correlation`` instead. +""" import json -from collections import defaultdict -from propnet.core.graph import Graph -from propnet import ureg import logging import re +from itertools import product +from collections import defaultdict +import warnings + +from maggma.builders import Builder +import numpy as np +from propnet.core.graph import Graph +from propnet import ureg # noinspection PyUnresolvedReferences import propnet.models from propnet.core.registry import Registry +warnings.warn("The correlation_with_mp module is deprecated. 
Use the correlation module instead.", + DeprecationWarning) logger = logging.getLogger(__name__) -''' -WARNING: This class is only here for backwards compatibility. -Use propnet.dbtools.correlation.CorrelationBuilder instead. -Will likely be removed in the near future. -''' - class CorrelationBuilder(Builder): """ diff --git a/propnet/models/tests/test_default_models.py b/propnet/models/tests/test_default_models.py index 5187db8f..91a5ea72 100644 --- a/propnet/models/tests/test_default_models.py +++ b/propnet/models/tests/test_default_models.py @@ -3,6 +3,7 @@ # noinspection PyUnresolvedReferences from propnet.models import add_builtin_models_to_registry from propnet.core.registry import Registry +from propnet.core.symbols import Symbol from collections import defaultdict @@ -118,7 +119,7 @@ def test_model_formatting(self): for key in model.variable_symbol_map.keys(): self.assertTrue(isinstance(key, str), 'Invalid variable_symbol_map key: ' + str(key)) self.assertTrue( - isinstance(model.variable_symbol_map[key], str) + isinstance(model.variable_symbol_map[key], Symbol) and model.variable_symbol_map[key] in Registry("symbols").keys(), msg=msg) self.assertTrue( model.connections is not None and isinstance(model.connections, list) and len(model.connections) > 0,