2 changes: 1 addition & 1 deletion docs/conf.py
@@ -21,7 +21,7 @@
 # -- Project information -----------------------------------------------------
 
 project = u'propnet'
-copyright = u'2018, The Propnet Development Team'
+copyright = u'2019, The Propnet Development Team'
 author = u'The Propnet Development Team'
 
 # The short X.Y version
7 changes: 7 additions & 0 deletions propnet/core/__init__.py
@@ -0,0 +1,7 @@
"""
This module contains the core architecture of propnet. These components provide the infrastructure for building
the propnet knowledge graph, populating it with data, and traversing it to uncover new data. It also contains
analysis modules to inspect materials properties.

This package is in active development, so analysis and core functionality is continually being added!
"""
3 changes: 2 additions & 1 deletion propnet/core/exceptions.py
@@ -1,6 +1,7 @@
"""
Module defining exception and warning classes.
"""
# TODO: Maybe expand these?


class ModelEvaluationError(RuntimeError):
@@ -15,4 +16,4 @@ class IncompleteData(Warning):

 class SymbolConstraintError(RuntimeError):
     """Invalid quantity value with respect to symbol constraints"""
-    pass
+    pass
86 changes: 51 additions & 35 deletions propnet/core/fitting.py
@@ -1,6 +1,21 @@
"""
This module contains code relevant to using fitting to improve
the aggregation process
"""Benchmarking routine for improved aggregation of properties.

This module contains routines to benchmark models against experimental values to improve the aggregation scheme
for quantities. By default, when a ``Material`` contains multiple derived quantities for a given property (symbol),
they are aggregated using a simple, unweighted mean. However, depending on the quality of the models used to produce
those quantities, this may not be ideal.

These routines calculate optimal weights for models given an experimental dataset of materials to match.

Example:
>>> from propnet.core.fitting import fit_model_scores
>>> from propnet.core.materials import Material
>>> materials = [Material(...), ...] # a list of materials populated with properties
>>> benchmarks = [
>>> {'symbol_name': ...}, ... # a list of benchmark data as dicts
>>> ]
>>> # select models for which to calculate weights and run
>>> scores = fit_model_scores(materials, benchmarks, models=['model_1', 'model_2', ...])
"""


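To make the docstring's aggregation scheme concrete before the diff continues: the default is an unweighted mean over all derived values, while model scores turn it into a weighted mean. A minimal sketch with plain numpy and made-up values, not the propnet API:

    import numpy as np

    # Three models derive values for the same symbol on one material
    values = np.array([4.2, 3.9, 5.6])

    # Default aggregation: simple, unweighted mean
    unweighted = values.mean()                      # ~4.57

    # Score-based aggregation: weighted mean, down-weighting the third model
    weights = np.array([1.0, 1.0, 0.2])
    weighted = np.average(values, weights=weights)  # ~4.19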
@@ -9,24 +24,25 @@
 from collections import OrderedDict
 
 import numpy as np
 
 from scipy.optimize import minimize, Bounds, LinearConstraint
-from propnet.core.quantity import QuantityFactory
+
+from propnet.core.quantity import QuantityFactory
+# noinspection PyUnresolvedReferences
+import propnet.models
 from propnet.core.registry import Registry
 
 
 def aggregate_quantities(quantities, model_score_dict=None):
     """
-    Simple method for aggregating a set of quantities
+    Simple method for aggregating a set of quantities.
 
     Args:
-        quantities:
-        model_score_dict:
+        quantities (`iterable` of `propnet.core.quantity.NumQuantity`): iterable of Quantity objects to aggregate
+        model_score_dict (dict): dict of weights to apply to models, keyed
+            by model name or Model object
 
     Returns:
-
+        propnet.core.quantity.NumQuantity: resulting quantity from aggregation
     """
     symbol = next(iter(quantities)).symbol
     if not all([q.symbol == symbol for q in quantities]):
@@ -39,14 +55,15 @@ def aggregate_quantities(quantities, model_score_dict=None):

 def get_weight(quantity, model_score_dict=None):
     """
-    Gets weight based on scoring scheme
+    Calculates weight based on scoring scheme and provenance of the quantity.
 
     Args:
-        quantity (Quantity): quantity for which to get weight
-        model_score_dict ({str: float}): dictionary of model names to scores
+        quantity (propnet.core.quantity.NumQuantity): quantity for which to get weight
+        model_score_dict (dict): dict of weights as floats to apply to models, keyed
+            by model name or Model object
 
     Returns:
-        calculated weight for input quantity
+        float: calculated weight for input quantity
     """
     if quantity.provenance is None or quantity.provenance.inputs is None:
         return 1
@@ -62,25 +79,23 @@ def get_weight(quantity, model_score_dict=None):
 def fit_model_scores(materials, benchmarks, models=None,
                      init_scores=None, constrain_sum=False):
     """
-    Fits a set of model scores to a set of benchmark data
+    Fits a set of model scores/weights to a set of benchmark data by minimizing
+    the sum of squared errors against the benchmark values.
 
     Args:
-        materials ([Material]): list of evaluated materials containing
+        materials (`list` of `propnet.core.materials.Material`): list of evaluated materials containing
             symbols for benchmarking
-        benchmarks ([{Symbol or str: float}]): list of dicts, keyed by Symbol
-            or symbol name containing benchmark data for each material in ``materials``.
-        models ([Model or str]): list of models which should have their
-            scores adjusted in the aggregation weighting scheme
-        init_scores ({str: float}): initial scores for minimization procedure.
-            If unspecified, all scores are equal. Scores are normalized to sum of
-            scores.
-        constrain_sum (bool): True constrains the sum of weights to 1, False
-            removes this constraint. Default: False (no constraint)
+        benchmarks (`list` of `dict`): list of dicts, keyed by Symbol or symbol name, containing benchmark data
+            for each material in ``materials``.
+        models (`list` of `propnet.core.models.Model` or `list` of `str` or `None`): optional, list of models whose
+            scores will be adjusted in the aggregation weighting scheme. Default: `None` (all models will be adjusted)
+        init_scores (dict): optional, dict containing initial scores for minimization procedure, keyed by model name
+            or Model. Scores are normalized to sum of scores. Default: `None` (all scores are equal)
+        constrain_sum (bool): optional, ``True`` constrains the sum of scores to 1, ``False``
+            removes this constraint. Default: ``False`` (no constraint)
 
     Returns:
-        {str: float} scores corresponding to those which minimize
-        SSE for the benchmarked dataset
-
+        OrderedDict: dict of scores corresponding to the minimized sum of squared errors, keyed by model.
     """
     # Probably not smart to have ALL available models in the list. That's a lot of DOF.
     # TODO: Perhaps write a method to produce a list of models in the provenance trees
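The imports added earlier in this file (``minimize``, ``Bounds``, ``LinearConstraint``) hint at how the fit works. A self-contained toy version of the optimization, with plain numpy arrays standing in for Material objects and benchmark dicts; all names and values here are illustrative only:

    import numpy as np
    from scipy.optimize import minimize, Bounds, LinearConstraint

    # Toy data: rows are materials, columns are three models' predictions
    predictions = np.array([[4.2, 3.9, 5.6],
                            [2.1, 2.0, 2.9],
                            [7.8, 8.1, 9.9]])
    benchmarks = np.array([4.0, 2.05, 8.0])

    def sse(scores):
        # Weighted-mean aggregation per material, then sum of squared errors
        aggregated = predictions @ scores / scores.sum()
        return ((aggregated - benchmarks) ** 2).sum()

    x0 = np.full(3, 1 / 3)                                # equal initial scores
    bounds = Bounds(0, 1)                                 # keep each score in [0, 1]
    sum_to_one = LinearConstraint(np.ones((1, 3)), 1, 1)  # scores sum to 1

    result = minimize(sse, x0, bounds=bounds, constraints=[sum_to_one])
    print(result.x)  # optimized scores; the overshooting third model should be down-weighted

Passing ``constraints`` here plays the role of ``constrain_sum=True``; omitting it mirrors the unconstrained default.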
@@ -109,18 +124,19 @@ def f(f_scores):

 def get_sse(materials, benchmarks, model_score_dict=None):
     """
-    Function to get the sum squared error of a set of benchmarks
-    with aggregated data from the model scoring scheme above
+    Calculates the sum squared error between a set of benchmarks and
+    aggregated data weighted by the specified model scoring scheme.
 
     Args:
-        materials ([Material]): list of materials to evaluate
-        benchmarks ([{Symbol or str: float}]): list of benchmarks
-            for each material
-        model_score_dict ({str: float}): model score dictionary
-            with scores for each model name
+        materials (`list` of `propnet.core.materials.Material`): list of evaluated materials containing
+            symbols for benchmarking
+        benchmarks (`list` of `dict`): list of dicts, keyed by Symbol or symbol name, containing benchmark data
+            for each material in ``materials``.
+        model_score_dict (dict): dict of weights as floats to apply to models, keyed
+            by model name or Model object
 
     Returns:
-        (float): sum squared error over all the benchmarks
+        float: sum squared error over all the benchmarks
 
     """
     sse = 0
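The loop body is truncated in this hunk (``sse = 0`` is where the accumulation starts), but the shape of the computation follows from the docstring. A sketch with hypothetical plain dicts standing in for aggregated Material values:

    # Hypothetical aggregated values per material, matched against benchmarks
    aggregated_by_material = [{'band_gap': 1.1}, {'band_gap': 3.2}]
    benchmarks = [{'band_gap': 1.3}, {'band_gap': 3.0}]

    sse = 0
    for aggregated, benchmark in zip(aggregated_by_material, benchmarks):
        for symbol, target in benchmark.items():
            sse += (aggregated[symbol] - target) ** 2

    print(sse)  # ~0.08 = (1.1 - 1.3)**2 + (3.2 - 3.0)**2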