2 changes: 1 addition & 1 deletion docs/conf.py
@@ -21,7 +21,7 @@
 # -- Project information -----------------------------------------------------
 
 project = u'propnet'
-copyright = u'2018, The Propnet Development Team'
+copyright = u'2019, The Propnet Development Team'
 author = u'The Propnet Development Team'
 
 # The short X.Y version
7 changes: 7 additions & 0 deletions propnet/core/__init__.py
@@ -0,0 +1,7 @@
"""
This module contains the core architecture of propnet. These components provide the infrastructure for building
the propnet knowledge graph, populating it with data, and traversing it to uncover new data. It also contains
analysis modules to inspect materials properties.

This package is in active development, so analysis and core functionality is continually being added!
"""
3 changes: 2 additions & 1 deletion propnet/core/exceptions.py
@@ -1,6 +1,7 @@
"""
Module defining exception and warning classes.
"""
# TODO: Maybe expand these?


class ModelEvaluationError(RuntimeError):
@@ -15,4 +16,4 @@ class IncompleteData(Warning):

 class SymbolConstraintError(RuntimeError):
     """Invalid quantity value with respect to symbol constraints"""
-    pass
+    pass
86 changes: 51 additions & 35 deletions propnet/core/fitting.py
@@ -1,6 +1,21 @@
"""
This module contains code relevant to using fitting to improve
the aggregation process
"""Benchmarking routine for improved aggregation of properties.

This module contains routines to benchmark models against experimental values to improve the aggregation scheme
for quantities. By default, when a ``Material`` contains multiple derived quantities for a given property (symbol),
they are aggregated using a simple, unweighted mean. However, depending on the quality of the models used to produce
those quantities, this may not be ideal.

These routines calculate optimal weights for models given an experimental dataset of materials to match.

Example:
>>> from propnet.core.fitting import fit_model_scores
>>> from propnet.core.materials import Material
>>> materials = [Material(...), ...] # a list of materials populated with properties
>>> benchmarks = [
>>> {'symbol_name': ...}, ... # a list of benchmark data as dicts
>>> ]
>>> # select models for which to calculate weights and run
>>> scores = fit_model_scores(materials, benchmarks, models=['model_1', 'model_2', ...])
"""


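To make the docstring's aggregation scheme concrete before the diff continues: the default is an unweighted mean over all derived values, while model scores turn it into a weighted mean. A minimal sketch with plain numpy and made-up values, not the propnet API:

    import numpy as np

    # Three models derive values for the same symbol on one material
    values = np.array([4.2, 3.9, 5.6])

    # Default aggregation: simple, unweighted mean
    unweighted = values.mean()                      # ~4.57

    # Score-based aggregation: weighted mean, down-weighting the third model
    weights = np.array([1.0, 1.0, 0.2])
    weighted = np.average(values, weights=weights)  # ~4.19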
@@ -9,24 +24,25 @@
 from collections import OrderedDict
 
 import numpy as np
 
 from scipy.optimize import minimize, Bounds, LinearConstraint
-from propnet.core.quantity import QuantityFactory
+
+from propnet.core.quantity import QuantityFactory
+# noinspection PyUnresolvedReferences
+import propnet.models
 from propnet.core.registry import Registry
 
 
 def aggregate_quantities(quantities, model_score_dict=None):
     """
-    Simple method for aggregating a set of quantities
+    Simple method for aggregating a set of quantities.
 
     Args:
-        quantities:
-        model_score_dict:
+        quantities (`iterable` of `propnet.core.quantity.NumQuantity`): iterable of Quantity objects to aggregate
+        model_score_dict (dict): dict of weights to apply to models, keyed
+            by model name or Model object
 
     Returns:
-
+        propnet.core.quantity.NumQuantity: resulting quantity from aggregation
     """
     symbol = next(iter(quantities)).symbol
     if not all([q.symbol == symbol for q in quantities]):
@@ -39,14 +55,15 @@ def aggregate_quantities(quantities, model_score_dict=None):

 def get_weight(quantity, model_score_dict=None):
     """
-    Gets weight based on scoring scheme
+    Calculates weight based on scoring scheme and provenance of the quantity.
 
     Args:
-        quantity (Quantity): quantity for which to get weight
-        model_score_dict ({str: float}): dictionary of model names to scores
+        quantity (propnet.core.quantity.NumQuantity): quantity for which to get weight
+        model_score_dict (dict): dict of weights as floats to apply to models, keyed
+            by model name or Model object
 
     Returns:
-        calculated weight for input quantity
+        float: calculated weight for input quantity
     """
     if quantity.provenance is None or quantity.provenance.inputs is None:
         return 1
@@ -62,25 +79,23 @@ def get_weight(quantity, model_score_dict=None):
 def fit_model_scores(materials, benchmarks, models=None,
                      init_scores=None, constrain_sum=False):
     """
-    Fits a set of model scores to a set of benchmark data
+    Fits a set of model scores/weights to a set of benchmark data by minimizing
+    the sum of squared errors against the benchmark values.
 
     Args:
-        materials ([Material]): list of evaluated materials containing
+        materials (`list` of `propnet.core.materials.Material`): list of evaluated materials containing
             symbols for benchmarking
-        benchmarks ([{Symbol or str: float}]): list of dicts, keyed by Symbol
-            or symbol name containing benchmark data for each material in ``materials``.
-        models ([Model or str]): list of models which should have their
-            scores adjusted in the aggregation weighting scheme
-        init_scores ({str: float}): initial scores for minimization procedure.
-            If unspecified, all scores are equal. Scores are normalized to sum of
-            scores.
-        constrain_sum (bool): True constrains the sum of weights to 1, False
-            removes this constraint. Default: False (no constraint)
+        benchmarks (`list` of `dict`): list of dicts, keyed by Symbol or symbol name, containing benchmark data
+            for each material in ``materials``.
+        models (`list` of `propnet.core.models.Model` or `list` of `str` or `None`): optional, list of models whose
+            scores will be adjusted in the aggregation weighting scheme. Default: `None` (all models will be adjusted)
+        init_scores (dict): optional, dict containing initial scores for minimization procedure, keyed by model name
+            or Model. Scores are normalized to sum of scores. Default: `None` (all scores are equal)
+        constrain_sum (bool): optional, ``True`` constrains the sum of scores to 1, ``False``
+            removes this constraint. Default: ``False`` (no constraint)
 
     Returns:
-        {str: float} scores corresponding to those which minimize
-        SSE for the benchmarked dataset
-
+        OrderedDict: dict of scores corresponding to the minimized sum of squared errors, keyed by model.
     """
     # Probably not smart to have ALL available models in the list. That's a lot of DOF.
     # TODO: Perhaps write a method to produce a list of models in the provenance trees
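The imports added earlier in this file (``minimize``, ``Bounds``, ``LinearConstraint``) hint at how the fit works. A self-contained toy version of the optimization, with plain numpy arrays standing in for Material objects and benchmark dicts; all names and values here are illustrative only:

    import numpy as np
    from scipy.optimize import minimize, Bounds, LinearConstraint

    # Toy data: rows are materials, columns are three models' predictions
    predictions = np.array([[4.2, 3.9, 5.6],
                            [2.1, 2.0, 2.9],
                            [7.8, 8.1, 9.9]])
    benchmarks = np.array([4.0, 2.05, 8.0])

    def sse(scores):
        # Weighted-mean aggregation per material, then sum of squared errors
        aggregated = predictions @ scores / scores.sum()
        return ((aggregated - benchmarks) ** 2).sum()

    x0 = np.full(3, 1 / 3)                                # equal initial scores
    bounds = Bounds(0, 1)                                 # keep each score in [0, 1]
    sum_to_one = LinearConstraint(np.ones((1, 3)), 1, 1)  # scores sum to 1

    result = minimize(sse, x0, bounds=bounds, constraints=[sum_to_one])
    print(result.x)  # optimized scores; the overshooting third model should be down-weighted

Passing ``constraints`` here plays the role of ``constrain_sum=True``; omitting it mirrors the unconstrained default.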
@@ -109,18 +124,19 @@ def f(f_scores):

 def get_sse(materials, benchmarks, model_score_dict=None):
     """
-    Function to get the sum squared error of a set of benchmarks
-    with aggregated data from the model scoring scheme above
+    Calculates the sum squared error between a set of benchmarks and
+    aggregated data weighted by the specified model scoring scheme.
 
     Args:
-        materials ([Material]): list of materials to evaluate
-        benchmarks ([{Symbol or str: float}]): list of benchmarks
-            for each material
-        model_score_dict ({str: float}): model score dictionary
-            with scores for each model name
+        materials (`list` of `propnet.core.materials.Material`): list of evaluated materials containing
+            symbols for benchmarking
+        benchmarks (`list` of `dict`): list of dicts, keyed by Symbol or symbol name, containing benchmark data
+            for each material in ``materials``.
+        model_score_dict (dict): dict of weights as floats to apply to models, keyed
+            by model name or Model object
 
     Returns:
-        (float): sum squared error over all the benchmarks
+        float: sum squared error over all the benchmarks
 
     """
     sse = 0
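The loop body is truncated in this hunk (``sse = 0`` is where the accumulation starts), but the shape of the computation follows from the docstring. A sketch with hypothetical plain dicts standing in for aggregated Material values:

    # Hypothetical aggregated values per material, matched against benchmarks
    aggregated_by_material = [{'band_gap': 1.1}, {'band_gap': 3.2}]
    benchmarks = [{'band_gap': 1.3}, {'band_gap': 3.0}]

    sse = 0
    for aggregated, benchmark in zip(aggregated_by_material, benchmarks):
        for symbol, target in benchmark.items():
            sse += (aggregated[symbol] - target) ** 2

    print(sse)  # ~0.08 = (1.1 - 1.3)**2 + (3.2 - 3.0)**2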