Skip to content

Auto-optimization of corrections #174

@nsmith-

Description

@nsmith-

Once a correction is defined, we could in principle run optimization passes on it, in case it was built from a working but non-optimal combination of nodes. Examples of optimizations include:

  • Detecting regular multivariate binning implemented as nested Binning nodes and converting to a single MultiBinning node
  • Detecting formulas with all the same expression, but different numerical constants, and replacing with a FormulaRef node
  • Normalizing polynomial expressions in Formula nodes to Horner form (this could also be done at evaluator time: Formula performance improvements #173)

The second case is something I already wrote up a sketch for:

def is_evaluatable(node):
    """Report whether ``node`` can be handed directly to a visitor callback.

    A plain ``float`` qualifies, and so does any object whose ``nodetype``
    is ``"formula"``, ``"formularef"``, ``"transform"`` or ``"hashprng"``.
    Every other node type yields ``False``.
    """
    # Floats carry no ``nodetype`` attribute, so they must be checked first.
    if isinstance(node, float):
        return True
    evaluatable_kinds = ("formula", "formularef", "transform", "hashprng")
    return node.nodetype in evaluatable_kinds


def apply_evaluatable(node, func):
    """Replace, in place, every evaluatable entry under ``node`` with ``func(entry)``.

    For ``"binning"`` and ``"multibinning"`` nodes the elements of
    ``node.content`` are visited; for ``"category"`` nodes the ``value`` of
    each item in ``node.content`` is visited. An entry accepted by
    ``is_evaluatable`` is overwritten with the result of ``func``; any other
    entry is descended into recursively. Nodes of any other type are left
    untouched. Only ``content`` is walked; no other attribute of a node is
    inspected here.

    Returns ``None``; the tree is modified in place.
    """
    kind = node.nodetype
    if kind in ("binning", "multibinning"):
        for pos, child in enumerate(node.content):
            if not is_evaluatable(child):
                apply_evaluatable(child, func)
                continue
            node.content[pos] = func(child)
    elif kind == "category":
        for entry in node.content:
            child = entry.value
            if is_evaluatable(child):
                entry.value = func(child)
            else:
                apply_evaluatable(child, func)


# For each correction: report how often every inline formula and every
# generic-formula reference occurs, then replace inline formulas that occur
# more than twice with a single generic formula plus FormulaRef nodes.
for corr in cset.corrections:
    # Occurrence count of inline formulas, keyed by (expression, parser, variables).
    exprs = defaultdict(int)
    # Occurrence count of FormulaRef nodes, keyed by generic-formula index.
    frefs = defaultdict(int)

    def visit(node):
        """Tally formula / formularef nodes; always hand the node back unchanged."""
        if isinstance(node, float):
            return node
        elif node.nodetype == "formula":
            exprs[(node.expression, node.parser, tuple(node.variables))] += 1
        elif node.nodetype == "formularef":
            frefs[node.index] += 1
        return node

    apply_evaluatable(corr.data, visit)

    print(f"Correction {corr.name} has the following unique formulas:")
    for (expr, _, inputs), n in exprs.items():
        print(f"  expr {expr} inputs {inputs} count {n}")

    print("and the following generic formulas")
    for i, n in frefs.items():
        formula = corr.generic_formulas[i]
        print(f"  expr {formula.expression} inputs {tuple(formula.variables)} count {n}")
    print()

    for (expr, parser, inputs), n in exprs.items():
        # Only formulas that occur more than twice are worth sharing.
        if n <= 2:
            continue
        # The f-prefix is required for {expr} to be interpolated.
        print(f"Rewriting with FormulaRef for {expr}")
        # NOTE(review): assumes generic_formulas may be unset (None) on a
        # correction that has none yet -- confirm against the schema.
        if corr.generic_formulas is None:
            corr.generic_formulas = []
        corr.generic_formulas.append(Formula(
            nodetype="formula",
            expression=expr,
            parser=parser,
            variables=list(inputs),
        ))
        idx = len(corr.generic_formulas) - 1

        def visit(node):
            """Swap a formula matching the current group for a FormulaRef."""
            if isinstance(node, float):
                return node
            elif (
                node.nodetype == "formula"
                and node.expression == expr
                # The grouping key includes the parser, so the match must too;
                # otherwise a formula written for another parser would be
                # redirected to this generic formula.
                and node.parser == parser
                and tuple(node.variables) == inputs
            ):
                return FormulaRef(nodetype="formularef", index=idx, parameters=node.parameters)
            return node

        # ``visit`` reads expr/parser/inputs/idx at call time, so it must be
        # applied within this iteration, before they are rebound.
        apply_evaluatable(corr.data, visit)

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement: New feature or request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions