-
Notifications
You must be signed in to change notification settings - Fork 27
Open
Labels
enhancementNew feature or requestNew feature or request
Description
Once a correction is defined, we could in principle run optimization passes on it, in case it is constructed in some working but non-optimal combination of nodes. Examples of optimizations include:
- Detecting regular multivariate binning implemented as nested `Binning` nodes and converting to a single `MultiBinning` node
- Detecting formulas with all the same expression, but different numerical constants, and replacing with a `FormulaRef` node
- Normalizing polynomial expressions in `Formula` nodes to Horner form (this could also be done at evaluator time: Formula performance improvements #173)
The second case is something I already wrote up a sketch for:
def is_evaluatable(node):
    """Return True if ``node`` should be handed to a visitor rather than recursed into.

    A node qualifies when it is a plain ``float`` or when its ``nodetype``
    attribute is one of ``"formula"``, ``"formularef"``, ``"transform"`` or
    ``"hashprng"``.  Anything else returns False, including objects that have
    no ``nodetype`` attribute at all (e.g. a string), so the predicate never
    raises.
    """
    if isinstance(node, float):
        return True
    # getattr keeps this a total predicate: values without a nodetype are
    # simply "not evaluatable" instead of triggering AttributeError.
    return getattr(node, "nodetype", None) in (
        "formula",
        "formularef",
        "transform",
        "hashprng",
    )
def apply_evaluatable(node, func):
    """Apply ``func`` to every evaluatable node reachable from ``node``, in place.

    Children for which ``is_evaluatable`` is true are replaced by
    ``func(child)``; all other children are recursed into.  ``node`` itself is
    never passed to ``func`` -- only its descendants are.

    Handled containers:

    * ``"binning"`` / ``"multibinning"``: every entry of ``node.content`` and,
      when present and not a string, ``node.flow``.
    * ``"category"``: ``item.value`` for every item of ``node.content`` and,
      when present and not None, ``node.default``.

    Nodes of any other ``nodetype`` are left untouched.

    Args:
        node: Container node exposing a ``nodetype`` attribute.
        func: Callable taking an evaluatable node and returning its
            replacement (return the argument unchanged to only observe it).
    """
    if node.nodetype in ("binning", "multibinning"):
        for i, child in enumerate(node.content):
            if is_evaluatable(child):
                node.content[i] = func(child)
            else:
                apply_evaluatable(child, func)
        # NOTE(review): assumes ``flow`` is either a string marker (such as
        # "clamp"/"error") or a content node -- confirm against the schema.
        # Without this, formulas placed in the overflow slot were skipped.
        flow = getattr(node, "flow", None)
        if flow is not None and not isinstance(flow, str):
            if is_evaluatable(flow):
                node.flow = func(flow)
            else:
                apply_evaluatable(flow, func)
    elif node.nodetype == "category":
        for item in node.content:
            if is_evaluatable(item.value):
                item.value = func(item.value)
            else:
                apply_evaluatable(item.value, func)
        # NOTE(review): assumes ``default`` is an optional content node --
        # confirm against the schema.  Previously it was never visited.
        default = getattr(node, "default", None)
        if default is not None:
            if is_evaluatable(default):
                node.default = func(default)
            else:
                apply_evaluatable(default, func)
# For every correction: report how often each inline formula and each generic
# formula reference occurs, then replace any inline formula that occurs more
# than twice with a FormulaRef pointing at a newly appended generic formula.
for corr in cset.corrections:
    # (expression, parser, variables) -> number of inline Formula nodes
    exprs = defaultdict(int)
    # generic formula index -> number of FormulaRef nodes using it
    frefs = defaultdict(int)

    def visit(node):
        """Count formula / formularef nodes; always return the node unchanged."""
        if isinstance(node, float):
            return node
        elif node.nodetype == "formula":
            exprs[(node.expression, node.parser, tuple(node.variables))] += 1
        elif node.nodetype == "formularef":
            frefs[node.index] += 1
        return node

    apply_evaluatable(corr.data, visit)

    print(f"Correction {corr.name} has the following unique formulas:")
    for (expr, _, inputs), n in exprs.items():
        print(f"  expr {expr} inputs {inputs} count {n}")
    print("and the following generic formulas")
    for i, n in frefs.items():
        formula = corr.generic_formulas[i]
        print(f"  expr {formula.expression} inputs {tuple(formula.variables)} count {n}")
    print()

    for (expr, parser, inputs), n in exprs.items():
        if n > 2:
            # Was missing the f-prefix, so "{expr}" was printed literally.
            print(f"Rewriting with FormulaRef for {expr}")
            # NOTE(review): presumably generic_formulas can be unset (None) on
            # a correction that has none yet -- confirm against the schema.
            if corr.generic_formulas is None:
                corr.generic_formulas = []
            corr.generic_formulas.append(Formula(
                nodetype="formula",
                expression=expr,
                parser=parser,
                variables=list(inputs),
            ))
            idx = len(corr.generic_formulas) - 1

            def visit(node):
                """Swap matching inline formulas for a reference to ``idx``."""
                if isinstance(node, float):
                    return node
                elif (
                    node.nodetype == "formula"
                    and node.expression == expr
                    # The tally key includes the parser, so the rewrite must
                    # match on it too; otherwise formulas with a different
                    # parser would be folded into this generic formula.
                    and node.parser == parser
                    and tuple(node.variables) == inputs
                ):
                    return FormulaRef(
                        nodetype="formularef",
                        index=idx,
                        # NOTE(review): assumes an inline formula may carry no
                        # parameters (None) while a reference needs a list --
                        # confirm against the schema.
                        parameters=node.parameters or [],
                    )
                return node

            apply_evaluatable(corr.data, visit)
Metadata
Assignees
Labels
enhancementNew feature or requestNew feature or request