From cb499bc0b5a07b127af2297849b0a244a9f2c0d4 Mon Sep 17 00:00:00 2001 From: Matthew Wardrop Date: Fri, 7 Oct 2022 10:00:50 -0700 Subject: [PATCH 1/2] Allow `Term` instances to preserve rank even when `ensure_full_rank` is set to `True`. --- formulaic/materializers/base.py | 52 +++++++++++++++++++-------------- formulaic/parser/types/term.py | 6 +++- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/formulaic/materializers/base.py b/formulaic/materializers/base.py index 06fbba03..a6cc7154 100644 --- a/formulaic/materializers/base.py +++ b/formulaic/materializers/base.py @@ -352,29 +352,37 @@ def _get_scoped_terms(self, terms, ensure_full_rank=True): term_span = self._get_scoped_terms_spanned_by_evaled_factors( evaled_factors ).difference(spanned) - scoped_terms = self._simplify_scoped_terms(term_span) spanned.update(term_span) - else: - scoped_terms = [ - ScopedTerm( - factors=( - ScopedFactor(evaled_factor, reduced=False) - for evaled_factor in evaled_factors - if evaled_factor.metadata.kind is not Factor.Kind.CONSTANT - ), - scale=functools.reduce( - operator.mul, - [ - evaled_factor.values - for evaled_factor in evaled_factors - if evaled_factor.metadata.kind.value - is Factor.Kind.CONSTANT - ], - 1, - ), - ) - ] - yield term, scoped_terms + + if not term.preserve_rank: + yield term, self._simplify_scoped_terms(term_span) + continue + yield term, [self._get_scoped_term_for_evaled_factors(evaled_factors)] + + @classmethod + def _get_scoped_term_for_evaled_factors( + self, evaled_factors: List[EvaluatedFactor] + ) -> ScopedTerm: + """ + Convert evaluated factors directly into a `ScopedTerm`. Useful for when + you want to preserve the factors of a evaluated factor. 
+ """ + return ScopedTerm( + factors=( + ScopedFactor(evaled_factor, reduced=False) + for evaled_factor in evaled_factors + if evaled_factor.metadata.kind is not Factor.Kind.CONSTANT + ), + scale=functools.reduce( + operator.mul, + [ + evaled_factor.values + for evaled_factor in evaled_factors + if evaled_factor.metadata.kind.value is Factor.Kind.CONSTANT + ], + 1, + ), + ) @classmethod def _get_scoped_terms_spanned_by_evaled_factors( diff --git a/formulaic/parser/types/term.py b/formulaic/parser/types/term.py index 8a458534..a34e6ab5 100644 --- a/formulaic/parser/types/term.py +++ b/formulaic/parser/types/term.py @@ -13,10 +13,14 @@ class Term: Attributes: factors: The set of factors to be multipled to form the term. + preserve_rank: Whether to preserve the term structure even when + `ensure_full_rank` is specified. Other terms without this set may + still be affected by the presence of this term. """ - def __init__(self, factors: Iterable["Factor"]): + def __init__(self, factors: Iterable["Factor"], preserve_rank: bool = False): self.factors = tuple(sorted(set(factors))) + self.preserve_rank = preserve_rank self._factor_exprs = tuple(factor.expr for factor in self.factors) self._hash = hash(repr(self)) From e14be6c553d1f873fe392bcc8405e0c1c05bfb0c Mon Sep 17 00:00:00 2001 From: Matthew Wardrop Date: Fri, 7 Oct 2022 10:19:19 -0700 Subject: [PATCH 2/2] Add mixed effect operators. 
--- formulaic/parser/parser.py | 37 +++++++++++++++++++++++++++++- formulaic/parser/types/__init__.py | 3 ++- formulaic/parser/types/term.py | 20 ++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/formulaic/parser/parser.py b/formulaic/parser/parser.py index 802be319..42b5654e 100644 --- a/formulaic/parser/parser.py +++ b/formulaic/parser/parser.py @@ -12,6 +12,7 @@ OperatorResolver, Structured, Term, + TermGroup, Token, ) from .utils import ( @@ -101,7 +102,7 @@ def get_tokens(self, formula: str) -> Iterable[Token]: ), *insert_tokens_after( tokens[rhs_index:], - r"\|", + r"\|+", [token_one], kind=Token.Kind.OPERATOR, join_operator="+", @@ -193,6 +194,40 @@ def power(arg: Set[Term], power: Set[Term]) -> Set[Term]: ), structural=True, ), + Operator( + "|", + arity=2, + precedence=50, + associativity="left", + to_terms=lambda terms, groups: ( + Structured( + group={ + TermGroup(term, group) + for term, group in itertools.product( + terms, groups.difference({"1"}) + ) + }, + ) + ), + accepts_context=lambda context: context and context[-1] == "(", + ), + Operator( + "||", + arity=2, + precedence=50, + associativity="left", + to_terms=lambda terms, groups: ( + Structured( + group_independent={ + TermGroup(term, group, joiner="||") + for term, group in itertools.product( + terms, groups.difference({"1"}) + ) + }, + ) + ), + accepts_context=lambda context: context and context[-1] == "(", + ), Operator( "+", arity=2, diff --git a/formulaic/parser/types/__init__.py b/formulaic/parser/types/__init__.py index db2e2d5c..05e61d91 100644 --- a/formulaic/parser/types/__init__.py +++ b/formulaic/parser/types/__init__.py @@ -4,7 +4,7 @@ from .operator import Operator from .operator_resolver import OperatorResolver from .structured import Structured -from .term import Term +from .term import Term, TermGroup from .token import Token @@ -16,5 +16,6 @@ "OperatorResolver", "Structured", "Term", + "TermGroup", "Token", ] diff --git 
a/formulaic/parser/types/term.py b/formulaic/parser/types/term.py index a34e6ab5..b1519a64 100644 --- a/formulaic/parser/types/term.py +++ b/formulaic/parser/types/term.py @@ -52,3 +52,23 @@ def __lt__(self, other): def __repr__(self): return ":".join(self._factor_exprs) + + +class TermGroup(Term): + """ + Represents a grouped term in a formula: a term paired with a grouping term. + + Attributes: + term: The term that appears on the left-hand side of the joiner. + group: The grouping term that appears on the right-hand side of the joiner. + joiner: The operator string placed between `term` and `group` in the repr. + """ + + def __init__(self, term, group, joiner="|"): + self.term = term + self.group = group + self.joiner = joiner + super().__init__(factors=[*term.factors, *group.factors], preserve_rank=True) + + def __repr__(self): + return repr(self.term) + self.joiner + repr(self.group)