Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/examples/indefinites/grammar.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ultk.language.grammar import Grammar, Rule
from ultk.language.grammar.grammar import Grammar, Rule

# indefinites_grammar = Grammar.from_yaml("indefinites/grammar.yml")
indefinites_grammar = Grammar.from_module("indefinites.grammar_functions")
Expand Down
2 changes: 1 addition & 1 deletion src/examples/indefinites/measures.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ultk.effcomm.informativity import informativity
from ultk.language.grammar import GrammaticalExpression
from ultk.language.grammar.grammar import GrammaticalExpression
from ultk.language.language import Language, aggregate_expression_complexity
from ultk.language.semantics import Meaning

Expand Down
2 changes: 1 addition & 1 deletion src/examples/indefinites/scripts/generate_expressions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from ultk.util.io import write_expressions

from ultk.language.semantics import Meaning
from ultk.language.grammar import GrammaticalExpression
from ultk.language.grammar.grammar import GrammaticalExpression
from ..grammar import indefinites_grammar
from ..meaning import universe as indefinites_universe

Expand Down
2 changes: 1 addition & 1 deletion src/examples/modals/grammar.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from ultk.language.grammar import Grammar
from ultk.language.grammar.grammar import Grammar

modals_grammar = Grammar.from_yaml("modals/data/grammar.yaml")
2 changes: 1 addition & 1 deletion src/examples/modals/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Referent,
aggregate_expression_complexity,
)
from ultk.language.grammar import GrammaticalExpression
from ultk.language.grammar.grammar import GrammaticalExpression
from ultk.effcomm.informativity import informativity, build_pairwise_matrix

from .meaning import universe as modals_universe
Expand Down
2 changes: 1 addition & 1 deletion src/examples/modals/scripts/generate_expressions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from ultk.util.io import write_expressions

from ultk.language.semantics import Meaning
from ultk.language.grammar import GrammaticalExpression
from ultk.language.grammar.grammar import GrammaticalExpression
from ..grammar import modals_grammar
from ..meaning import universe as modals_universe

Expand Down
2 changes: 1 addition & 1 deletion src/tests/test_grammar.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ultk.language.grammar import Grammar, GrammaticalExpression, Rule
from ultk.language.grammar.grammar import Grammar, GrammaticalExpression, Rule
from ultk.language.semantics import Meaning, Referent, Universe


Expand Down
57 changes: 57 additions & 0 deletions src/tests/test_likelihood.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from ultk.language.grammar.likelihood import (
all_or_nothing,
percent_match_unique,
percent_match,
noise_match,
)
from math import log


# The expression can simply act as a function for this purpose
def expression(_datum):
    """A trivial stand-in 'expression': true of every datum it is applied to."""
    return True


def even(number):
    """Return True iff `number` is even (has no remainder modulo 2)."""
    remainder = number % 2
    return remainder == 0


class TestLikelihood:
    """Tests for the likelihood functions over (datum, truth-value) pairs."""

    # Ten data points labeled uniformly true, uniformly false, and alternating.
    all_true = [(n, True) for n in range(10)]
    all_false = [(n, False) for n in range(10)]
    half = [(n, n % 2 == 0) for n in range(10)]

    def test_all_or_nothing(self):
        # Likelihood is 1 only when the expression matches every label.
        assert all_or_nothing(self.all_true, expression) == 1
        assert all_or_nothing(self.all_false, expression) == 0
        assert all_or_nothing(self.half, expression) == 0

    def test_percent_match(self):
        # Likelihood is the fraction of labels the expression agrees with.
        assert percent_match(self.all_true, expression) == 1
        assert percent_match(self.all_false, expression) == 0
        assert percent_match(self.half, expression) == 0.5

    def test_percent_match_unique(self):
        # The always-true expression never matches uniquely.
        assert percent_match_unique(self.all_true, expression) == 0
        assert percent_match_unique(self.all_false, expression) == 0
        assert percent_match_unique(self.half, expression) == 0
        # `even` distinguishes the data, so unique matches appear.
        assert percent_match_unique(self.all_true, even) == 0.5
        assert percent_match_unique(self.all_false, even) == 0.5
        assert percent_match_unique(self.half, even) == 1

    def test_noise_match(self):
        tol = 0.00001
        likelihood = noise_match(2)
        # With noise parameter 2, an agreeing datum contributes log(0.995)
        # and a disagreeing one log(0.005) — values taken from the expected
        # results below; confirm against the noise_match implementation.
        hit, miss = log(0.995), log(0.005)
        assert abs(likelihood(self.all_true, expression) - hit * 10) < tol
        assert abs(likelihood(self.all_false, expression) - miss * 10) < tol
        assert abs(likelihood(self.half, expression) - (hit * 5 + miss * 5)) < tol
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from itertools import product
from typing import Any, Callable, Generator, TypedDict, TypeVar
from yaml import load
from functools import cache
from math import log

try:
from yaml import CLoader as Loader
Expand Down Expand Up @@ -90,7 +92,7 @@ def _and(p1: bool, p2: bool) -> bool:
del args["weight"]
# allow custom names too
rule_name = func.__name__
if "name" in args:
if "name" in args and args["name"].default is not inspect._empty:
rule_name = args["name"].default
del args["name"]
# parameters = {'name': Parameter} ordereddict, so we want the values
Expand Down Expand Up @@ -178,6 +180,26 @@ def count_atoms(self):
return 1
return sum(child.count_atoms() for child in self.children)

def replace_children(self, children) -> None:
    """Replace this expression's children in place.

    Args:
        children: the new children (presumably a tuple of
            GrammaticalExpression, matching `self.children` — confirm)

    NOTE(review): `node_count` is memoized with @cache; calling it before
    and after replacing children will return a stale cached count — verify
    callers never rely on that sequence.
    """
    self.children = children

def node_count(self) -> int:
    """Count the nodes of the tree rooted at this GrammaticalExpression.

    Counts this node plus every transitive child, iteratively (no recursion
    limit concerns for deep trees).

    Note: this was previously decorated with functools.cache, which keys on
    `self` and therefore keeps every expression alive for the life of the
    process (flake8-bugbear B019) and returns stale counts after
    `replace_children` mutates the tree — so the memoization was removed.

    Returns:
        int: total node count (>= 1)
    """
    counter = 0
    stack = [self]
    while stack:
        current_node = stack.pop()
        counter += 1
        # A terminal node stores None for children; treat it as empty.
        stack.extend(current_node.children or ())
    return counter

@classmethod
def from_dict(cls, the_dict: dict, grammar: "Grammar") -> "GrammaticalExpression":
children = the_dict.get("children")
Expand Down Expand Up @@ -258,6 +280,46 @@ def add_rule(self, rule: Rule):
)
self._rules_by_name[rule.name] = rule

# @cache, unhashable, or embed it as a property (change every time Grammar is changed)
def probability(self, rule: "Rule") -> float:
    """Probability of `rule`: its weight normalized over all rules with the same LHS.

    Args:
        rule: the Rule whose probability to compute

    Returns:
        float: rule.weight / (total weight of self._rules[rule.lhs])
    """
    # Generator avoids materializing an intermediate list inside sum().
    total_weight = sum(r.weight for r in self._rules[rule.lhs])
    return float(rule.weight) / total_weight

# @cache, unhashable, or embed it as a property (change every time Grammar is changed)
def log_probability(self, rule: "Rule") -> float:
    """Log-probability of `rule`: log of its weight normalized over its LHS's rules.

    Computed as log(weight) - log(total) for numerical stability rather than
    log(weight / total).

    Args:
        rule: the Rule whose log-probability to compute

    Returns:
        float: log(rule.weight) - log(total weight of self._rules[rule.lhs])
    """
    # Generator avoids materializing an intermediate list inside sum().
    total_weight = sum(r.weight for r in self._rules[rule.lhs])
    return log(float(rule.weight)) - log(total_weight)

def prior(self, expr: "GrammaticalExpression") -> float:
    """Prior probability of a GrammaticalExpression under this grammar.

    The prior of an expression is the probability of its rule multiplied by
    the priors of all its children (a terminal with no children contributes
    just its rule's probability).

    Args:
        expr (GrammaticalExpression): the GrammaticalExpression for computation

    Returns:
        float: prior probability
    """
    probability = self.probability(self._rules_by_name[expr.rule_name])
    # Terminal nodes store None for children; treat that as empty.
    for child in expr.children or ():
        probability *= self.prior(child)
    return probability

def log_prior(self, expr: "GrammaticalExpression") -> float:
    """Prior of a GrammaticalExpression in log probability.

    Same recursion as `prior`, but summing log-probabilities instead of
    multiplying probabilities, avoiding underflow on deep expressions.

    Args:
        expr (GrammaticalExpression): the GrammaticalExpression for computation

    Returns:
        float: log prior
    """
    log_probability = self.log_probability(self._rules_by_name[expr.rule_name])
    # Terminal nodes store None for children; treat that as empty.
    for child in expr.children or ():
        log_probability += self.log_prior(child)
    return log_probability

def parse(
self,
expression: str,
Expand Down Expand Up @@ -321,21 +383,29 @@ def parse(
)
)
if len(stack) != 1:
raise ValueError("Could not parse string {expression}")
raise ValueError(f"Could not parse string {expression}")
return stack[0]

def generate(self, lhs: Any = None) -> GrammaticalExpression:
def generate(self, lhs: Any = None, max_depth=3, depth=0) -> GrammaticalExpression:
"""Generate an expression from a given lhs."""
if lhs is None:
lhs = self._start
rules = self._rules[lhs]
# Stop there from being a high chance of infinite recusion
if depth > max_depth:
filtered_rules = list(filter(lambda rule: rule.rhs is None, rules))
if len(filtered_rules) != 0:
rules = filtered_rules
the_rule = random.choices(rules, weights=[rule.weight for rule in rules], k=1)[
0
]
children = (
None
if the_rule.rhs is None
else tuple([self.generate(child_lhs) for child_lhs in the_rule.rhs])
else tuple(
self.generate(child_lhs, max_depth=max_depth, depth=depth + 1)
for child_lhs in the_rule.rhs
)
)
# if the rule is terminal, rhs will be empty, so no recursive calls to generate will be made in this comprehension
return GrammaticalExpression(
Expand Down Expand Up @@ -550,6 +620,9 @@ def from_module(cls, module_name: str) -> "Grammar":
The module should have a list of type-annotated method definitions, each of which will correspond to one Rule in the new Grammar.
See the docstring for `Rule.from_callable` for more information on how that step works.

The function will normally attempt to convert all functions (including imported functions) into Rules. However, if a tuple of
functions called `grammar_rules` is defined in the module, it will only try to convert the functions contained in the tuple.

The start symbol of the grammar can either be specified by `start = XXX` somewhere in the module,
or will default to the LHS of the first rule in the module (aka the return type annotation of the first method definition).

Expand All @@ -558,7 +631,11 @@ def from_module(cls, module_name: str) -> "Grammar":
"""
module = import_module(module_name)
grammar = cls(None)
for name, value in inspect.getmembers(module):
if hasattr(module, "grammar_rules") and type(module.grammar_rules) == tuple:
possible_rules = module.grammar_rules
else:
possible_rules = tuple(value for _, value in inspect.getmembers(module))
for value in possible_rules:
# functions become rules
if inspect.isfunction(value):
grammar.add_rule(Rule.from_callable(value))
Expand Down
Loading