From 6ec4f6237970a9cbf4cb8defd42ab8620b6d7c92 Mon Sep 17 00:00:00 2001
From: "Tristan F."
Date: Fri, 10 Oct 2025 06:32:06 +0000
Subject: [PATCH 01/16] refactor: separate statistic computation

we also make it lazy
---
 spras/analysis/summary.py | 44 +++-----------------
 spras/statistics.py       | 88 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 38 deletions(-)
 create mode 100644 spras/statistics.py

diff --git a/spras/analysis/summary.py b/spras/analysis/summary.py
index c8abc1cad..fd70db8f3 100644
--- a/spras/analysis/summary.py
+++ b/spras/analysis/summary.py
@@ -1,10 +1,11 @@
 from pathlib import Path
-from statistics import median
 from typing import Iterable
 
 import networkx as nx
 import pandas as pd
 
+from spras.statistics import compute_statistics, statistics_options
+
 
 def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame, algo_params: dict[str, dict], algo_with_params: list) -> pd.DataFrame:
@@ -47,44 +48,11 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame, alg
 
         # Save the network name, number of nodes, number edges, and number of connected components
         nw_name = str(file_path)
-        number_nodes = nw.number_of_nodes()
-        number_edges = nw.number_of_edges()
-        ncc = nx.number_connected_components(nw)
-
-        # Save the max/median degree, average clustering coefficient, and density
-        if number_nodes == 0:
-            max_degree = 0
-            median_degree = 0.0
-            density = 0.0
-        else:
-            degrees = [deg for _, deg in nw.degree()]
-            max_degree = max(degrees)
-            median_degree = median(degrees)
-            density = nx.density(nw)
-
-        cc = list(nx.connected_components(nw))
-        # Save the max diameter
-        # Use diameter only for components with ≥2 nodes (singleton components have diameter 0)
-        diameters = [
-            nx.diameter(nw.subgraph(c).copy()) if len(c) > 1 else 0
-            for c in cc
-        ]
-        max_diameter = max(diameters, default=0)
-
-        # Save the average path lengths
-        # Compute average shortest path length only for components with ≥2 nodes (undefined for singletons, set to 0.0)
-        avg_path_lengths = [
-            nx.average_shortest_path_length(nw.subgraph(c).copy()) if len(c) > 1 else 0.0
-            for c in cc
-        ]
-
-        if len(avg_path_lengths) != 0:
-            avg_path_len = sum(avg_path_lengths) / len(avg_path_lengths)
-        else:
-            avg_path_len = 0.0
+
+        graph_statistics = compute_statistics(nw, statistics_options)
 
         # Initialize list to store current network information
-        cur_nw_info = [nw_name, number_nodes, number_edges, ncc, density, max_degree, median_degree, max_diameter, avg_path_len]
+        cur_nw_info = [nw_name, *graph_statistics]
 
         # Iterate through each node property and save the intersection with the current network
         for node_list in nodes_by_col:
@@ -104,7 +72,7 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame, alg
             nw_info.append(cur_nw_info)
 
     # Prepare column names
-    col_names = ['Name', 'Number of nodes', 'Number of edges', 'Number of connected components', 'Density', 'Max degree', 'Median degree', 'Max diameter', 'Average path length']
+    col_names = ['Name', *statistics_options]
     col_names.extend(nodes_by_col_labs)
     col_names.append('Parameter combination')
 
diff --git a/spras/statistics.py b/spras/statistics.py
new file mode 100644
index 000000000..843e5292a
--- /dev/null
+++ b/spras/statistics.py
@@ -0,0 +1,88 @@
+"""
+Graph statistics, used to power summary.py.
+
+We allow any subset of the supported statistics to be computed on a graph;
+statistics that share work are computed together. See the top level
+`statistics_computation` dictionary for usage.
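+
+A sketch of the intended usage (illustrative only; the exact key names come
+from `statistics_options` below):
+
+    graph = nx.DiGraph([("A", "B"), ("B", "C")])
+    compute_statistics(graph, ["Number of nodes", "Max degree"])
+    # -> {"Number of nodes": 3, "Max degree": 2}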
+""" + +import itertools +import networkx as nx +from statistics import median +from typing import Callable + +def compute_degree(graph: nx.DiGraph) -> tuple[int, float]: + """ + Computes the (max, median) degree of a `graph`. + """ + # number_of_nodes is a cheap call + if graph.number_of_nodes() == 0: + return (0, 0.0) + else: + degrees = [deg for _, deg in graph.degree()] + return max(degrees), median(degrees) + +def compute_on_cc(graph: nx.DiGraph) -> tuple[int, float]: + cc = list(nx.connected_components(graph)) + # Save the max diameter + # Use diameter only for components with ≥2 nodes (singleton components have diameter 0) + diameters = [ + nx.diameter(graph.subgraph(c).copy()) if len(c) > 1 else 0 + for c in cc + ] + max_diameter = max(diameters, default=0) + + # Save the average path lengths + # Compute average shortest path length only for components with ≥2 nodes (undefined for singletons, set to 0.0) + avg_path_lengths = [ + nx.average_shortest_path_length(graph.subgraph(c).copy()) if len(c) > 1 else 0.0 + for c in cc + ] + + if len(avg_path_lengths) != 0: + avg_path_len = sum(avg_path_lengths) / len(avg_path_lengths) + else: + avg_path_len = 0.0 + + return max_diameter, avg_path_len + +# The type signature on here is quite bad. I would like to say that an n-tuple has n-outputs. +statistics_computation: dict[tuple[str, ...], Callable[[nx.DiGraph], tuple[float | int, ...]]] = { + ('Number of nodes',): lambda graph : (graph.number_of_nodes(),), + ('Number of edges',): lambda graph : (graph.number_of_edges(),), + ('Number of connected components',): lambda graph : (nx.number_connected_components(graph),), + ('Density',): lambda graph : (nx.density(graph),), + + ('Max degree', 'Median degree'): compute_degree, + ('Max diameter', 'Average path length'): compute_on_cc, +} + +# All of the keys inside statistics_computation, flattened. +statistics_options: list[str] = list(itertools.chain(*(list(key) for key in statistics_computation.keys()))) + +def compute_statistics(graph: nx.DiGraph, statistics: list[str]) -> dict[str, float | int]: + """ + Computes `statistics` for a graph corresponding to the top-level `statistics` dictionary + in this file. + """ + + # early-scan cutoff for statistics: + # we want to err as soon as possible + for stat in statistics: + if stat not in statistics_options: + raise RuntimeError(f"Statistic {stat} not a computable statistics! Available statistics: {statistics_options}") + + # now, we can compute statistics only + computed_statistics: dict[str, float | int] = dict() + for statistic_tuple, compute in statistics_computation.items(): + # when we want them + if not set(statistic_tuple).isdisjoint(set(statistics)): + computed_tuple = compute(graph) + assert len(statistic_tuple) == computed_tuple, f"bad tuple length for {statistic_tuple}" + + current_computed_statistics = zip(statistic_tuple, computed_tuple) + for stat, value in current_computed_statistics: + computed_statistics[stat] = value + + # (and return only the statistics we wanted) + return {key: computed_statistics[key] for key in statistics} From 9987189d8e0d9a9006ae1897cd44836500a5c906 Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Fri, 10 Oct 2025 06:48:54 +0000 Subject: [PATCH 02/16] fix: correct tuple assumption --- spras/statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/statistics.py b/spras/statistics.py index 843e5292a..ac91b80a9 100644 --- a/spras/statistics.py +++ b/spras/statistics.py @@ -78,7 +78,7 @@ def compute_statistics(graph: nx.DiGraph, statistics: list[str]) -> dict[str, fl # when we want them if not set(statistic_tuple).isdisjoint(set(statistics)): computed_tuple = compute(graph) - assert len(statistic_tuple) == computed_tuple, f"bad tuple length for {statistic_tuple}" + assert len(statistic_tuple) == len(computed_tuple), f"bad tuple length for {statistic_tuple}" current_computed_statistics = zip(statistic_tuple, computed_tuple) for stat, value in current_computed_statistics: From 25eef5e72aee4fb7aea6f6b5e9d11dff7fd5be16 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Fri, 10 Oct 2025 07:06:46 +0000 Subject: [PATCH 03/16] fix: stably use graph statistic values --- spras/analysis/summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/analysis/summary.py b/spras/analysis/summary.py index fd70db8f3..432dba0a4 100644 --- a/spras/analysis/summary.py +++ b/spras/analysis/summary.py @@ -52,7 +52,7 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame, alg graph_statistics = compute_statistics(nw, statistics_options) # Initialize list to store current network information - cur_nw_info = [nw_name, *graph_statistics] + cur_nw_info = [nw_name, *graph_statistics.values()] # Iterate through each node property and save the intersection with the current network for node_list in nodes_by_col: From cb373c130760c7040b16ec03ba1d2673e343465b Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Wed, 29 Oct 2025 17:56:22 -0700 Subject: [PATCH 04/16] style: fmt --- spras/config/config.py | 4 ++-- spras/statistics.py | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/spras/config/config.py b/spras/config/config.py index 22e655941..add815d9d 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -71,7 +71,7 @@ def __init__(self, raw_config: dict[str, Any]): self.container_prefix: str = DEFAULT_CONTAINER_PREFIX # A Boolean specifying whether to unpack singularity containers. Default is False self.unpack_singularity = False - # A Boolean indiciating whether to enable container runtime profiling (apptainer/singularity only) + # A Boolean indicating whether to enable container runtime profiling (apptainer/singularity only) self.enable_profiling = False # A dictionary to store configured datasets against which SPRAS will be run self.datasets = None @@ -308,7 +308,7 @@ def process_config(self, raw_config: RawConfig): if raw_config.container_registry and raw_config.container_registry.base_url != "" and raw_config.container_registry.owner != "": self.container_prefix = raw_config.container_registry.base_url + "/" + raw_config.container_registry.owner - if raw_config.enable_profiling and not raw_config.container_framework in ["singularity", "apptainer"]: + if raw_config.enable_profiling and raw_config.container_framework not in ["singularity", "apptainer"]: warnings.warn("enable_profiling is set to true, but the container framework is not singularity/apptainer. 
This setting will have no effect.") self.enable_profiling = raw_config.enable_profiling diff --git a/spras/statistics.py b/spras/statistics.py index ac91b80a9..49ae8b3fc 100644 --- a/spras/statistics.py +++ b/spras/statistics.py @@ -7,10 +7,12 @@ """ import itertools -import networkx as nx from statistics import median from typing import Callable +import networkx as nx + + def compute_degree(graph: nx.DiGraph) -> tuple[int, float]: """ Computes the (max, median) degree of a `graph`. @@ -43,7 +45,7 @@ def compute_on_cc(graph: nx.DiGraph) -> tuple[int, float]: avg_path_len = sum(avg_path_lengths) / len(avg_path_lengths) else: avg_path_len = 0.0 - + return max_diameter, avg_path_len # The type signature on here is quite bad. I would like to say that an n-tuple has n-outputs. @@ -52,7 +54,7 @@ def compute_on_cc(graph: nx.DiGraph) -> tuple[int, float]: ('Number of edges',): lambda graph : (graph.number_of_edges(),), ('Number of connected components',): lambda graph : (nx.number_connected_components(graph),), ('Density',): lambda graph : (nx.density(graph),), - + ('Max degree', 'Median degree'): compute_degree, ('Max diameter', 'Average path length'): compute_on_cc, } @@ -63,7 +65,7 @@ def compute_on_cc(graph: nx.DiGraph) -> tuple[int, float]: def compute_statistics(graph: nx.DiGraph, statistics: list[str]) -> dict[str, float | int]: """ Computes `statistics` for a graph corresponding to the top-level `statistics` dictionary - in this file. + in this file. """ # early-scan cutoff for statistics: @@ -71,7 +73,7 @@ def compute_statistics(graph: nx.DiGraph, statistics: list[str]) -> dict[str, fl for stat in statistics: if stat not in statistics_options: raise RuntimeError(f"Statistic {stat} not a computable statistics! Available statistics: {statistics_options}") - + # now, we can compute statistics only computed_statistics: dict[str, float | int] = dict() for statistic_tuple, compute in statistics_computation.items(): From 4640bc0c3fcf57f2427d2bb0200381d7ce8ad6cb Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 30 Oct 2025 01:13:01 +0000 Subject: [PATCH 05/16] feat: init intervals and heuristics --- spras/config/schema.py | 50 ++++++++++ spras/interval.py | 203 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 spras/interval.py diff --git a/spras/config/schema.py b/spras/config/schema.py index 2b46aaf1c..fbacb8793 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -13,9 +13,12 @@ import re from typing import Annotated, Optional +import networkx as nx from pydantic import AfterValidator, BaseModel, ConfigDict, Field from spras.config.util import CaseInsensitiveEnum +from spras.interval import Interval +from spras.statistics import compute_statistics, statistics_options # Most options here have an `include` property, # which is meant to make disabling parts of the configuration easier. 
@@ -148,6 +151,51 @@ class ReconstructionSettings(BaseModel): model_config = ConfigDict(extra='forbid') +class GraphHeuristics(BaseModel): + number_of_nodes: list[Interval] = [] + number_of_edges: list[Interval] = [] + number_of_connected_components: list[Interval] = [] + density: list[Interval] = [] + + max_degree: list[Interval] = [] + median_degree: list[Interval] = [] + max_diameter: list[Interval] = [] + average_path_length: list[Interval] = [] + + def validate_graph(self, graph: nx.DiGraph): + statistics_dictionary = { + 'Number of nodes': self.number_of_nodes, + 'Number of edges': self.number_of_edges, + 'Number of connected components': self.number_of_connected_components, + 'Density': self.density, + 'Max degree': self.max_degree, + 'Median degree': self.median_degree, + 'Max diameter': self.max_diameter, + 'Average path length': self.average_path_length + } + + # quick assert: is statistics_dictionary exhaustive? + assert set(statistics_dictionary.keys()) == set(statistics_options) + + stats = compute_statistics( + graph, + list(k for k, v in statistics_dictionary.items() if len(v) == 0) + ) + + for key, value in stats.items(): + intervals = statistics_dictionary[key] + + matches_heuristics = False + for interval in intervals: + if interval.mem(value): + matches_heuristics = True + break + + if not matches_heuristics: + raise RuntimeError(f"Heuristic {key} with value {value} does not match {intervals}!") + + model_config = ConfigDict(extra='forbid') + class RawConfig(BaseModel): # TODO: move these container values to a nested container key container_framework: ContainerFramework = ContainerFramework.docker @@ -165,6 +213,8 @@ class RawConfig(BaseModel): reconstruction_settings: ReconstructionSettings + heuristics: GraphHeuristics = GraphHeuristics() + # We include use_attribute_docstrings here to preserve the docstrings # after attributes at runtime (for future JSON schema generation) model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) diff --git a/spras/interval.py b/spras/interval.py new file mode 100644 index 000000000..6771a228b --- /dev/null +++ b/spras/interval.py @@ -0,0 +1,203 @@ +""" +Utilities for defining inequality intervals (e.g. l < x <= u) + +For graph heuristics, we allow inequality intervals of the form (num) < (id)?. For example, +we can say "1500 <" for "1500 < x", or "1000 < x < 2000", etc. + +[If there is ever a library that does this, we should replace this code with that library.] +""" + +from enum import Enum +from typing import Any, Optional, Self +from pydantic import BaseModel, model_validator + +class Operand(Enum): + LT = "<" + LTE = "<=" + EQ = "=" + GTE = ">=" + GT = ">" + + @classmethod + def from_str(cls, string: str) -> Optional[Self]: + return next((enum for enum in list(cls) if enum.value == string), None) + + def is_closed(self) -> bool: + """Whether this is a closed inequality. We consider = to be closed.""" + match self: + case Operand.LTE: return True + case Operand.EQ: return True + case Operand.GT: return True + return False + + def as_closed(self): + """Closes an operand. Eq does not get modified.""" + match self: + case Operand.LT: return Operand.LTE + case Operand.GT: return Operand.GTE + return self + + def as_opened(self): + """Opens an operand. 
Eq does not get modified.""" + match self: + case Operand.LTE: return Operand.LT + case Operand.GTE: return Operand.GT + return self + + def with_closed(self, closed: bool): return self.as_closed() if closed else self.as_opened() + + def compare(self, left, right) -> bool: + match self: + case Operand.LT: return left < right + case Operand.LTE: return left <= right + case Operand.EQ: return left == right + case Operand.GTE: return left >= right + case Operand.GT: return left > right + + @classmethod + def combine(cls, left: Self, right: Self): + """Combines two operands, returning None if the operands don't combine well.""" + match (left, right): + case (Operand.LTE, Operand.LTE): return Operand.LTE + case (Operand.LT, Operand.LTE): return Operand.LT + case (Operand.LT, Operand.LT): return Operand.LT + case (Operand.EQ, op): return op + case (op, Operand.EQ): return op + case (Operand.GTE, Operand.GTE): return Operand.GTE + case (Operand.GT, Operand.GTE): return Operand.GT + case (Operand.GT, Operand.GT): return Operand.GT + return None + +class Interval(BaseModel): + lower: Optional[float] + upper: Optional[float] + lower_closed: bool + upper_closed: bool + + def mem(self, num: float) -> bool: + if self.lower is not None: + meets_lower = self.lower <= num if self.lower_closed else self.lower < num + else: + meets_lower = True + + if self.upper is not None: + meets_upper = num <= self.upper if self.upper_closed else num < self.upper + else: + meets_upper = True + + return meets_lower and meets_upper + + @classmethod + def single(cls, num: float) -> Self: + return cls(lower=num, upper=num, lower_closed=True, upper_closed=True) + + @classmethod + def left_operand(cls, operand: Operand, num: float) -> Self: + """Creates an interval whose operand is on the left (e.g. <300)""" + match operand: + case Operand.LT: return cls(lower=None, upper=num, lower_closed=False, upper_closed=False) + case Operand.LTE: return cls(lower=None, upper=num, lower_closed=True, upper_closed=False) + case Operand.EQ: return cls.single(num) + case Operand.GTE: return cls(lower=num, upper=None, lower_closed=False, upper_closed=False) + case Operand.GT: return cls(lower=num, upper=None, lower_closed=False, upper_closed=True) + + @classmethod + def right_operand(cls, num: float, operand: Operand) -> Self: + """Creates an interval whose operand is on the right (e.g. 300<)""" + match operand: + case Operand.LT: return cls(lower=num, upper=None, lower_closed=False, upper_closed=False) + case Operand.LTE: return cls(lower=num, upper=None, lower_closed=True, upper_closed=False) + case Operand.EQ: return cls.single(num) + case Operand.GTE: return cls(lower=None, upper=num, lower_closed=False, upper_closed=False) + case Operand.GT: return cls(lower=None, upper=num, lower_closed=False, upper_closed=True) + + @classmethod + def from_string(cls, input: str) -> Self: + tokens = [token.strip() for token in input.split(" ")] + + assert len(tokens) != 0 + + def parse_num(numstr: str) -> Optional[int]: + # Allow pythonic separators + try: + return int(numstr.replace("_", "")) + except: + return None + + def is_id(idstr: str) -> bool: return idstr.isidentifier() + + # Case 1: (id?) operand number + if is_id(tokens[0]): + # No other cases have an id at the beginning: we get rid of it. + tokens.pop() + + operand = Operand.from_str(tokens[0]) + if operand is not None: + # (cont.) Case 1: (id?) 
operand number + number = parse_num(tokens[1]) + assert number is not None, f"found operand {operand.value} and expected a number, but found {tokens[1]} instead." + return cls.left_operand(operand, number) + + # All other cases have a number + number = parse_num(tokens.pop()) + assert number is not None, f"expected an inequality, got {input} instead" + + # Case 2: number + if len(tokens) == 0: + return cls.single(number) + + # All other cases have an operand + operand = Operand.from_str(tokens.pop()) + assert operand is not None, f"got {number}, expected an operand afterward." + + # Case 3: number operand (id?) + if len(tokens) == 0 or len(tokens) == 1: + if len(tokens) == 1: assert is_id(tokens[1]) + return cls.right_operand(number, operand) + + # Case 4: number operand id operand number + id = tokens.pop() + assert is_id(id), f"got an inequality of the form {number} {operand.value} and expected nothing or another identifier, but got {id} instead." + + second_operand_str = tokens.pop() + second_operand = Operand.from_str(second_operand_str) + assert second_operand is not None, f"got an inequality of the form {number} {operand.value} {id} and was expecting an operand, but got {second_operand_str} instead." + + second_number_str = tokens.pop() + second_number = parse_num(second_number_str) + assert second_number is not None, f"got an inequality of the form {number} {operand.value} {id} {second_operand.value} and was expecting a number, but got {second_number_str} instead." + + # don't want equals operands in a double inequality (a < b < c) + assert operand is not Operand.EQ and second_operand is not Operand.EQ, f"in a double inequality, neither operand can be '='!" + + # are our two numbers valid? + combined_operand = Operand.combine(operand, second_operand) + assert combined_operand is not None, f"operands {operand.value} and {second_operand} must combine well with each other!" + assert combined_operand.compare(number, second_number), f"{number} {operand.value} {second_number} does not hold!" + + return cls( + lower=number, + upper=second_number, + lower_closed=operand.is_closed(), + upper_closed=second_operand.is_closed() + ) + + def __str__(self) -> str: + if not self.lower and not self.upper: return "{empty interval}" + if not self.lower: + return Operand.LT.with_closed(self.upper_closed).value + " " + str(self.upper) + if not self.upper: + return str(self.lower) + " " + Operand.LT.with_closed(self.lower_closed).value + + if self.lower == self.upper and self.lower_closed and self.upper_closed: return str(self.lower) + + return str(self.lower) + " " + Operand.LT.with_closed(self.lower_closed).value + " " + "x" \ + + Operand.LT.with_closed(self.upper_closed).value + str(self.upper) + + # For parsing Intervals automatically with pydantic. + @model_validator(mode="before") + @classmethod + def from_literal(cls, data: Any) -> Any: + if isinstance(data, str): + return cls.from_string(data) + return data From 898d568a49053467d74af1cb952bdceac400436d Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Wed, 29 Oct 2025 18:15:23 -0700
Subject: [PATCH 06/16] style: specify zip strict

---
 spras/statistics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spras/statistics.py b/spras/statistics.py
index 49ae8b3fc..1ebe7cc62 100644
--- a/spras/statistics.py
+++ b/spras/statistics.py
@@ -82,7 +82,7 @@ def compute_statistics(graph: nx.DiGraph, statistics: list[str]) -> dict[str, fl
             computed_tuple = compute(graph)
             assert len(statistic_tuple) == len(computed_tuple), f"bad tuple length for {statistic_tuple}"
 
-            current_computed_statistics = zip(statistic_tuple, computed_tuple)
+            current_computed_statistics = zip(statistic_tuple, computed_tuple, strict=True)
             for stat, value in current_computed_statistics:
                 computed_statistics[stat] = value
 

From 8177ed665b13714e56991eaef8ee45e96c1f0dbb Mon Sep 17 00:00:00 2001
From: "Tristan F."
Date: Thu, 30 Oct 2025 01:41:12 +0000
Subject: [PATCH 07/16] refactor: use heuristic error, mv heuristics outside of main schema file

---
 spras/config/heuristics.py | 69 ++++++++++++++++++++++++++++++++++++++
 spras/config/schema.py     | 49 +---------------------------
 2 files changed, 70 insertions(+), 48 deletions(-)
 create mode 100644 spras/config/heuristics.py

diff --git a/spras/config/heuristics.py b/spras/config/heuristics.py
new file mode 100644
index 000000000..3f42c6aba
--- /dev/null
+++ b/spras/config/heuristics.py
@@ -0,0 +1,69 @@
+import networkx as nx
+from pydantic import BaseModel, ConfigDict
+from spras.interval import Interval
+from spras.statistics import compute_statistics, statistics_options
+
+class GraphHeuristicError(RuntimeError):
+    """
+    Represents an error arising from a graph algorithm output
+    not meeting the necessary graph heuristics.
+    """
+    failed_heuristics: list[tuple[str, float | int, list[Interval]]]
+
+    @staticmethod
+    def to_string(failed_heuristics: list[tuple[str, float | int, list[Interval]]]):
+        return f"The following heuristics failed: {failed_heuristics}"
+
+    def __init__(self, failed_heuristics: list[tuple[str, float | int, list[Interval]]]):
+        super().__init__(GraphHeuristicError.to_string(failed_heuristics))
+
+        self.failed_heuristics = failed_heuristics
+    
+    def __str__(self) -> str:
+        return GraphHeuristicError.to_string(self.failed_heuristics)
+
+class GraphHeuristics(BaseModel):
+    number_of_nodes: Interval | list[Interval] = []
+    number_of_edges: Interval | list[Interval] = []
+    number_of_connected_components: Interval | list[Interval] = []
+    density: Interval | list[Interval] = []
+
+    max_degree: Interval | list[Interval] = []
+    median_degree: Interval | list[Interval] = []
+    max_diameter: Interval | list[Interval] = []
+    average_path_length: Interval | list[Interval] = []
+
+    def validate_graph(self, graph: nx.DiGraph):
+        statistics_dictionary = {
+            'Number of nodes': self.number_of_nodes,
+            'Number of edges': self.number_of_edges,
+            'Number of connected components': self.number_of_connected_components,
+            'Density': self.density,
+            'Max degree': self.max_degree,
+            'Median degree': self.median_degree,
+            'Max diameter': self.max_diameter,
+            'Average path length': self.average_path_length
+        }
+
+        # quick assert: is statistics_dictionary exhaustive?
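+        # (if a statistic is ever added to spras.statistics without a matching
+        # heuristic field above, this assert will catch it)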
+ assert set(statistics_dictionary.keys()) == set(statistics_options) + + stats = compute_statistics( + graph, + list(k for k, v in statistics_dictionary.items() if isinstance(v, list) and len(v) == 0) + ) + + failed_heuristics: list[tuple[str, float | int, list[Interval]]] = [] + for key, value in stats.items(): + intervals = statistics_dictionary[key] + if not isinstance(intervals, list): intervals = [intervals] + + for interval in intervals: + if interval.mem(value): + failed_heuristics.append((key, value, intervals)) + break + + if len(failed_heuristics) != 0: + raise GraphHeuristicError(failed_heuristics) + + model_config = ConfigDict(extra='forbid') diff --git a/spras/config/schema.py b/spras/config/schema.py index 846188e4c..3678a541c 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -13,13 +13,11 @@ import re from typing import Annotated, Optional -import networkx as nx from pydantic import AfterValidator, BaseModel, ConfigDict from spras.config.container_schema import ContainerSettings +from spras.config.heuristics import GraphHeuristics from spras.config.util import CaseInsensitiveEnum -from spras.interval import Interval -from spras.statistics import compute_statistics, statistics_options # Most options here have an `include` property, # which is meant to make disabling parts of the configuration easier. @@ -140,51 +138,6 @@ class ReconstructionSettings(BaseModel): model_config = ConfigDict(extra='forbid') -class GraphHeuristics(BaseModel): - number_of_nodes: list[Interval] = [] - number_of_edges: list[Interval] = [] - number_of_connected_components: list[Interval] = [] - density: list[Interval] = [] - - max_degree: list[Interval] = [] - median_degree: list[Interval] = [] - max_diameter: list[Interval] = [] - average_path_length: list[Interval] = [] - - def validate_graph(self, graph: nx.DiGraph): - statistics_dictionary = { - 'Number of nodes': self.number_of_nodes, - 'Number of edges': self.number_of_edges, - 'Number of connected components': self.number_of_connected_components, - 'Density': self.density, - 'Max degree': self.max_degree, - 'Median degree': self.median_degree, - 'Max diameter': self.max_diameter, - 'Average path length': self.average_path_length - } - - # quick assert: is statistics_dictionary exhaustive? - assert set(statistics_dictionary.keys()) == set(statistics_options) - - stats = compute_statistics( - graph, - list(k for k, v in statistics_dictionary.items() if len(v) == 0) - ) - - for key, value in stats.items(): - intervals = statistics_dictionary[key] - - matches_heuristics = False - for interval in intervals: - if interval.mem(value): - matches_heuristics = True - break - - if not matches_heuristics: - raise RuntimeError(f"Heuristic {key} with value {value} does not match {intervals}!") - - model_config = ConfigDict(extra='forbid') - class RawConfig(BaseModel): containers: ContainerSettings enable_profiling: bool = False From fac110847104dd77fa46bcb3d67bd8af4d4df6c7 Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Thu, 30 Oct 2025 06:34:20 +0000 Subject: [PATCH 08/16] fix: proper tokenization --- spras/config/heuristics.py | 4 +++- spras/interval.py | 30 ++++++++++++++++++------------ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/spras/config/heuristics.py b/spras/config/heuristics.py index 3f42c6aba..99ecb4695 100644 --- a/spras/config/heuristics.py +++ b/spras/config/heuristics.py @@ -1,8 +1,10 @@ import networkx as nx from pydantic import BaseModel, ConfigDict + from spras.interval import Interval from spras.statistics import compute_statistics, statistics_options + class GraphHeuristicError(RuntimeError): """ Represents an error arising from a graph algorithm output @@ -18,7 +20,7 @@ def __init__(self, failed_heuristics: list[tuple[str, float | int, list[Interval super().__init__(GraphHeuristicError.to_string(failed_heuristics)) self.failed_heuristics = failed_heuristics - + def __str__(self) -> str: return GraphHeuristicError.to_string(self.failed_heuristics) diff --git a/spras/interval.py b/spras/interval.py index 6771a228b..6b304075d 100644 --- a/spras/interval.py +++ b/spras/interval.py @@ -7,10 +7,14 @@ [If there is ever a library that does this, we should replace this code with that library.] """ +import tokenize from enum import Enum +from io import BytesIO from typing import Any, Optional, Self + from pydantic import BaseModel, model_validator + class Operand(Enum): LT = "<" LTE = "<=" @@ -21,7 +25,7 @@ class Operand(Enum): @classmethod def from_str(cls, string: str) -> Optional[Self]: return next((enum for enum in list(cls) if enum.value == string), None) - + def is_closed(self) -> bool: """Whether this is a closed inequality. We consider = to be closed.""" match self: @@ -43,7 +47,7 @@ def as_opened(self): case Operand.LTE: return Operand.LT case Operand.GTE: return Operand.GT return self - + def with_closed(self, closed: bool): return self.as_closed() if closed else self.as_opened() def compare(self, left, right) -> bool: @@ -53,7 +57,7 @@ def compare(self, left, right) -> bool: case Operand.EQ: return left == right case Operand.GTE: return left >= right case Operand.GT: return left > right - + @classmethod def combine(cls, left: Self, right: Self): """Combines two operands, returning None if the operands don't combine well.""" @@ -79,12 +83,12 @@ def mem(self, num: float) -> bool: meets_lower = self.lower <= num if self.lower_closed else self.lower < num else: meets_lower = True - + if self.upper is not None: meets_upper = num <= self.upper if self.upper_closed else num < self.upper else: meets_upper = True - + return meets_lower and meets_upper @classmethod @@ -113,8 +117,10 @@ def right_operand(cls, num: float, operand: Operand) -> Self: @classmethod def from_string(cls, input: str) -> Self: - tokens = [token.strip() for token in input.split(" ")] - + # We can't do a normal string#split here for cases like "1500<" + tokens = [t.string for t in tokenize.tokenize(BytesIO(input.encode('utf-8')).readline) if t.string != ""] + tokens.pop() # drop utf-8 indicator + assert len(tokens) != 0 def parse_num(numstr: str) -> Optional[int]: @@ -130,18 +136,18 @@ def is_id(idstr: str) -> bool: return idstr.isidentifier() if is_id(tokens[0]): # No other cases have an id at the beginning: we get rid of it. tokens.pop() - + operand = Operand.from_str(tokens[0]) if operand is not None: # (cont.) Case 1: (id?) operand number number = parse_num(tokens[1]) assert number is not None, f"found operand {operand.value} and expected a number, but found {tokens[1]} instead." 
return cls.left_operand(operand, number) - + # All other cases have a number number = parse_num(tokens.pop()) assert number is not None, f"expected an inequality, got {input} instead" - + # Case 2: number if len(tokens) == 0: return cls.single(number) @@ -158,7 +164,7 @@ def is_id(idstr: str) -> bool: return idstr.isidentifier() # Case 4: number operand id operand number id = tokens.pop() assert is_id(id), f"got an inequality of the form {number} {operand.value} and expected nothing or another identifier, but got {id} instead." - + second_operand_str = tokens.pop() second_operand = Operand.from_str(second_operand_str) assert second_operand is not None, f"got an inequality of the form {number} {operand.value} {id} and was expecting an operand, but got {second_operand_str} instead." @@ -174,7 +180,7 @@ def is_id(idstr: str) -> bool: return idstr.isidentifier() combined_operand = Operand.combine(operand, second_operand) assert combined_operand is not None, f"operands {operand.value} and {second_operand} must combine well with each other!" assert combined_operand.compare(number, second_number), f"{number} {operand.value} {second_number} does not hold!" - + return cls( lower=number, upper=second_number, From 2e0d8d0faec96ffd5fcfb49496645f9464e3f24c Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 30 Oct 2025 07:13:24 +0000 Subject: [PATCH 09/16] fix(interval): correct parsing --- spras/interval.py | 16 ++++++++-------- test/test_interval.py | 6 ++++++ 2 files changed, 14 insertions(+), 8 deletions(-) create mode 100644 test/test_interval.py diff --git a/spras/interval.py b/spras/interval.py index 6b304075d..52e4ab802 100644 --- a/spras/interval.py +++ b/spras/interval.py @@ -119,7 +119,7 @@ def right_operand(cls, num: float, operand: Operand) -> Self: def from_string(cls, input: str) -> Self: # We can't do a normal string#split here for cases like "1500<" tokens = [t.string for t in tokenize.tokenize(BytesIO(input.encode('utf-8')).readline) if t.string != ""] - tokens.pop() # drop utf-8 indicator + tokens.pop(0) # drop utf-8 indicator assert len(tokens) != 0 @@ -135,7 +135,7 @@ def is_id(idstr: str) -> bool: return idstr.isidentifier() # Case 1: (id?) operand number if is_id(tokens[0]): # No other cases have an id at the beginning: we get rid of it. - tokens.pop() + tokens.pop(0) operand = Operand.from_str(tokens[0]) if operand is not None: @@ -145,15 +145,15 @@ def is_id(idstr: str) -> bool: return idstr.isidentifier() return cls.left_operand(operand, number) # All other cases have a number - number = parse_num(tokens.pop()) - assert number is not None, f"expected an inequality, got {input} instead" + number = parse_num(tokens.pop(0)) + assert number is not None, f"expected a number, got {input} instead" # Case 2: number if len(tokens) == 0: return cls.single(number) # All other cases have an operand - operand = Operand.from_str(tokens.pop()) + operand = Operand.from_str(tokens.pop(0)) assert operand is not None, f"got {number}, expected an operand afterward." # Case 3: number operand (id?) @@ -162,14 +162,14 @@ def is_id(idstr: str) -> bool: return idstr.isidentifier() return cls.right_operand(number, operand) # Case 4: number operand id operand number - id = tokens.pop() + id = tokens.pop(0) assert is_id(id), f"got an inequality of the form {number} {operand.value} and expected nothing or another identifier, but got {id} instead." 
- second_operand_str = tokens.pop() + second_operand_str = tokens.pop(0) second_operand = Operand.from_str(second_operand_str) assert second_operand is not None, f"got an inequality of the form {number} {operand.value} {id} and was expecting an operand, but got {second_operand_str} instead." - second_number_str = tokens.pop() + second_number_str = tokens.pop(0) second_number = parse_num(second_number_str) assert second_number is not None, f"got an inequality of the form {number} {operand.value} {id} {second_operand.value} and was expecting a number, but got {second_number_str} instead." diff --git a/test/test_interval.py b/test/test_interval.py new file mode 100644 index 000000000..e263817bc --- /dev/null +++ b/test/test_interval.py @@ -0,0 +1,6 @@ +from spras.interval import Interval + +class TestInterval: + def test_number(self): + assert Interval.single(5) == Interval(lower=5, upper=5, lower_closed=True, upper_closed=True) + assert Interval.from_string("5") == Interval.single(5) From 183c3ad874fad9096ced9aa4cc4c6ccd88769687 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 30 Oct 2025 07:30:19 +0000 Subject: [PATCH 10/16] fix(interval): correct other parsing mistakes --- spras/interval.py | 53 ++++++++++++++++++++++++++++--------------- test/test_interval.py | 11 +++++++++ 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/spras/interval.py b/spras/interval.py index 52e4ab802..7e5f6c663 100644 --- a/spras/interval.py +++ b/spras/interval.py @@ -10,7 +10,7 @@ import tokenize from enum import Enum from io import BytesIO -from typing import Any, Optional, Self +from typing import Any, Optional, Self, cast from pydantic import BaseModel, model_validator @@ -31,7 +31,7 @@ def is_closed(self) -> bool: match self: case Operand.LTE: return True case Operand.EQ: return True - case Operand.GT: return True + case Operand.GTE: return True return False def as_closed(self): @@ -57,6 +57,14 @@ def compare(self, left, right) -> bool: case Operand.EQ: return left == right case Operand.GTE: return left >= right case Operand.GT: return left > right + + def flip(self): + match self: + case Operand.LT: return Operand.GT + case Operand.LTE: return Operand.GTE + case Operand.EQ: return Operand.EQ + case Operand.GTE: return Operand.LTE + case Operand.GT: return Operand.LT @classmethod def combine(cls, left: Self, right: Self): @@ -64,11 +72,13 @@ def combine(cls, left: Self, right: Self): match (left, right): case (Operand.LTE, Operand.LTE): return Operand.LTE case (Operand.LT, Operand.LTE): return Operand.LT + case (Operand.LTE, Operand.LT): return Operand.LT case (Operand.LT, Operand.LT): return Operand.LT case (Operand.EQ, op): return op case (op, Operand.EQ): return op case (Operand.GTE, Operand.GTE): return Operand.GTE case (Operand.GT, Operand.GTE): return Operand.GT + case (Operand.GTE, Operand.GT): return Operand.GT case (Operand.GT, Operand.GT): return Operand.GT return None @@ -108,12 +118,8 @@ def left_operand(cls, operand: Operand, num: float) -> Self: @classmethod def right_operand(cls, num: float, operand: Operand) -> Self: """Creates an interval whose operand is on the right (e.g. 
300<)""" - match operand: - case Operand.LT: return cls(lower=num, upper=None, lower_closed=False, upper_closed=False) - case Operand.LTE: return cls(lower=num, upper=None, lower_closed=True, upper_closed=False) - case Operand.EQ: return cls.single(num) - case Operand.GTE: return cls(lower=None, upper=num, lower_closed=False, upper_closed=False) - case Operand.GT: return cls(lower=None, upper=num, lower_closed=False, upper_closed=True) + # TODO: remove cast? + return cast(Self, Interval.left_operand(operand.flip(), num)) @classmethod def from_string(cls, input: str) -> Self: @@ -158,7 +164,7 @@ def is_id(idstr: str) -> bool: return idstr.isidentifier() # Case 3: number operand (id?) if len(tokens) == 0 or len(tokens) == 1: - if len(tokens) == 1: assert is_id(tokens[1]) + if len(tokens) == 1: assert is_id(tokens[0]) return cls.right_operand(number, operand) # Case 4: number operand id operand number @@ -178,15 +184,23 @@ def is_id(idstr: str) -> bool: return idstr.isidentifier() # are our two numbers valid? combined_operand = Operand.combine(operand, second_operand) - assert combined_operand is not None, f"operands {operand.value} and {second_operand} must combine well with each other!" + assert combined_operand is not None, f"operands {operand.value} and {second_operand.value} must combine well with each other!" assert combined_operand.compare(number, second_number), f"{number} {operand.value} {second_number} does not hold!" - return cls( - lower=number, - upper=second_number, - lower_closed=operand.is_closed(), - upper_closed=second_operand.is_closed() - ) + if combined_operand.as_opened() == Operand.LT: + return cls( + lower=number, + upper=second_number, + lower_closed=operand.is_closed(), + upper_closed=second_operand.is_closed() + ) + else: + return cls( + lower=second_number, + upper=number, + lower_closed=second_operand.is_closed(), + upper_closed=operand.is_closed() + ) def __str__(self) -> str: if not self.lower and not self.upper: return "{empty interval}" @@ -197,8 +211,11 @@ def __str__(self) -> str: if self.lower == self.upper and self.lower_closed and self.upper_closed: return str(self.lower) - return str(self.lower) + " " + Operand.LT.with_closed(self.lower_closed).value + " " + "x" \ - + Operand.LT.with_closed(self.upper_closed).value + str(self.upper) + return str(self.lower) + " " + Operand.LT.with_closed(self.lower_closed).value + " x " \ + + Operand.LT.with_closed(self.upper_closed).value + " " + str(self.upper) + + def __repr__(self) -> str: + return f"Interval[{str(self)}]" # For parsing Intervals automatically with pydantic. 
@model_validator(mode="before") diff --git a/test/test_interval.py b/test/test_interval.py index e263817bc..1c3b69a61 100644 --- a/test/test_interval.py +++ b/test/test_interval.py @@ -4,3 +4,14 @@ class TestInterval: def test_number(self): assert Interval.single(5) == Interval(lower=5, upper=5, lower_closed=True, upper_closed=True) assert Interval.from_string("5") == Interval.single(5) + + def test_string_permutations(self): + assert Interval.from_string("<5") == Interval.from_string("< 5") + assert Interval.from_string("5<") == Interval.from_string("5 < x") + assert Interval.from_string("6<") == Interval.from_string("x > 6") + assert Interval.from_string("100 <") == Interval.from_string(">100") + assert Interval.from_string("200 >= x > 100") == Interval.from_string("100 < x <= 200") + + def test_orientation(self): + assert Interval.from_string("10<").upper is None + assert Interval.from_string("10<").lower == 10.0 From 0b6e01f59bfc1e8a25f525194d6548c82a0b11b9 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Thu, 6 Nov 2025 00:02:39 +0000 Subject: [PATCH 11/16] feat: integrate heuristics --- Snakefile | 1 + spras/config/heuristics.py | 15 +++++++++++++++ spras/interval.py | 2 +- spras/util.py | 2 ++ test/test_interval.py | 5 +++-- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/Snakefile b/Snakefile index 02f019e8d..ffb5c9f8c 100644 --- a/Snakefile +++ b/Snakefile @@ -295,6 +295,7 @@ rule parse_output: params = reconstruction_params(wildcards.algorithm, wildcards.params).copy() params['dataset'] = input.dataset_file runner.parse_output(wildcards.algorithm, input.raw_file, output.standardized_file, params) + _config.config.heuristics.validate_graph_from_file(output.standardized_file) # TODO: reuse in the future once we make summary work for mixed graphs. See https://github.com/Reed-CompBio/spras/issues/128 # Collect summary statistics for a single pathway diff --git a/spras/config/heuristics.py b/spras/config/heuristics.py index 99ecb4695..2c003fe64 100644 --- a/spras/config/heuristics.py +++ b/spras/config/heuristics.py @@ -1,3 +1,5 @@ +import os + import networkx as nx from pydantic import BaseModel, ConfigDict @@ -69,3 +71,16 @@ def validate_graph(self, graph: nx.DiGraph): raise GraphHeuristicError(failed_heuristics) model_config = ConfigDict(extra='forbid') + + def validate_graph_from_file(self, path: str | os.PathLike): + # TODO: re-use from summary.py once we have a mixed/hypergraph library + G = nx.read_edgelist(path, data=(('weight', float), ('Direction', str)), create_using=nx.DiGraph) + + # We explicitly use `list` here to stop add_edge + # from expanding our iterator infinitely. 
+ for source, target, data in list(G.edges(data=True)): + if data["Direction"] == 'U': + G.add_edge(target, source, data) + pass + + return self.validate_graph(G) diff --git a/spras/interval.py b/spras/interval.py index 7e5f6c663..266997fd5 100644 --- a/spras/interval.py +++ b/spras/interval.py @@ -57,7 +57,7 @@ def compare(self, left, right) -> bool: case Operand.EQ: return left == right case Operand.GTE: return left >= right case Operand.GT: return left > right - + def flip(self): match self: case Operand.LT: return Operand.GT diff --git a/spras/util.py b/spras/util.py index ce2cc2f96..594a2a3ad 100644 --- a/spras/util.py +++ b/spras/util.py @@ -102,6 +102,8 @@ def raw_pathway_df(raw_pathway_file: str, sep: str = '\t', header: int = None) - return df +def output_pathw + def duplicate_edges(df: pd.DataFrame) -> tuple[pd.DataFrame, bool]: """ diff --git a/test/test_interval.py b/test/test_interval.py index 1c3b69a61..840f8d057 100644 --- a/test/test_interval.py +++ b/test/test_interval.py @@ -1,17 +1,18 @@ from spras.interval import Interval + class TestInterval: def test_number(self): assert Interval.single(5) == Interval(lower=5, upper=5, lower_closed=True, upper_closed=True) assert Interval.from_string("5") == Interval.single(5) - + def test_string_permutations(self): assert Interval.from_string("<5") == Interval.from_string("< 5") assert Interval.from_string("5<") == Interval.from_string("5 < x") assert Interval.from_string("6<") == Interval.from_string("x > 6") assert Interval.from_string("100 <") == Interval.from_string(">100") assert Interval.from_string("200 >= x > 100") == Interval.from_string("100 < x <= 200") - + def test_orientation(self): assert Interval.from_string("10<").upper is None assert Interval.from_string("10<").lower == 10.0 From 33e004f92c2ca09ee7956633f3fb210a5dcd83d8 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Thu, 6 Nov 2025 00:56:20 +0000 Subject: [PATCH 12/16] fix: drop random code --- spras/util.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/spras/util.py b/spras/util.py index 594a2a3ad..ce2cc2f96 100644 --- a/spras/util.py +++ b/spras/util.py @@ -102,8 +102,6 @@ def raw_pathway_df(raw_pathway_file: str, sep: str = '\t', header: int = None) - return df -def output_pathw - def duplicate_edges(df: pd.DataFrame) -> tuple[pd.DataFrame, bool]: """ From c675eced3b62b8a62204d9f6105628e1cdc09045 Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Thu, 6 Nov 2025 02:22:45 +0000 Subject: [PATCH 13/16] fix: make undirected for determining number of connected components --- spras/statistics.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spras/statistics.py b/spras/statistics.py index 1ebe7cc62..222051d23 100644 --- a/spras/statistics.py +++ b/spras/statistics.py @@ -24,7 +24,8 @@ def compute_degree(graph: nx.DiGraph) -> tuple[int, float]: degrees = [deg for _, deg in graph.degree()] return max(degrees), median(degrees) -def compute_on_cc(graph: nx.DiGraph) -> tuple[int, float]: +def compute_on_cc(directed_graph: nx.DiGraph) -> tuple[int, float]: + graph: nx.Graph = directed_graph.to_undirected() cc = list(nx.connected_components(graph)) # Save the max diameter # Use diameter only for components with ≥2 nodes (singleton components have diameter 0) @@ -52,7 +53,7 @@ def compute_on_cc(graph: nx.DiGraph) -> tuple[int, float]: statistics_computation: dict[tuple[str, ...], Callable[[nx.DiGraph], tuple[float | int, ...]]] = { ('Number of nodes',): lambda graph : (graph.number_of_nodes(),), ('Number of edges',): lambda graph : (graph.number_of_edges(),), - ('Number of connected components',): lambda graph : (nx.number_connected_components(graph),), + ('Number of connected components',): lambda graph : (nx.number_connected_components(graph.to_undirected()),), ('Density',): lambda graph : (nx.density(graph),), ('Max degree', 'Median degree'): compute_degree, From 1cdaf121c84e08bb8a2c9c607e3286ffdbfdda0a Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Wed, 5 Nov 2025 18:23:13 -0800 Subject: [PATCH 14/16] fix: specify heuristics in wrapping config object --- Snakefile | 2 ++ spras/config/config.py | 2 ++ spras/config/heuristics.py | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Snakefile b/Snakefile index ffb5c9f8c..65e24f15d 100644 --- a/Snakefile +++ b/Snakefile @@ -295,6 +295,8 @@ rule parse_output: params = reconstruction_params(wildcards.algorithm, wildcards.params).copy() params['dataset'] = input.dataset_file runner.parse_output(wildcards.algorithm, input.raw_file, output.standardized_file, params) + # TODO: cache heuristics result, store partial heuristics configuration file + # to allow this rule to update when heuristics change _config.config.heuristics.validate_graph_from_file(output.standardized_file) # TODO: reuse in the future once we make summary work for mixed graphs. See https://github.com/Reed-CompBio/spras/issues/128 diff --git a/spras/config/config.py b/spras/config/config.py index 25e6f72de..51d3daf3b 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -78,6 +78,8 @@ def __init__(self, raw_config: dict[str, Any]): self.container_settings = ProcessedContainerSettings.from_container_settings(parsed_raw_config.containers, self.hash_length) # The list of algorithms to run in the workflow. Each is a dict with 'name' as an expected key. self.algorithms = None + # The heuristic handler + self.heuristics = parsed_raw_config.heuristics # A nested dict mapping algorithm names to dicts that map parameter hashes to parameter combinations. # Only includes algorithms that are set to be run with 'include: true'. 
        self.algorithm_params = None

diff --git a/spras/config/heuristics.py b/spras/config/heuristics.py
index 2c003fe64..f5f9ea24c 100644
--- a/spras/config/heuristics.py
+++ b/spras/config/heuristics.py
@@ -74,13 +74,13 @@ def validate_graph(self, graph: nx.DiGraph):
 
     def validate_graph_from_file(self, path: str | os.PathLike):
         # TODO: re-use from summary.py once we have a mixed/hypergraph library
-        G = nx.read_edgelist(path, data=(('weight', float), ('Direction', str)), create_using=nx.DiGraph)
+        G: nx.DiGraph = nx.read_edgelist(path, data=(('Rank', str), ('Direction', str)), create_using=nx.DiGraph)
 
         # We explicitly use `list` here to stop add_edge
         # from expanding our iterator infinitely.
         # Undirected edges appear once with Direction 'U'; adding the reverse
         # edge lets degree and connectivity statistics treat them as bidirectional.
         for source, target, data in list(G.edges(data=True)):
             if data["Direction"] == 'U':
-                G.add_edge(target, source, data)
+                G.add_edge(target, source, **data)
             pass
 
         return self.validate_graph(G)

From 7b290dcb636309534ec3ccdeae06d34a1c077f1b Mon Sep 17 00:00:00 2001
From: "Tristan F."
Date: Thu, 6 Nov 2025 08:31:13 +0000
Subject: [PATCH 15/16] feat: interval and heuristic testing

---
 spras/config/heuristics.py              | 33 +++++++++++++++++++------
 spras/interval.py                       | 30 +++++++++++++---------
 test/heuristics/__init__.py             |  0
 test/heuristics/fixtures/empty.txt      |  0
 test/heuristics/fixtures/nonempty.txt   |  1 +
 test/heuristics/fixtures/undirected.txt |  1 +
 test/heuristics/test_heuristics.py      | 26 +++++++++++++++++++
 test/test_interval.py                   |  3 +++
 8 files changed, 76 insertions(+), 18 deletions(-)
 create mode 100644 test/heuristics/__init__.py
 create mode 100644 test/heuristics/fixtures/empty.txt
 create mode 100644 test/heuristics/fixtures/nonempty.txt
 create mode 100644 test/heuristics/fixtures/undirected.txt
 create mode 100644 test/heuristics/test_heuristics.py

diff --git a/spras/config/heuristics.py b/spras/config/heuristics.py
index f5f9ea24c..52c4839c6 100644
--- a/spras/config/heuristics.py
+++ b/spras/config/heuristics.py
@@ -6,25 +6,40 @@
 from spras.interval import Interval
 from spras.statistics import compute_statistics, statistics_options
 
+__all__ = ['GraphHeuristicsError', 'GraphHeuristics']
 
-class GraphHeuristicError(RuntimeError):
+class GraphHeuristicsError(RuntimeError):
     """
     Represents an error arising from a graph algorithm output
     not meeting the necessary graph heuristics.
""" failed_heuristics: list[tuple[str, float | int, list[Interval]]] + @staticmethod + def format_failed_heuristic(heuristic: tuple[str, float | int, list[Interval]]) -> str: + name, desired, intervals = heuristic + if len(intervals) == 1: + interval_string = str(intervals[0]) + else: + formatted_intervals = ", ".join([str(interval) for interval in intervals]) + interval_string = f"one of the intervals ({formatted_intervals})" + return f"{name} expected {desired} in interval {interval_string}" @staticmethod def to_string(failed_heuristics: list[tuple[str, float | int, list[Interval]]]): - return f"The following heuristics failed: {failed_heuristics}" + formatted_heuristics = [ + GraphHeuristicsError.format_failed_heuristic(heuristic) for heuristic in failed_heuristics + ] + + formatted_heuristics = "\n".join([f"- {formatted_heuristics}" for heuristic in formatted_heuristics]) + return f"The following heuristics failed:\n{formatted_heuristics}" def __init__(self, failed_heuristics: list[tuple[str, float | int, list[Interval]]]): - super().__init__(GraphHeuristicError.to_string(failed_heuristics)) + super().__init__(GraphHeuristicsError.to_string(failed_heuristics)) self.failed_heuristics = failed_heuristics def __str__(self) -> str: - return GraphHeuristicError.to_string(self.failed_heuristics) + return GraphHeuristicsError.to_string(self.failed_heuristics) class GraphHeuristics(BaseModel): number_of_nodes: Interval | list[Interval] = [] @@ -54,7 +69,7 @@ def validate_graph(self, graph: nx.DiGraph): stats = compute_statistics( graph, - list(k for k, v in statistics_dictionary.items() if isinstance(v, list) and len(v) == 0) + list(k for k, v in statistics_dictionary.items() if not isinstance(v, list) or len(v) != 0) ) failed_heuristics: list[tuple[str, float | int, list[Interval]]] = [] @@ -63,16 +78,20 @@ def validate_graph(self, graph: nx.DiGraph): if not isinstance(intervals, list): intervals = [intervals] for interval in intervals: - if interval.mem(value): + if not interval.mem(value): failed_heuristics.append((key, value, intervals)) break if len(failed_heuristics) != 0: - raise GraphHeuristicError(failed_heuristics) + raise GraphHeuristicsError(failed_heuristics) model_config = ConfigDict(extra='forbid') def validate_graph_from_file(self, path: str | os.PathLike): + """ + Takes in a graph produced by PRM#parse_output, + and throws a GraphHeuristicsError if it fails the heuristics in `self`. + """ # TODO: re-use from summary.py once we have a mixed/hypergraph library G: nx.DiGraph = nx.read_edgelist(path, data=(('Rank', str), ('Direction', str)), create_using=nx.DiGraph) diff --git a/spras/interval.py b/spras/interval.py index 266997fd5..b65f87a7c 100644 --- a/spras/interval.py +++ b/spras/interval.py @@ -10,9 +10,10 @@ import tokenize from enum import Enum from io import BytesIO -from typing import Any, Optional, Self, cast +from typing import Any, ClassVar, Optional, Self, cast -from pydantic import BaseModel, model_validator +from pydantic import model_serializer, model_validator +from pydantic.dataclasses import dataclass class Operand(Enum): @@ -82,7 +83,10 @@ def combine(cls, left: Self, right: Self): case (Operand.GT, Operand.GT): return Operand.GT return None -class Interval(BaseModel): +@dataclass +class Interval: + EMPTY_STRING: ClassVar[str] = "{empty interval}" + lower: Optional[float] upper: Optional[float] lower_closed: bool @@ -110,10 +114,10 @@ def left_operand(cls, operand: Operand, num: float) -> Self: """Creates an interval whose operand is on the left (e.g. 
<300)""" match operand: case Operand.LT: return cls(lower=None, upper=num, lower_closed=False, upper_closed=False) - case Operand.LTE: return cls(lower=None, upper=num, lower_closed=True, upper_closed=False) + case Operand.LTE: return cls(lower=None, upper=num, lower_closed=False, upper_closed=True) case Operand.EQ: return cls.single(num) - case Operand.GTE: return cls(lower=num, upper=None, lower_closed=False, upper_closed=False) - case Operand.GT: return cls(lower=num, upper=None, lower_closed=False, upper_closed=True) + case Operand.GTE: return cls(lower=num, upper=None, lower_closed=True, upper_closed=False) + case Operand.GT: return cls(lower=num, upper=None, lower_closed=False, upper_closed=False) @classmethod def right_operand(cls, num: float, operand: Operand) -> Self: @@ -203,10 +207,10 @@ def is_id(idstr: str) -> bool: return idstr.isidentifier() ) def __str__(self) -> str: - if not self.lower and not self.upper: return "{empty interval}" - if not self.lower: + if self.lower is None and self.upper is None: return Interval.EMPTY_STRING + if self.lower is None: return Operand.LT.with_closed(self.upper_closed).value + " " + str(self.upper) - if not self.upper: + if self.upper is None: return str(self.lower) + " " + Operand.LT.with_closed(self.lower_closed).value if self.lower == self.upper and self.lower_closed and self.upper_closed: return str(self.lower) @@ -221,6 +225,10 @@ def __repr__(self) -> str: @model_validator(mode="before") @classmethod def from_literal(cls, data: Any) -> Any: - if isinstance(data, str): - return cls.from_string(data) + if isinstance(data, int) or isinstance(data, float) or isinstance(data, str): + return vars(cls.from_string(str(data))) return data + + @model_serializer(mode='plain') + def serialize_model(self) -> str: + return str(self) diff --git a/test/heuristics/__init__.py b/test/heuristics/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/heuristics/fixtures/empty.txt b/test/heuristics/fixtures/empty.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/heuristics/fixtures/nonempty.txt b/test/heuristics/fixtures/nonempty.txt new file mode 100644 index 000000000..8e9f8ac96 --- /dev/null +++ b/test/heuristics/fixtures/nonempty.txt @@ -0,0 +1 @@ +A B 1 D diff --git a/test/heuristics/fixtures/undirected.txt b/test/heuristics/fixtures/undirected.txt new file mode 100644 index 000000000..627d30073 --- /dev/null +++ b/test/heuristics/fixtures/undirected.txt @@ -0,0 +1 @@ +A B 1 U diff --git a/test/heuristics/test_heuristics.py b/test/heuristics/test_heuristics.py new file mode 100644 index 000000000..3512915e0 --- /dev/null +++ b/test/heuristics/test_heuristics.py @@ -0,0 +1,26 @@ +from pathlib import Path +import pytest + +from spras.config.heuristics import GraphHeuristics, GraphHeuristicsError + +FIXTURES_DIR = Path('test', 'heuristics', 'fixtures') + +class TestHeuristics: + def parse(self, heuristics: dict) -> GraphHeuristics: + return GraphHeuristics.model_validate(heuristics) + + def test_nonempty(self): + self.parse({ 'number_of_nodes': '>0', 'number_of_edges': '1' } + ).validate_graph_from_file(FIXTURES_DIR / 'nonempty.txt') + + def test_empty(self): + self.parse({ 'number_of_nodes': '<1' } + ).validate_graph_from_file(FIXTURES_DIR / 'empty.txt') + + with pytest.raises(GraphHeuristicsError): + self.parse({ 'number_of_nodes': '0<' } + ).validate_graph_from_file(FIXTURES_DIR / 'empty.txt') + + def test_undirected(self): + self.parse({ 'number_of_nodes': '1 < x < 3', 'number_of_edges': 2 } + 
).validate_graph_from_file(FIXTURES_DIR / 'undirected.txt') \ No newline at end of file diff --git a/test/test_interval.py b/test/test_interval.py index 840f8d057..1481d1a79 100644 --- a/test/test_interval.py +++ b/test/test_interval.py @@ -6,6 +6,9 @@ def test_number(self): assert Interval.single(5) == Interval(lower=5, upper=5, lower_closed=True, upper_closed=True) assert Interval.from_string("5") == Interval.single(5) + def test_interval_gt_0(self): + assert Interval.from_string(">0") == Interval(lower=0, upper=None, lower_closed=False, upper_closed=False) + def test_string_permutations(self): assert Interval.from_string("<5") == Interval.from_string("< 5") assert Interval.from_string("5<") == Interval.from_string("5 < x") From 4844fd6917bb1fe7ee7006d82bfa3e73c6830fbb Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 6 Nov 2025 08:33:32 +0000 Subject: [PATCH 16/16] style: fmt --- test/heuristics/test_heuristics.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/heuristics/test_heuristics.py b/test/heuristics/test_heuristics.py index 3512915e0..8011f5377 100644 --- a/test/heuristics/test_heuristics.py +++ b/test/heuristics/test_heuristics.py @@ -1,4 +1,5 @@ from pathlib import Path + import pytest from spras.config.heuristics import GraphHeuristics, GraphHeuristicsError @@ -16,11 +17,11 @@ def test_nonempty(self): def test_empty(self): self.parse({ 'number_of_nodes': '<1' } ).validate_graph_from_file(FIXTURES_DIR / 'empty.txt') - + with pytest.raises(GraphHeuristicsError): self.parse({ 'number_of_nodes': '0<' } ).validate_graph_from_file(FIXTURES_DIR / 'empty.txt') def test_undirected(self): self.parse({ 'number_of_nodes': '1 < x < 3', 'number_of_edges': 2 } - ).validate_graph_from_file(FIXTURES_DIR / 'undirected.txt') \ No newline at end of file + ).validate_graph_from_file(FIXTURES_DIR / 'undirected.txt')
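
A minimal usage sketch of the feature this series adds, assuming the modules
land as spras.interval and spras.config.heuristics (all names are taken from
the diffs above; the graph and heuristic values are made up for illustration):

    import networkx as nx

    from spras.config.heuristics import GraphHeuristics, GraphHeuristicsError
    from spras.interval import Interval

    # Interval strings accept an optional placeholder identifier, e.g. "x".
    assert Interval.from_string("1 < x <= 3").mem(2)

    # Each heuristic field takes one interval string or a list of them.
    heuristics = GraphHeuristics.model_validate({
        "number_of_nodes": "1 < x < 4",
        "number_of_edges": ">0",
    })

    graph = nx.DiGraph([("A", "B"), ("B", "C")])
    heuristics.validate_graph(graph)  # passes: 3 nodes, 2 edges

    try:
        heuristics.validate_graph(nx.DiGraph())  # 0 nodes, 0 edges
    except GraphHeuristicsError as err:
        print(err)  # lists each failed heuristic on its own line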