From 311ef10eaf95130062f106b3ba2efd07653ef327 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Sat, 5 Jul 2025 20:04:17 -0700 Subject: [PATCH 01/27] Add caching to fsm crawl --- interegular/fsm.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 67e5ca4..4f728f9 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -974,9 +974,17 @@ def crawl(alphabet, initial, final, follow): forever if you supply an evil version of follow(). """ + def get_hash(obj): + if isinstance(obj, set): + return hash(frozenset(obj)) + elif isinstance(obj, dict): + return hash(tuple(sorted(obj.items()))) + return hash(obj) + states = [initial] + state_idx = {get_hash(initial): 0} finals = set() - map = {} + transition_map = {} # iterate over a growing list i = 0 @@ -988,20 +996,23 @@ def crawl(alphabet, initial, final, follow): finals.add(i) # compute map for this state - map[i] = {} + transition_map[i] = {} for transition in alphabet.by_transition: try: - next = follow(state, transition) + next_state = follow(state, transition) + next_hash = get_hash(next_state) except OblivionError: # Reached an oblivion state. Don't list it. continue else: try: - j = states.index(next) - except ValueError: + j = state_idx[next_hash] + except KeyError: j = len(states) - states.append(next) - map[i][transition] = j + states.append(next_state) + if next_hash not in state_idx: + state_idx[next_hash] = j + transition_map[i][transition] = j i += 1 @@ -1010,6 +1021,6 @@ def crawl(alphabet, initial, final, follow): states=range(len(states)), initial=0, finals=finals, - map=map, + map=transition_map, __no_validation__=True, - ) + ) \ No newline at end of file From 14d752d54320a2ef49d1ccfbf52da2352cc6b41f Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Sat, 5 Jul 2025 21:45:33 -0700 Subject: [PATCH 02/27] Refactor crawl to check cache only once --- interegular/fsm.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 4f728f9..26ee73a 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -4,7 +4,7 @@ from _collections import deque from collections import defaultdict from functools import total_ordering -from typing import Any, Set, Dict, Union, NewType, Mapping, Tuple, Iterable +from typing import Any, Set, Dict, Union, NewType, Mapping, Tuple, Iterable, Callable from interegular.utils import soft_repr @@ -350,14 +350,14 @@ def follow(current, new_transition): next FSM if we reach the end of the current one TODO: improve all follow() implementations to allow for dead metastates? """ - next = set() + next_set = set() for (i, substate) in current: fsm = fsms[i] if substate in fsm.map and new_to_old[i][new_transition] in fsm.map[substate]: - next.update(connect_all(i, fsm.map[substate][new_to_old[i][new_transition]])) - if not next: + next_set.update(connect_all(i, fsm.map[substate][new_to_old[i][new_transition]])) + if not next_set: raise OblivionError - return frozenset(next) + return frozenset(next_set) return crawl(alphabet, initial, final, follow) @@ -966,7 +966,7 @@ def crawl_hash_no_result(alphabet, initial, final, follow): unvisited.add(new) -def crawl(alphabet, initial, final, follow): +def crawl(alphabet: Alphabet, initial: any, final: Callable[[any], bool], follow: Callable[[any, any], any]): """ Given the above conditions and instructions, crawl a new unknown FSM, mapping its states, final states and transitions. Return the new FSM. @@ -1000,18 +1000,20 @@ def get_hash(obj): for transition in alphabet.by_transition: try: next_state = follow(state, transition) - next_hash = get_hash(next_state) + except OblivionError: # Reached an oblivion state. Don't list it. continue + else: - try: + next_hash = get_hash(next_state) + if next_hash in state_idx: j = state_idx[next_hash] - except KeyError: + else: j = len(states) states.append(next_state) - if next_hash not in state_idx: - state_idx[next_hash] = j + state_idx[next_hash] = j + transition_map[i][transition] = j i += 1 From 9d1cfe786c2de2db9d189dc8e7fadc8f234a0c99 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Sat, 5 Jul 2025 22:07:52 -0700 Subject: [PATCH 03/27] Add typing to by_transition property of Alphabet --- interegular/fsm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 26ee73a..a17446a 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -4,7 +4,7 @@ from _collections import deque from collections import defaultdict from functools import total_ordering -from typing import Any, Set, Dict, Union, NewType, Mapping, Tuple, Iterable, Callable +from typing import Any, Set, Dict, Union, NewType, Mapping, Tuple, Iterable, Callable, List from interegular.utils import soft_repr @@ -69,7 +69,7 @@ def nice_char_group(chars: Iterable[Union[str, _AnythingElseCls]]): class Alphabet(Mapping[Any, TransitionKey]): @property - def by_transition(self): + def by_transition(self) -> Dict[TransitionKey, List[Union[str, _AnythingElseCls]]]: return self._by_transition def __str__(self): @@ -95,7 +95,7 @@ def __init__(self, symbol_mapping: Dict[Union[str, _AnythingElseCls], Transition by_transition = defaultdict(list) for s, t in self._symbol_mapping.items(): by_transition[t].append(s) - self._by_transition = dict(by_transition) + self._by_transition: Dict[TransitionKey, List[Union[str, _AnythingElseCls]]] = dict(by_transition) def __getitem__(self, item): if item not in self._symbol_mapping: @@ -966,7 +966,7 @@ def crawl_hash_no_result(alphabet, initial, final, follow): unvisited.add(new) -def crawl(alphabet: Alphabet, initial: any, final: Callable[[any], bool], follow: Callable[[any, any], any]): +def crawl(alphabet: Alphabet, initial: Any, final: Callable[[Any], bool], follow: Callable[[Any, Any], Set[Any]]): """ Given the above conditions and instructions, crawl a new unknown FSM, mapping its states, final states and transitions. Return the new FSM. @@ -982,7 +982,7 @@ def get_hash(obj): return hash(obj) states = [initial] - state_idx = {get_hash(initial): 0} + state_idx: Dict[int, int] = {get_hash(initial): 0} finals = set() transition_map = {} From db53c77ee0643068c398e7d8de92e04264958b56 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Mon, 7 Jul 2025 09:06:07 -0700 Subject: [PATCH 04/27] Revert follow var name change --- interegular/fsm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index a17446a..bad7c9a 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -350,14 +350,14 @@ def follow(current, new_transition): next FSM if we reach the end of the current one TODO: improve all follow() implementations to allow for dead metastates? """ - next_set = set() + next = set() for (i, substate) in current: fsm = fsms[i] if substate in fsm.map and new_to_old[i][new_transition] in fsm.map[substate]: - next_set.update(connect_all(i, fsm.map[substate][new_to_old[i][new_transition]])) - if not next_set: + next.update(connect_all(i, fsm.map[substate][new_to_old[i][new_transition]])) + if not next: raise OblivionError - return frozenset(next_set) + return frozenset(next) return crawl(alphabet, initial, final, follow) @@ -966,7 +966,7 @@ def crawl_hash_no_result(alphabet, initial, final, follow): unvisited.add(new) -def crawl(alphabet: Alphabet, initial: Any, final: Callable[[Any], bool], follow: Callable[[Any, Any], Set[Any]]): +def crawl(alphabet: Alphabet, initial: Any, final: Callable[[Any], bool], follow: Callable[[Any, TransitionKey], Any]): """ Given the above conditions and instructions, crawl a new unknown FSM, mapping its states, final states and transitions. Return the new FSM. From 75fbb80a9f114be6e129adf9fc61da6f702ad9d7 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Mon, 7 Jul 2025 13:33:23 -0700 Subject: [PATCH 05/27] Refactor initial frozenset creation in concatenate --- interegular/fsm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index bad7c9a..321b83b 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -332,10 +332,9 @@ def connect_all(i, substate): # Use a superset containing states from all FSMs at once. # We start at the start of the first FSM. If this state is final in the # first FSM, then we are also at the start of the second FSM. And so on. - initial = set() + initial = frozenset() if len(fsms) > 0: - initial.update(connect_all(0, fsms[0].initial)) - initial = frozenset(initial) + initial = frozenset(connect_all(0, fsms[0].initial)) def final(state): """If you're in a final state of the final FSM, it's final""" From fa759c998864fd115465c799d6895a60ab93aea3 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Mon, 7 Jul 2025 14:22:35 -0700 Subject: [PATCH 06/27] Use list appends instead of set adds in connect_all for speed boost --- interegular/fsm.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 321b83b..c950a3a 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -322,19 +322,19 @@ def connect_all(i, substate): (if it's final) the first state from the next FSM, plus (if that's final) the first state from the next but one FSM, plus... """ - result = {(i, substate)} + result = [(i, substate)] while i < last_index and substate in fsms[i].finals: i += 1 substate = fsms[i].initial - result.add((i, substate)) - return result + result.append((i, substate)) + return frozenset(result) # Use a superset containing states from all FSMs at once. # We start at the start of the first FSM. If this state is final in the # first FSM, then we are also at the start of the second FSM. And so on. initial = frozenset() if len(fsms) > 0: - initial = frozenset(connect_all(0, fsms[0].initial)) + initial = connect_all(0, fsms[0].initial) def final(state): """If you're in a final state of the final FSM, it's final""" @@ -349,14 +349,15 @@ def follow(current, new_transition): next FSM if we reach the end of the current one TODO: improve all follow() implementations to allow for dead metastates? """ - next = set() + next_states = set() for (i, substate) in current: fsm = fsms[i] - if substate in fsm.map and new_to_old[i][new_transition] in fsm.map[substate]: - next.update(connect_all(i, fsm.map[substate][new_to_old[i][new_transition]])) - if not next: + current_vertex: TransitionKey = new_to_old[i][new_transition] + if substate in fsm.map and current_vertex in fsm.map[substate]: + next_states.update(connect_all(i, fsm.map[substate][current_vertex])) + if not next_states: raise OblivionError - return frozenset(next) + return frozenset(next_states) return crawl(alphabet, initial, final, follow) From 334507585a2c5658111773e1a4a8ab28352e30e7 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Mon, 7 Jul 2025 14:31:22 -0700 Subject: [PATCH 07/27] Switch from set adds to list appends in star follow --- interegular/fsm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index c950a3a..e71097c 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -382,22 +382,22 @@ def star(self): initial = {self.initial} def follow(state, transition): - next = set() + next_states = [] for substate in state: if substate in self.map and transition in self.map[substate]: - next.add(self.map[substate][transition]) + next_states.append(self.map[substate][transition]) # If one of our substates is final, then we can also consider # transitions from the initial state of the original FSM. if substate in self.finals \ and self.initial in self.map \ and transition in self.map[self.initial]: - next.add(self.map[self.initial][transition]) + next_states.append(self.map[self.initial][transition]) - if not next: + if not next_states: raise OblivionError - return frozenset(next) + return frozenset(next_states) def final(state): return any(substate in self.finals for substate in state) From e56ff569f56f13bcbafe15e203df0c9aea63a9b8 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Mon, 7 Jul 2025 14:40:33 -0700 Subject: [PATCH 08/27] Switch to itertools chaining from repeated updates in reversed follow --- interegular/fsm.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index e71097c..f69ec48 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -5,6 +5,7 @@ from collections import defaultdict from functools import total_ordering from typing import Any, Set, Dict, Union, NewType, Mapping, Tuple, Iterable, Callable, List +from itertools import chain from interegular.utils import soft_repr @@ -578,12 +579,17 @@ def reversed(self): # Find every possible way to reach the current state-set # using this symbol. def follow(current, transition): - next_states = set() - for state in current: - next_states.update(reverse_map.get((state, transition), set())) + _empty_set = set() # reuse to avoid unnecessary allocations + + next_states_iter = ( + reverse_map.get((state, transition), _empty_set) + for state in current + ) + next_states = frozenset(chain.from_iterable(next_states_iter)) + if not next_states: raise OblivionError - return frozenset(next_states) + return next_states # A state-set is final if the initial state is in it. def final(state): From 1a0987efc0647fda5b79a61fb4b86ce0fef934c2 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Tue, 8 Jul 2025 16:51:28 -0700 Subject: [PATCH 09/27] Switch to list appends in concatenate follow --- interegular/fsm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index f69ec48..f17360c 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -350,15 +350,15 @@ def follow(current, new_transition): next FSM if we reach the end of the current one TODO: improve all follow() implementations to allow for dead metastates? """ - next_states = set() + next_states = [] for (i, substate) in current: fsm = fsms[i] current_vertex: TransitionKey = new_to_old[i][new_transition] if substate in fsm.map and current_vertex in fsm.map[substate]: - next_states.update(connect_all(i, fsm.map[substate][current_vertex])) + next_states.append(connect_all(i, fsm.map[substate][current_vertex])) if not next_states: raise OblivionError - return frozenset(next_states) + return frozenset(chain.from_iterable(next_states)) return crawl(alphabet, initial, final, follow) From c71c9642fa70cd69e04b1f989446e97c5c951cdd Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Tue, 8 Jul 2025 18:24:45 -0700 Subject: [PATCH 10/27] Use defaultdict in reversed --- interegular/fsm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index f17360c..99e26a5 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -569,11 +569,9 @@ def reversed(self): initial = frozenset(self.finals) # Speed up follow by pre-computing reverse-transition map - reverse_map = {} + reverse_map = defaultdict(set) for state, transition_map in self.map.items(): for transition, next_state in transition_map.items(): - if (next_state, transition) not in reverse_map: - reverse_map[(next_state, transition)] = set() reverse_map[(next_state, transition)].add(state) # Find every possible way to reach the current state-set From aed6c28a33ebf82f3007721755635e4fcec01018 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Tue, 8 Jul 2025 21:34:48 -0700 Subject: [PATCH 11/27] Remove unnecessary list creation before tuple conversion --- interegular/fsm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 99e26a5..cd14a2c 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -120,11 +120,11 @@ def union(*alphabets: 'Alphabet') -> 'Tuple[Alphabet, Tuple[Dict[TransitionKey, result = Alphabet({symbol: keys_to_key[keys] for keys, symbols in keys_to_symbols.items() for symbol in symbols}) - new_to_old_mappings = [{} for _ in alphabets] + new_to_old_mappings = tuple({} for _ in alphabets) for keys, new_key in keys_to_key.items(): for old_key, new_to_old in zip(keys, new_to_old_mappings): new_to_old[new_key] = old_key - return result, tuple(new_to_old_mappings) + return result, new_to_old_mappings @classmethod def from_groups(cls, *groups): @@ -140,13 +140,13 @@ def intersect(self, other: 'Alphabet') -> 'Tuple[Alphabet, Tuple[Dict[Transition result = Alphabet({symbol: keys_to_key[keys] for keys, symbols in keys_to_symbols.items() for symbol in symbols}) - old_to_new_mappings = [defaultdict(list) for _ in (self, other)] - new_to_old_mappings = [{} for _ in (self, other)] + old_to_new_mappings = defaultdict(list), defaultdict(list) + new_to_old_mappings = {}, {} for keys, new_key in keys_to_key.items(): for old_key, old_to_new, new_to_old in zip(keys, old_to_new_mappings, new_to_old_mappings): old_to_new[old_key].append(new_key) new_to_old[new_key] = old_key - return result, tuple(new_to_old_mappings) + return result, new_to_old_mappings def copy(self): return Alphabet(self._symbol_mapping.copy()) From a7beaab57f8659d67271e8d083a549ce92db1d03 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Wed, 9 Jul 2025 20:16:13 -0700 Subject: [PATCH 12/27] Use frozenset instead of set to store livestates --- interegular/fsm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index cd14a2c..804a8a8 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -652,7 +652,7 @@ def strings(self, max_iterations=None): # Many FSMs have "dead states". Once you reach a dead state, you can no # longer reach a final state. Since many strings may end up here, it's # advantageous to constrain our search to live states only. - livestates = set(state for state in self.states if self.islive(state)) + livestates = frozenset(state for state in self.states if self.islive(state)) # We store a list of tuples. Each tuple consists of an input string and the # state that this input string leads to. This means we don't have to run the From 5ae100ed85b7fbac12a4186887458f0dc4816c0d Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Wed, 9 Jul 2025 21:08:46 -0700 Subject: [PATCH 13/27] Use unvisited list instead of set in crawl_hash_no_result --- interegular/fsm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 804a8a8..cd9200b 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -948,7 +948,7 @@ def final(state, fsm_range=tuple(enumerate(fsms))): def crawl_hash_no_result(alphabet, initial, final, follow): - unvisited = {initial} + unvisited = [initial] visited = set() while unvisited: @@ -967,7 +967,7 @@ def crawl_hash_no_result(alphabet, initial, final, follow): continue else: if new not in visited: - unvisited.add(new) + unvisited.append(new) def crawl(alphabet: Alphabet, initial: Any, final: Callable[[Any], bool], follow: Callable[[Any, TransitionKey], Any]): From 82356fec61dad6d18b6970a6aea3622f90c5e8a5 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Wed, 9 Jul 2025 21:46:32 -0700 Subject: [PATCH 14/27] Rename next to next_state in times follow function --- interegular/fsm.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index cd9200b..ff8b8bb 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -407,7 +407,7 @@ def final(state): base.__dict__['finals'] = base.finals | {base.initial} return base - def times(self, multiplier): + def times(self, multiplier: int): """ Given an FSM and a multiplier, return the multiplied FSM. """ @@ -428,18 +428,19 @@ def final(state): return False def follow(current, transition): - next = [] + next_state = [] for (substate, iteration) in current: if iteration < multiplier \ and substate in self.map \ and transition in self.map[substate]: - next.append((self.map[substate][transition], iteration)) + current_state = self.map[substate][transition] + next_state.append((current_state, iteration)) # final of self? merge with initial on next iteration - if self.map[substate][transition] in self.finals: - next.append((self.initial, iteration + 1)) - if len(next) == 0: + if current_state in self.finals: + next_state.append((self.initial, iteration + 1)) + if len(next_state) == 0: raise OblivionError - return frozenset(next) + return frozenset(next_state) return crawl(alphabet, initial, final, follow) From b71c9f8e683cd57c34761499f5405f29a5c99c9a Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Wed, 9 Jul 2025 22:48:05 -0700 Subject: [PATCH 15/27] Optimize accepts function --- interegular/fsm.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index ff8b8bb..8fcef24 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -218,7 +218,7 @@ def __init__(self, alphabet: Alphabet, states, initial, finals, map, *, __no_val self.__dict__["finals"] = frozenset(finals) self.__dict__["map"] = map - def accepts(self, input: str): + def accepts(self, input_str: str): """ Test whether the present FSM accepts the supplied string (iterable of symbols). Equivalently, consider `self` as a possibly-infinite set of @@ -228,13 +228,19 @@ def accepts(self, input: str): alphabet will be converted to `fsm.anything_else`. """ state = self.initial - for symbol in input: - if anything_else in self.alphabet and not symbol in self.alphabet: + anything_else_in_alphabet = anything_else in self.alphabet + + for symbol in input_str: + if anything_else_in_alphabet and not symbol in self.alphabet: symbol = anything_else + + if state not in self.map: + return False + transition = self.alphabet[symbol] # Missing transition = transition to dead state - if not (state in self.map and transition in self.map[state]): + if transition not in self.map[state]: return False state = self.map[state][transition] From 800677666a9490c62bf6768226fd2bdcf45899c5 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Thu, 10 Jul 2025 13:29:24 -0700 Subject: [PATCH 16/27] Separate loops in accepts to avoid anything else check every iteration --- interegular/fsm.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 8fcef24..0d2a916 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -228,22 +228,34 @@ def accepts(self, input_str: str): alphabet will be converted to `fsm.anything_else`. """ state = self.initial - anything_else_in_alphabet = anything_else in self.alphabet + if anything_else in self.alphabet: - for symbol in input_str: - if anything_else_in_alphabet and not symbol in self.alphabet: - symbol = anything_else + for symbol in input_str: + if not symbol in self.alphabet: + symbol = anything_else + + if state not in self.map: + return False - if state not in self.map: - return False - - transition = self.alphabet[symbol] + transition = self.alphabet[symbol] + + # Missing transition = transition to dead state + if transition not in self.map[state]: + return False - # Missing transition = transition to dead state - if transition not in self.map[state]: - return False + state = self.map[state][transition] + else: + for symbol in input_str: + if state not in self.map: + return False + + transition = self.alphabet[symbol] + + # Missing transition = transition to dead state + if transition not in self.map[state]: + return False - state = self.map[state][transition] + state = self.map[state][transition] return state in self.finals def __contains__(self, string): From 640cc067f1793b5d37a7a85d76488697202c9fb3 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Fri, 11 Jul 2025 17:31:49 -0700 Subject: [PATCH 17/27] Remove unnecessary tuple conversion in fsm range enumeration for parallel --- interegular/fsm.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 0d2a916..fd9efad 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -945,21 +945,28 @@ def parallel(fsms, test): # dedicated function accepts a "superset" and returns the next "superset" # obtained by following this transition in the new FSM - def follow(current, new_transition, fsm_range=tuple(enumerate(fsms))): - next = {} + def follow(current, new_transition, fsm_range=None): + fsm_range = fsm_range or enumerate(fsms) + next_state = {} + for i, f in fsm_range: + if i not in current: + continue + old_transition = new_to_old[i][new_transition] - if i in current \ - and current[i] in f.map \ - and old_transition in f.map[current[i]]: - next[i] = f.map[current[i]][old_transition] - if not next: + + current_i = current[i] + if current_i in f.map and old_transition in f.map[current_i]: + next_state[i] = f.map[current_i][old_transition] + + if not next_state: raise OblivionError - return next + return next_state # Determine the "is final?" condition of each substate, then pass it to the # test to determine finality of the overall FSM. - def final(state, fsm_range=tuple(enumerate(fsms))): + def final(state, fsm_range=None): + fsm_range = fsm_range or enumerate(fsms) accepts = [i in state and state[i] in fsm.finals for (i, fsm) in fsm_range] return test(accepts) From 090cb0d4ceb2a44cc62019b18c365a915135c5f8 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Fri, 11 Jul 2025 18:04:29 -0700 Subject: [PATCH 18/27] Add primitive type hints in fsm --- interegular/fsm.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index fd9efad..bb0d7d2 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -24,19 +24,19 @@ class _AnythingElseCls: fsm.anything_else, then follow the appropriate transition. """ - def __str__(self): + def __str__(self) -> str: return "anything_else" - def __repr__(self): + def __repr__(self) -> str: return "anything_else" - def __lt__(self, other): + def __lt__(self, other) -> bool: return False - def __eq__(self, other): + def __eq__(self, other) -> bool: return self is other - def __hash__(self): + def __hash__(self) -> int: return hash(id(self)) @@ -45,7 +45,7 @@ def __hash__(self): anything_else = _AnythingElseCls() -def nice_char_group(chars: Iterable[Union[str, _AnythingElseCls]]): +def nice_char_group(chars: Iterable[Union[str, _AnythingElseCls]]) -> str: out = [] current_range = [] for c in sorted(chars): @@ -73,7 +73,7 @@ class Alphabet(Mapping[Any, TransitionKey]): def by_transition(self) -> Dict[TransitionKey, List[Union[str, _AnythingElseCls]]]: return self._by_transition - def __str__(self): + def __str__(self) -> str: out = [] width = 0 for tk, symbols in sorted(self._by_transition.items()): @@ -82,7 +82,7 @@ def __str__(self): width = len(out[-1][0]) return '\n'.join(f"{a:{width}} | {b}" for a, b in out) - def __repr__(self): + def __repr__(self) -> str: return f"{type(self).__name__}({self._symbol_mapping!r})" def __len__(self) -> int: @@ -107,7 +107,7 @@ def __getitem__(self, item): else: return self._symbol_mapping[item] - def __contains__(self, item): + def __contains__(self, item) -> bool: return item in self._symbol_mapping def union(*alphabets: 'Alphabet') -> 'Tuple[Alphabet, Tuple[Dict[TransitionKey, TransitionKey], ...]]': @@ -218,7 +218,7 @@ def __init__(self, alphabet: Alphabet, states, initial, finals, map, *, __no_val self.__dict__["finals"] = frozenset(finals) self.__dict__["map"] = map - def accepts(self, input_str: str): + def accepts(self, input_str: str) -> bool: """ Test whether the present FSM accepts the supplied string (iterable of symbols). Equivalently, consider `self` as a possibly-infinite set of @@ -258,7 +258,7 @@ def accepts(self, input_str: str): state = self.map[state][transition] return state in self.finals - def __contains__(self, string): + def __contains__(self, string) -> bool: """ This lets you use the syntax `"a" in fsm1` to see whether the string "a" is in the set of strings accepted by `fsm1`. @@ -273,7 +273,7 @@ def reduce(self): """ return self.reversed().reversed() - def __repr__(self): + def __repr__(self) -> str: string = "fsm(" string += "alphabet = " + repr(self.alphabet) string += ", states = " + repr(self.states) @@ -283,7 +283,7 @@ def __repr__(self): string += ")" return string - def __str__(self): + def __str__(self) -> str: rows = [] # top row From 3e5b52ee5e8016e6550df5013d726d1915dc11ee Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Fri, 11 Jul 2025 18:10:23 -0700 Subject: [PATCH 19/27] Create Symbol type for type hinting --- interegular/fsm.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index bb0d7d2..58dafd2 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -44,8 +44,9 @@ def __hash__(self) -> int: # value gets serialised. Otherwise this would just be `object()`. anything_else = _AnythingElseCls() +Symbol = Union[str, _AnythingElseCls] -def nice_char_group(chars: Iterable[Union[str, _AnythingElseCls]]) -> str: +def nice_char_group(chars: Iterable[Symbol]) -> str: out = [] current_range = [] for c in sorted(chars): @@ -70,7 +71,7 @@ def nice_char_group(chars: Iterable[Union[str, _AnythingElseCls]]) -> str: class Alphabet(Mapping[Any, TransitionKey]): @property - def by_transition(self) -> Dict[TransitionKey, List[Union[str, _AnythingElseCls]]]: + def by_transition(self) -> Dict[TransitionKey, List[Symbol]]: return self._by_transition def __str__(self) -> str: @@ -91,12 +92,12 @@ def __len__(self) -> int: def __iter__(self): return iter(self._symbol_mapping) - def __init__(self, symbol_mapping: Dict[Union[str, _AnythingElseCls], TransitionKey]): + def __init__(self, symbol_mapping: Dict[Symbol, TransitionKey]): self._symbol_mapping = symbol_mapping by_transition = defaultdict(list) for s, t in self._symbol_mapping.items(): by_transition[t].append(s) - self._by_transition: Dict[TransitionKey, List[Union[str, _AnythingElseCls]]] = dict(by_transition) + self._by_transition: Dict[TransitionKey, List[Symbol]] = dict(by_transition) def __getitem__(self, item): if item not in self._symbol_mapping: From aa7e19a87345ef10db0d18fdfab7c77645e5fa29 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Fri, 11 Jul 2025 18:23:05 -0700 Subject: [PATCH 20/27] Rename fsm map to transition_map to avoid reserve word collision --- interegular/fsm.py | 102 ++++++++++++++++++++-------------------- interegular/patterns.py | 4 +- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 58dafd2..748e94b 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -181,13 +181,13 @@ class FSM: initial: State states: Set[State] finals: Set[State] - map: Dict[State, Dict[TransitionKey, State]] + transition_map: Dict[State, Dict[TransitionKey, State]] def __setattr__(self, name, value): """Immutability prevents some potential problems.""" raise Exception("This object is immutable.") - def __init__(self, alphabet: Alphabet, states, initial, finals, map, *, __no_validation__=False): + def __init__(self, alphabet: Alphabet, states, initial, finals, transition_map, *, __no_validation__=False): """ `alphabet` is an iterable of symbols the FSM can be fed. `states` is the set of states for the FSM @@ -205,19 +205,19 @@ def __init__(self, alphabet: Alphabet, states, initial, finals, map, *, __no_val raise Exception("Initial state " + repr(initial) + " must be one of " + repr(states)) if not finals.issubset(states): raise Exception("Final states " + repr(finals) + " must be a subset of " + repr(states)) - for state in map.keys(): - for symbol in map[state]: - if not map[state][symbol] in states: + for state in transition_map.keys(): + for symbol in transition_map[state]: + if not transition_map[state][symbol] in states: raise Exception( "Transition for state " + repr(state) + " and symbol " + repr(symbol) + " leads to " + repr( - map[state][symbol]) + ", which is not a state") + transition_map[state][symbol]) + ", which is not a state") # Initialise the hard way due to immutability. self.__dict__["alphabet"] = alphabet self.__dict__["states"] = frozenset(states) self.__dict__["initial"] = initial self.__dict__["finals"] = frozenset(finals) - self.__dict__["map"] = map + self.__dict__["transition_map"] = transition_map def accepts(self, input_str: str) -> bool: """ @@ -235,28 +235,28 @@ def accepts(self, input_str: str) -> bool: if not symbol in self.alphabet: symbol = anything_else - if state not in self.map: + if state not in self.transition_map: return False transition = self.alphabet[symbol] # Missing transition = transition to dead state - if transition not in self.map[state]: + if transition not in self.transition_map[state]: return False - state = self.map[state][transition] + state = self.transition_map[state][transition] else: for symbol in input_str: - if state not in self.map: + if state not in self.transition_map: return False transition = self.alphabet[symbol] # Missing transition = transition to dead state - if transition not in self.map[state]: + if transition not in self.transition_map[state]: return False - state = self.map[state][transition] + state = self.transition_map[state][transition] return state in self.finals def __contains__(self, string) -> bool: @@ -280,7 +280,7 @@ def __repr__(self) -> str: string += ", states = " + repr(self.states) string += ", initial = " + repr(self.initial) string += ", finals = " + repr(self.finals) - string += ", map = " + repr(self.map) + string += ", map = " + repr(self.transition_map) string += ")" return string @@ -306,8 +306,8 @@ def __str__(self) -> str: else: row.append("False") for symbol, transition in sorted(self.alphabet.items()): - if state in self.map and transition in self.map[state]: - row.append(str(self.map[state][transition])) + if state in self.transition_map and transition in self.transition_map[state]: + row.append(str(self.transition_map[state][transition])) else: row.append("") rows.append(row) @@ -373,8 +373,8 @@ def follow(current, new_transition): for (i, substate) in current: fsm = fsms[i] current_vertex: TransitionKey = new_to_old[i][new_transition] - if substate in fsm.map and current_vertex in fsm.map[substate]: - next_states.append(connect_all(i, fsm.map[substate][current_vertex])) + if substate in fsm.transition_map and current_vertex in fsm.transition_map[substate]: + next_states.append(connect_all(i, fsm.transition_map[substate][current_vertex])) if not next_states: raise OblivionError return frozenset(chain.from_iterable(next_states)) @@ -404,15 +404,15 @@ def star(self): def follow(state, transition): next_states = [] for substate in state: - if substate in self.map and transition in self.map[substate]: - next_states.append(self.map[substate][transition]) + if substate in self.transition_map and transition in self.transition_map[substate]: + next_states.append(self.transition_map[substate][transition]) # If one of our substates is final, then we can also consider # transitions from the initial state of the original FSM. if substate in self.finals \ - and self.initial in self.map \ - and transition in self.map[self.initial]: - next_states.append(self.map[self.initial][transition]) + and self.initial in self.transition_map \ + and transition in self.transition_map[self.initial]: + next_states.append(self.transition_map[self.initial][transition]) if not next_states: raise OblivionError @@ -450,9 +450,9 @@ def follow(current, transition): next_state = [] for (substate, iteration) in current: if iteration < multiplier \ - and substate in self.map \ - and transition in self.map[substate]: - current_state = self.map[substate][transition] + and substate in self.transition_map \ + and transition in self.transition_map[substate]: + current_state = self.transition_map[substate][transition] next_state.append((current_state, iteration)) # final of self? merge with initial on next iteration if current_state in self.finals: @@ -534,8 +534,8 @@ def everythingbut(self): def follow(current, transition): next = {} - if 0 in current and current[0] in self.map and transition in self.map[current[0]]: - next[0] = self.map[current[0]][transition] + if 0 in current and current[0] in self.transition_map and transition in self.transition_map[current[0]]: + next[0] = self.transition_map[current[0]][transition] return next # state is final unless the original was @@ -552,12 +552,12 @@ def isdisjoint(self, other: 'FSM') -> bool: # obtained by following this transition in the new FSM def follow(current, transition): ss, os = current - if ss in self.map and new_to_old[0][transition] in self.map[ss]: - sn = self.map[ss][new_to_old[0][transition]] + if ss in self.transition_map and new_to_old[0][transition] in self.transition_map[ss]: + sn = self.transition_map[ss][new_to_old[0][transition]] else: sn = None - if os in other.map and new_to_old[1][transition] in other.map[os]: - on = other.map[os][new_to_old[1][transition]] + if os in other.transition_map and new_to_old[1][transition] in other.transition_map[os]: + on = other.transition_map[os][new_to_old[1][transition]] else: on = None if not sn or not on: @@ -590,7 +590,7 @@ def reversed(self): # Speed up follow by pre-computing reverse-transition map reverse_map = defaultdict(set) - for state, transition_map in self.map.items(): + for state, transition_map in self.transition_map.items(): for transition, next_state in transition_map.items(): reverse_map[(next_state, transition)].add(state) @@ -634,9 +634,9 @@ def islive(self, state): current = reachable[i] if current in self.finals: return True - if current in self.map: - for transition in self.map[current]: - next = self.map[current][transition] + if current in self.transition_map: + for transition in self.transition_map[current]: + next = self.transition_map[current][transition] if next not in seen: reachable.append(next) seen.add(next) @@ -695,9 +695,9 @@ def strings(self, max_iterations=None): while strings: (cstring, cstate) = strings.popleft() i += 1 - if cstate in self.map: - for transition in sorted(self.map[cstate]): - nstate = self.map[cstate][transition] + if cstate in self.transition_map: + for transition in sorted(self.transition_map[cstate]): + nstate = self.transition_map[cstate][transition] if nstate in livestates: for symbol in sorted(self.alphabet.by_transition[transition]): nstring = cstring + [symbol] @@ -772,9 +772,9 @@ def get_num_strings(state): n = 0 if state in self.finals: n += 1 - if state in self.map: - for transition in self.map[state]: - n += get_num_strings(self.map[state][transition]) * len(self.alphabet.by_transition[transition]) + if state in self.transition_map: + for transition in self.transition_map[state]: + n += get_num_strings(self.transition_map[state][transition]) * len(self.alphabet.by_transition[transition]) num_strings[state] = n else: @@ -858,7 +858,7 @@ def copy(self): states=self.states.copy(), initial=self.initial, finals=self.finals.copy(), - map=self.map.copy(), + transition_map=self.transition_map.copy(), __no_validation__=True, ) @@ -880,10 +880,10 @@ def derive(self, input): symbol = anything_else # Missing transition = transition to dead state - if not (state in self.map and self.alphabet[symbol] in self.map[state]): + if not (state in self.transition_map and self.alphabet[symbol] in self.transition_map[state]): raise OblivionError - state = self.map[state][self.alphabet[symbol]] + state = self.transition_map[state][self.alphabet[symbol]] # OK so now we have consumed that string, use the new location as the # starting point. @@ -892,7 +892,7 @@ def derive(self, input): states=self.states, initial=state, finals=self.finals, - map=self.map, + transition_map=self.transition_map, __no_validation__=True, ) @@ -912,7 +912,7 @@ def null(alphabet): states={0}, initial=0, finals=set(), - map={ + transition_map={ 0: dict([(transition, 0) for transition in alphabet.by_transition]), }, __no_validation__=True, @@ -929,7 +929,7 @@ def epsilon(alphabet): states={0}, initial=0, finals={0}, - map={}, + transition_map={}, __no_validation__=True, ) @@ -957,8 +957,8 @@ def follow(current, new_transition, fsm_range=None): old_transition = new_to_old[i][new_transition] current_i = current[i] - if current_i in f.map and old_transition in f.map[current_i]: - next_state[i] = f.map[current_i][old_transition] + if current_i in f.transition_map and old_transition in f.transition_map[current_i]: + next_state[i] = f.transition_map[current_i][old_transition] if not next_state: raise OblivionError @@ -1054,6 +1054,6 @@ def get_hash(obj): states=range(len(states)), initial=0, finals=finals, - map=transition_map, + transition_map=transition_map, __no_validation__=True, ) \ No newline at end of file diff --git a/interegular/patterns.py b/interegular/patterns.py index bb09dbb..1994d5c 100644 --- a/interegular/patterns.py +++ b/interegular/patterns.py @@ -162,7 +162,7 @@ def to_fsm(self, alphabet=None, prefix_postfix=None, flags=REFlags(0)) -> FSM: states={0, 1}, initial=0, finals={1}, - map=mapping, + transition_map=mapping, ) def simplify(self) -> '_CharGroup': @@ -193,7 +193,7 @@ def to_fsm(self, alphabet=None, prefix_postfix=None, flags=REFlags(0)) -> FSM: states={0, 1}, initial=0, finals={1}, - map={0: {alphabet[sym]: 1 for sym in symbols}}, + transition_map={0: {alphabet[sym]: 1 for sym in symbols}}, ) def _get_alphabet(self, flags: REFlags) -> Alphabet: From e67b5d2433d399248b159ae07cfe99a9dabd8901 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Fri, 11 Jul 2025 20:24:19 -0700 Subject: [PATCH 21/27] Convert FSM to frozen dataclass --- interegular/fsm.py | 69 ++++++++++++++++++++++------------------- interegular/patterns.py | 8 ++--- 2 files changed, 41 insertions(+), 36 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 748e94b..1294b23 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -2,9 +2,10 @@ Finite state machine library, extracted from `greenery.fsm` and adapted by MegaIng """ from _collections import deque +from dataclasses import dataclass from collections import defaultdict from functools import total_ordering -from typing import Any, Set, Dict, Union, NewType, Mapping, Tuple, Iterable, Callable, List +from typing import Any, Set, Dict, Union, NewType, Mapping, Tuple, Iterable, Callable, List, Optional from itertools import chain from interegular.utils import soft_repr @@ -163,6 +164,7 @@ class OblivionError(Exception): pass +@dataclass(frozen=True) class FSM: """ A Finite State Machine or FSM has an alphabet and a set of states. At any @@ -178,16 +180,17 @@ class FSM: The majority of these methods are available using operator overloads. """ alphabet: Alphabet + states: frozenset[State] initial: State - states: Set[State] - finals: Set[State] + finals: frozenset[State] transition_map: Dict[State, Dict[TransitionKey, State]] + __no_validation__: Optional[bool] = True + + @property + def map(self) -> Dict[State, Dict[TransitionKey, State]]: + return self.transition_map - def __setattr__(self, name, value): - """Immutability prevents some potential problems.""" - raise Exception("This object is immutable.") - - def __init__(self, alphabet: Alphabet, states, initial, finals, transition_map, *, __no_validation__=False): + def __init__(self, alphabet: Alphabet, states: frozenset[State], initial: State, finals: frozenset[State], transition_map: Optional[Dict[State, Dict[TransitionKey, State]]]=None, __no_validation__: Optional[bool] = True, map: Optional[Dict[State, Dict[TransitionKey, State]]]=None): """ `alphabet` is an iterable of symbols the FSM can be fed. `states` is the set of states for the FSM @@ -196,28 +199,30 @@ def __init__(self, alphabet: Alphabet, states, initial, finals, transition_map, `map` may be sparse (i.e. it may omit transitions). In the case of omitted transitions, a non-final "oblivion" state is simulated. """ - - if not __no_validation__: + assert map is not None or transition_map is not None + if not self.__no_validation__: # Validation. Thanks to immutability, this only needs to be carried out once. - if not isinstance(alphabet, Alphabet): + if not isinstance(self.alphabet, Alphabet): raise TypeError("Expected an Alphabet instance") - if not initial in states: - raise Exception("Initial state " + repr(initial) + " must be one of " + repr(states)) - if not finals.issubset(states): - raise Exception("Final states " + repr(finals) + " must be a subset of " + repr(states)) - for state in transition_map.keys(): - for symbol in transition_map[state]: - if not transition_map[state][symbol] in states: + if not self.initial in self.states: + raise Exception("Initial state " + repr(self.initial) + " must be one of " + repr(self.states)) + if not self.finals.issubset(self.states): + raise Exception("Final states " + repr(self.finals) + " must be a subset of " + repr(self.states)) + for state in self.transition_map.keys(): + for symbol in self.transition_map[state]: + if not self.transition_map[state][symbol] in self.states: raise Exception( "Transition for state " + repr(state) + " and symbol " + repr(symbol) + " leads to " + repr( - transition_map[state][symbol]) + ", which is not a state") - - # Initialise the hard way due to immutability. - self.__dict__["alphabet"] = alphabet - self.__dict__["states"] = frozenset(states) - self.__dict__["initial"] = initial - self.__dict__["finals"] = frozenset(finals) - self.__dict__["transition_map"] = transition_map + self.transition_map[state][symbol]) + ", which is not a state") + + object.__setattr__(self, "alphabet", alphabet) + object.__setattr__(self, "states", states) + object.__setattr__(self, "initial", initial) + object.__setattr__(self, "finals", finals) + if transition_map is not None: + object.__setattr__(self, "transition_map", transition_map) + else: + object.__setattr__(self, "transition_map", map) def accepts(self, input_str: str) -> bool: """ @@ -909,9 +914,9 @@ def null(alphabet): """ return FSM( alphabet=alphabet, - states={0}, + states=frozenset({0}), initial=0, - finals=set(), + finals=frozenset(), transition_map={ 0: dict([(transition, 0) for transition in alphabet.by_transition]), }, @@ -926,9 +931,9 @@ def epsilon(alphabet): """ return FSM( alphabet=alphabet, - states={0}, + states=frozenset({0}), initial=0, - finals={0}, + finals=frozenset({0}), transition_map={}, __no_validation__=True, ) @@ -1051,9 +1056,9 @@ def get_hash(obj): return FSM( alphabet=alphabet, - states=range(len(states)), + states=frozenset(range(len(states))), initial=0, - finals=finals, + finals=frozenset(finals), transition_map=transition_map, __no_validation__=True, ) \ No newline at end of file diff --git a/interegular/patterns.py b/interegular/patterns.py index 1994d5c..d9154d1 100644 --- a/interegular/patterns.py +++ b/interegular/patterns.py @@ -159,9 +159,9 @@ def to_fsm(self, alphabet=None, prefix_postfix=None, flags=REFlags(0)) -> FSM: return FSM( alphabet=alphabet, - states={0, 1}, + states=frozenset({0, 1}), initial=0, - finals={1}, + finals=frozenset({1}), transition_map=mapping, ) @@ -190,9 +190,9 @@ def to_fsm(self, alphabet=None, prefix_postfix=None, flags=REFlags(0)) -> FSM: symbols = alphabet return FSM( alphabet=alphabet, - states={0, 1}, + states=frozenset({0, 1}), initial=0, - finals={1}, + finals=frozenset({1}), transition_map={0: {alphabet[sym]: 1 for sym in symbols}}, ) From ac6417e18ee5b955475598fa80a4f3cc4d37ecb0 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Fri, 11 Jul 2025 23:20:36 -0700 Subject: [PATCH 22/27] Refactor fsm validation and islive --- interegular/fsm.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 1294b23..0b211f3 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -208,12 +208,12 @@ def __init__(self, alphabet: Alphabet, states: frozenset[State], initial: State, raise Exception("Initial state " + repr(self.initial) + " must be one of " + repr(self.states)) if not self.finals.issubset(self.states): raise Exception("Final states " + repr(self.finals) + " must be a subset of " + repr(self.states)) - for state in self.transition_map.keys(): - for symbol in self.transition_map[state]: - if not self.transition_map[state][symbol] in self.states: + for state, transitions in self.transition_map.items(): + for next_state in transitions.values: + if not next_state in self.states: raise Exception( "Transition for state " + repr(state) + " and symbol " + repr(symbol) + " leads to " + repr( - self.transition_map[state][symbol]) + ", which is not a state") + next_state) + ", which is not a state") object.__setattr__(self, "alphabet", alphabet) object.__setattr__(self, "states", states) @@ -640,11 +640,11 @@ def islive(self, state): if current in self.finals: return True if current in self.transition_map: - for transition in self.transition_map[current]: - next = self.transition_map[current][transition] - if next not in seen: - reachable.append(next) - seen.add(next) + transitions = self.transition_map[current] + for next_state in transitions.values(): + if next_state not in seen: + reachable.append(next_state) + seen.add(next_state) i += 1 return False From 599181475cc3398f47411e7efc8942486a9a4104 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Sat, 12 Jul 2025 11:10:56 -0700 Subject: [PATCH 23/27] Reuse alphabet transition key set in crawl --- interegular/fsm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 0b211f3..cd20cb2 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -1016,6 +1016,8 @@ def get_hash(obj): elif isinstance(obj, dict): return hash(tuple(sorted(obj.items()))) return hash(obj) + + transitions_in_alphabet = alphabet.by_transition.keys() states = [initial] state_idx: Dict[int, int] = {get_hash(initial): 0} @@ -1033,7 +1035,7 @@ def get_hash(obj): # compute map for this state transition_map[i] = {} - for transition in alphabet.by_transition: + for transition in transitions_in_alphabet: try: next_state = follow(state, transition) From 85250edb90cb4eb5bdd649262663e59c1c09b7af Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Sat, 12 Jul 2025 11:51:39 -0700 Subject: [PATCH 24/27] Refactor cardinality function --- interegular/fsm.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index cd20cb2..b9e2a96 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -766,25 +766,24 @@ def cardinality(self): def get_num_strings(state): # Many FSMs have at least one oblivion state - if self.islive(state): - if state in num_strings: - if num_strings[state] is None: # "computing..." - # Recursion! There are infinitely many strings recognised - raise OverflowError(state) - return num_strings[state] - num_strings[state] = None # i.e. "computing..." - - n = 0 - if state in self.finals: - n += 1 - if state in self.transition_map: - for transition in self.transition_map[state]: - n += get_num_strings(self.transition_map[state][transition]) * len(self.alphabet.by_transition[transition]) - num_strings[state] = n - - else: - # Dead state - num_strings[state] = 0 + if not self.islive(state): + return 0 + + if state in num_strings: + if num_strings[state] is None: # "computing..." + # Recursion! There are infinitely many strings recognised + raise OverflowError(state) + return num_strings[state] + num_strings[state] = None # i.e. "computing..." + + n = 0 + if state in self.finals: + n += 1 + if state in self.transition_map: + transitions = self.transition_map[state] + for transition, next_state in transitions.items(): + n += get_num_strings(next_state) * len(self.alphabet.by_transition[transition]) + num_strings[state] = n return num_strings[state] @@ -1021,7 +1020,7 @@ def get_hash(obj): states = [initial] state_idx: Dict[int, int] = {get_hash(initial): 0} - finals = set() + finals = [] transition_map = {} # iterate over a growing list @@ -1031,7 +1030,7 @@ def get_hash(obj): # add to finals if final(state): - finals.add(i) + finals.append(i) # compute map for this state transition_map[i] = {} From 1a2c681ffc38ee37c628ddbd46038d1d9148104c Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Sun, 13 Jul 2025 15:25:21 -0700 Subject: [PATCH 25/27] Fix fsm validation --- interegular/fsm.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index b9e2a96..df57385 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -200,17 +200,17 @@ def __init__(self, alphabet: Alphabet, states: frozenset[State], initial: State, transitions, a non-final "oblivion" state is simulated. """ assert map is not None or transition_map is not None - if not self.__no_validation__: + if not __no_validation__: # Validation. Thanks to immutability, this only needs to be carried out once. - if not isinstance(self.alphabet, Alphabet): + if not isinstance(alphabet, Alphabet): raise TypeError("Expected an Alphabet instance") - if not self.initial in self.states: - raise Exception("Initial state " + repr(self.initial) + " must be one of " + repr(self.states)) - if not self.finals.issubset(self.states): - raise Exception("Final states " + repr(self.finals) + " must be a subset of " + repr(self.states)) - for state, transitions in self.transition_map.items(): - for next_state in transitions.values: - if not next_state in self.states: + if not initial in states: + raise Exception("Initial state " + repr(initial) + " must be one of " + repr(states)) + if not finals.issubset(states): + raise Exception("Final states " + repr(finals) + " must be a subset of " + repr(states)) + for state, transitions in transition_map.items(): + for symbol, next_state in transitions.items(): + if not next_state in states: raise Exception( "Transition for state " + repr(state) + " and symbol " + repr(symbol) + " leads to " + repr( next_state) + ", which is not a state") From 567c008bb44fcb7544b312286d8dce75568d9eda Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Sun, 20 Jul 2025 23:06:05 -0700 Subject: [PATCH 26/27] Fix cardinality bug --- interegular/fsm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/interegular/fsm.py b/interegular/fsm.py index df57385..4beea9b 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -767,6 +767,7 @@ def cardinality(self): def get_num_strings(state): # Many FSMs have at least one oblivion state if not self.islive(state): + num_strings[state] = 0 return 0 if state in num_strings: From c6497fd36734a3072e21d21c8966e7676c2521a8 Mon Sep 17 00:00:00 2001 From: Aubhro Sengupta Date: Mon, 21 Jul 2025 14:20:43 -0700 Subject: [PATCH 27/27] Modify AnythingElseCls equality operators to work with any instance of singleton class --- interegular/fsm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/interegular/fsm.py b/interegular/fsm.py index 4beea9b..43ec94f 100644 --- a/interegular/fsm.py +++ b/interegular/fsm.py @@ -35,10 +35,10 @@ def __lt__(self, other) -> bool: return False def __eq__(self, other) -> bool: - return self is other + return isinstance(other, _AnythingElseCls) def __hash__(self) -> int: - return hash(id(self)) + return hash(str(self)) # We use a class instance because that gives us control over how the special