diff --git a/Cargo.lock b/Cargo.lock index ab6fbff..c200b7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,7 +4,7 @@ version = 4 [[package]] name = "cscsca" -version = "0.27.1" +version = "0.28.0" dependencies = [ "cscsca_macros", "pollster", diff --git a/Cargo.toml b/Cargo.toml index 48775f9..ecfd8c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cscsca" authors = ["Charles Feyen"] -version = "0.27.1" +version = "0.28.0" edition = "2024" readme = "README.md" keywords = ["linguistics", "conlang", "sound_change_applier"] diff --git a/README.md b/README.md index d819769..77e40c1 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A sound change applier based on linguistic sound change notation. - Expansive conditions and anti-conditions - Definitions that can be inserted anywhere in a rule - Automatic and manual matching for lists of phones -- Gaps of arbitrary phones in conditions (useful for harmony) +- Arbitrary length sections of repeated phones - Can get information to use in conditions at runtime (variables) - Reasonably minimalist and simple, but also highly expressive and versatile - Usable as a crate that can be adapted to fit many mediums beyond CLI @@ -57,9 +57,13 @@ h >> ### Scopes Scopes are a way to dynamically determine which phone, group of phones, or lack thereof exists in a rule. -There are two types of scopes +There are three types of scopes - optional **`(`**...**`)`**: a phone or group of phones that is optional - selection **`{`**...**`,`**...**`}`**: a list of comma-separated phones or a group of phones that selects one phone or group of phones in that list +- repetition **`[`**...**`]`**: a phone or group of phones repeated 0 or more times. 
If a **`!`** is added in the scope, the scope represents the phone or group of phones before the **`!`** repeated 0 or more times, as long as the repeated section does not contain the phone or group of phones after the **`!`** + + +**Note**: repetition scopes are only allowed in conditions/anti-conditions (see: Conditions and Anti-Conditions) Examples: ```cscsca @@ -71,6 +75,12 @@ l (j) >> j ## `p` and `b` become `f` and `v` respectively {p, b} >> {f, v} + +## `u` becomes `y` when it comes after `i` in a word (see: Conditions and Anti-Conditions) +u >> y / i [*] _ + +## `u` becomes `y` when it comes after `i` in a word, unless a `w` is between the two (see: Conditions and Anti-Conditions) +u >> y / i [* ! w] _ ``` ### Labels @@ -78,6 +88,8 @@ As seen in the example above, corresponding scopes in the input and output try t To force scopes to agree on what they choose, we can use labels. A label has a name that starts with **`$`** and precedes a scope +**Note**: repetition scopes agree in phone count rather than in the phones themselves, so an agreeing repetition scope is the same length as or shorter than the one that sets the agreement + Examples: ```cscsca ## `i` and `u` merge with preceding `h` or `x` into `j` `i` and `w` `u` @@ -169,16 +181,9 @@ DEFINE F {f, s, ç, x} ### Special Characters - **`*`**: represents any non-boundary phone. **`*`** may be preceded by a label to agree on which phone is represented -- **`..`**: a gap of zero or more non-boundary phones. (**Notes**: **`..`** must have a space on both sides and is only allowed in conditions). 
A gap may be preceded by a label to limit gap length to less than or equal to the length of the first gap with the same label - **`#`**: a word boundary - **`\`**: escapes the effects of the following character, may be used at the end of a line to continue the rule on the next line -### Reserved Characters -Characters that do nothing, but need to be escaped -- **`.`** -- **`[`** -- **`]`** - ### IO and Variables To print the current phonetic form, type **`PRINT`** at the start of a line, followed by the message you would like to print with it diff --git a/docs/README_template.md b/docs/README_template.md index 14ac030..c62abcb 100644 --- a/docs/README_template.md +++ b/docs/README_template.md @@ -8,7 +8,7 @@ A sound change applier based on linguistic sound change notation. - Expansive conditions and anti-conditions - Definitions that can be inserted anywhere in a rule - Automatic and manual matching for lists of phones -- Gaps of arbitrary phones in conditions (useful for harmony) +- Arbitrary length sections of repeated phones - Can get information to use in conditions at runtime (variables) - Reasonably minimalist and simple, but also highly expressive and versatile - Usable as a crate that can be adapted to fit many mediums beyond CLI diff --git a/docs/writing_rules.md b/docs/writing_rules.md index 399e2fa..bea0ea9 100644 --- a/docs/writing_rules.md +++ b/docs/writing_rules.md @@ -38,9 +38,13 @@ h >> ### Scopes Scopes are a way to dynamically determine which phone, group of phones, or lack thereof exists in a rule. -There are two types of scopes +There are three types of scopes - optional **`(`**...**`)`**: a phone or group of phones that is optional - selection **`{`**...**`,`**...**`}`**: a list of comma-separated phones or a group of phones that selects one phone or group of phones in that list +- repetition **`[`**...**`]`**: a phone or group of phones repeated 0 or more times. 
If a **`!`** is added in the scope, the scope represents the phone or group of phones before the **`!`** repeated 0 or more times, as long as the repeated section does not contain the phone or group of phones after the **`!`** + + +**Note**: repetition scopes are only allowed in conditions/anti-conditions (see: Conditions and Anti-Conditions) Examples: ```cscsca @@ -52,6 +56,12 @@ l (j) >> j ## `p` and `b` become `f` and `v` respectively {p, b} >> {f, v} + +## `u` becomes `y` when it comes after `i` in a word (see: Conditions and Anti-Conditions) +u >> y / i [*] _ + +## `u` becomes `y` when it comes after `i` in a word, unless a `w` is between the two (see: Conditions and Anti-Conditions) +u >> y / i [* ! w] _ ``` ### Labels @@ -59,6 +69,8 @@ As seen in the example above, corresponding scopes in the input and output try t To force scopes to agree on what they choose, we can use labels. A label has a name that starts with **`$`** and precedes a scope +**Note**: repetition scopes agree in phone count rather than in the phones themselves, so an agreeing repetition scope is the same length as or shorter than the one that sets the agreement + Examples: ```cscsca ## `i` and `u` merge with preceding `h` or `x` into `j` `i` and `w` `u` @@ -150,16 +162,9 @@ DEFINE F {f, s, ç, x} ### Special Characters - **`*`**: represents any non-boundary phone. **`*`** may be preceded by a label to agree on which phone is represented -- **`..`**: a gap of zero or more non-boundary phones. (**Notes**: **`..`** must have a space on both sides and is only allowed in conditions). 
A gap may be preceded by a label to limit gap length to less than or equal to the length of the first gap with the same label - **`#`**: a word boundary - **`\`**: escapes the effects of the following character, may be used at the end of a line to continue the rule on the next line -### Reserved Characters -Characters that do nothing, but need to be escaped -- **`.`** -- **`[`** -- **`]`** - ### IO and Variables To print the current phonetic form, type **`PRINT`** at the start of a line, followed by the message you would like to print with it diff --git a/src/applier/mod.rs b/src/applier/mod.rs index edc4d5f..07d00a5 100644 --- a/src/applier/mod.rs +++ b/src/applier/mod.rs @@ -223,7 +223,7 @@ fn patterns_to_phones<'s: 'p, 'p>(patterns: &[Pattern<'s>], choices: &Choices<'_ return Err(ApplicationError::UnmatchedTokenInOutput(pattern.clone())); } }, - Pattern::Gap { .. } => return Err(ApplicationError::GapOutOfCond), + Pattern::Repetition { .. } => return Err(ApplicationError::RepetitionOutOfCond), _ => return Err(ApplicationError::UnmatchedTokenInOutput(pattern.clone())) } } @@ -238,7 +238,7 @@ pub enum ApplicationError<'s> { UnmatchedTokenInOutput(Pattern<'s>), InvalidSelectionAccess(Pattern<'s>, usize), ExceededLimit(LimitCondition), - GapOutOfCond, + RepetitionOutOfCond, PatternCannotBeConvertedToPhones(Pattern<'s>), } @@ -257,7 +257,7 @@ impl std::fmt::Display for ApplicationError<'_> { LimitCondition::Time(_) => "Could not apply changes in allotted time", LimitCondition::Count { attempts: _, max: _ } => "Could not apply changes with the allotted application attempts", }), - Self::GapOutOfCond => write!(f, "{}", RuleStructureError::GapOutOfCond), + Self::RepetitionOutOfCond => write!(f, "{}", RuleStructureError::RepetitionOutOfCond), Self::PatternCannotBeConvertedToPhones(pattern) => write!(f, "'{pattern}' cannot be converted to a phone or list of phones"), } } diff --git a/src/assets/demo.sca b/src/assets/demo.sca index 795712f..62e9aac 100644 --- 
a/src/assets/demo.sca +++ b/src/assets/demo.sca @@ -42,4 +42,4 @@ DEFINE Pv- { p, t, k } DEFINE Pv+ { b, d, g } ## u is fronted if an i exists before it in the same word without a w between them -u >> y / i $gap .. _ // w $gap .. _ \ No newline at end of file +u >> y / i [* ! w] _ \ No newline at end of file diff --git a/src/escaped_strings.rs b/src/escaped_strings.rs index 15c1633..c881c71 100644 --- a/src/escaped_strings.rs +++ b/src/escaped_strings.rs @@ -116,7 +116,6 @@ fn escape_input(input: &str) -> String { fn niche_escapes() { assert_eq!("\\_\\/".to_string(), escape_input("_/")); assert_eq!("\\_a".to_string(), escape_input("_a")); - assert_eq!("\\. \\.\\. \\.\\.\\.".to_string(), escape_input(". .. ...")); // isolated only escapes assert!(check_escapes("\\_a").is_err()); diff --git a/src/ir/mod.rs b/src/ir/mod.rs index 78f7255..713321d 100644 --- a/src/ir/mod.rs +++ b/src/ir/mod.rs @@ -2,7 +2,7 @@ use std::num::NonZero; use crate::{ executor::io_events::{GetType, IoEvent}, - keywords::{AND_CHAR, COND_CHAR, DEFINITION_LINE_START, DEFINITION_PREFIX, ESCAPE_CHAR, NOT_CHAR, VARIABLE_PREFIX}, + keywords::{DEFINITION_LINE_START, DEFINITION_PREFIX, ESCAPE_CHAR, VARIABLE_PREFIX}, ONE, }; @@ -49,7 +49,6 @@ pub enum IrError<'s> { EmptyDefinition, BadEscape(Option), ReservedCharacter(char), - UnexpectedNot, InvalidGetFormat(GetType), } @@ -66,7 +65,6 @@ impl std::fmt::Display for IrError<'_> { Self::BadEscape(None) => write!(f, "Found '{ESCAPE_CHAR}' with no following character"), Self::BadEscape(Some(c)) => write!(f, "Escaped normal character '{c}' ({ESCAPE_CHAR}{c})"), Self::ReservedCharacter(c) => write!(f, "Found reserved character '{c}' consider escaping it ('{ESCAPE_CHAR}{c}')"), - Self::UnexpectedNot => write!(f, "Found '{NOT_CHAR}' not after '{COND_CHAR}' or '{AND_CHAR}'"), Self::InvalidGetFormat(get_type) => write!(f, "Invalid format after '{get_type}', expected variable name and message"), } } diff --git a/src/ir/tests.rs b/src/ir/tests.rs index 
53af755..17d206b 100644 --- a/src/ir/tests.rs +++ b/src/ir/tests.rs @@ -151,8 +151,7 @@ fn get_lazy_def_name() { assert_eq!(Some("ab"), get_first_phone("ab")); assert_eq!(Some("a"), get_first_phone("a b")); assert_eq!(Some("a"), get_first_phone("a/")); - assert_eq!(Some("a"), get_first_phone("a..")); - assert_eq!(None, get_first_phone(".. a")); + assert_eq!(Some("a.."), get_first_phone("a..")); assert_eq!(None, get_first_phone("_")); assert_eq!(None, get_first_phone("/")); assert_eq!(Some("\\/"), get_first_phone("\\/")); @@ -290,31 +289,32 @@ fn tokenize_scope_bounds_with_suroundings() { #[test] -fn tokenize_gap() { - assert_eq!(Ok(vec![IrLine::Ir { tokens: vec![IrToken::Gap], lines: ONE}]), tokenize("..")); -} +fn tokenize_repetition() { + assert_eq!(Ok(vec![IrLine::Ir { tokens: vec![ + IrToken::ScopeStart(ScopeType::Repetition), + IrToken::Any, + IrToken::ScopeEnd(ScopeType::Repetition), + ], lines: ONE}]), tokenize("[*]")); -#[test] -fn tokenize_gap_with_suroundings() { - assert_eq!(Ok(vec![IrLine::Ir { tokens: vec![IrToken::Phone(Phone::Symbol("a")), IrToken::Gap, IrToken::Phone(Phone::Symbol("b")),], lines: ONE}]), tokenize("a .. b")); + + assert_eq!(Ok(vec![IrLine::Ir { tokens: vec![ + IrToken::ScopeStart(ScopeType::Repetition), + IrToken::Any, + IrToken::Negative, + IrToken::Phone(Phone::Symbol("w")), + IrToken::ScopeEnd(ScopeType::Repetition), + ], lines: ONE}]), tokenize("[* ! 
w]")); } #[test] -fn tokenize_dot_with_suroundings() { - assert_eq!( - tokenize("a..b"), - Err((IrError::ReservedCharacter('.'), 1)) - ); - - assert_eq!( - tokenize("a.b"), - Err((IrError::ReservedCharacter('.'), 1)) - ); - - assert_eq!( - tokenize("a\\.b"), - Ok(vec![IrLine::Ir { tokens: vec![IrToken::Phone(Phone::Symbol("a\\.b"))], lines: ONE}]) - ); +fn tokenize_repetition_with_suroundings() { + assert_eq!(Ok(vec![IrLine::Ir { tokens: vec![ + IrToken::Phone(Phone::Symbol("a")), + IrToken::ScopeStart(ScopeType::Repetition), + IrToken::Any, + IrToken::ScopeEnd(ScopeType::Repetition), + IrToken::Phone(Phone::Symbol("b")), + ], lines: ONE}]), tokenize("a [*] b")); } #[test] diff --git a/src/ir/tokenizer.rs b/src/ir/tokenizer.rs index 9f1a9a4..3e21ed5 100644 --- a/src/ir/tokenizer.rs +++ b/src/ir/tokenizer.rs @@ -3,8 +3,8 @@ use std::num::NonZero; use crate::{ escaped_strings::check_escapes, executor::io_events::{GetType, IoEvent, RuntimeIoEvent, TokenizerIoEvent}, - ir::{prefix::Prefix, tokenization_data::TokenizationData, tokens::{Break, IrToken}, IrError, IrLine}, - keywords::{is_special_char, is_special_str, AND_CHAR, ANY_CHAR, ARG_SEP_CHAR, BOUND_CHAR, COMMENT_LINE_START, COND_CHAR, DEFINITION_LINE_START, DEFINITION_PREFIX, ESCAPE_CHAR, GAP_STR, GET_AS_CODE_LINE_START, GET_LINE_START, INPUT_PATTERN_STR, LABEL_PREFIX, LAZY_DEFINITION_LINE_START, LTR_CHAR, MATCH_CHAR, NOT_CHAR, OPTIONAL_END_CHAR, OPTIONAL_START_CHAR, PRINT_LINE_START, RTL_CHAR, SELECTION_END_CHAR, SELECTION_START_CHAR, SPECIAL_STRS, VARIABLE_PREFIX}, + ir::{IrError, IrLine, prefix::Prefix, tokenization_data::TokenizationData, tokens::{Break, IrToken}}, + keywords::{AND_CHAR, ANY_CHAR, ARG_SEP_CHAR, BOUND_CHAR, COMMENT_LINE_START, COND_CHAR, DEFINITION_LINE_START, DEFINITION_PREFIX, ESCAPE_CHAR, REPETITION_END_CHAR, REPETITION_START_CHAR, GET_AS_CODE_LINE_START, GET_LINE_START, INPUT_PATTERN_STR, LABEL_PREFIX, LAZY_DEFINITION_LINE_START, LTR_CHAR, MATCH_CHAR, NOT_CHAR, OPTIONAL_END_CHAR, 
OPTIONAL_START_CHAR, PRINT_LINE_START, RTL_CHAR, SELECTION_END_CHAR, SELECTION_START_CHAR, SPECIAL_STRS, VARIABLE_PREFIX, is_special_char, is_special_str}, phones::Phone, sub_string::SubString, tokens::{AndType, CondType, Direction, ScopeType, Shift, ShiftType}, @@ -137,6 +137,8 @@ fn parse_character<'s>(c: char, tokens: &mut Vec>, prefix: &mut Opti OPTIONAL_END_CHAR => push_phone_and(c, IrToken::ScopeEnd(ScopeType::Optional), tokens, slice, prefix, tokenization_data, lazy_expansions)?, SELECTION_START_CHAR => push_phone_and(c, IrToken::ScopeStart(ScopeType::Selection), tokens, slice, prefix, tokenization_data, lazy_expansions)?, SELECTION_END_CHAR => push_phone_and(c, IrToken::ScopeEnd(ScopeType::Selection), tokens, slice, prefix, tokenization_data, lazy_expansions)?, + REPETITION_START_CHAR => push_phone_and(c, IrToken::ScopeStart(ScopeType::Repetition), tokens, slice, prefix, tokenization_data, lazy_expansions)?, + REPETITION_END_CHAR => push_phone_and(c, IrToken::ScopeEnd(ScopeType::Repetition), tokens, slice, prefix, tokenization_data, lazy_expansions)?, // handles simple one-to-one char to token pushes AND_CHAR => push_phone_and(c, IrToken::Break(Break::And(AndType::And)), tokens, slice, prefix, tokenization_data, lazy_expansions)?, NOT_CHAR => { @@ -149,7 +151,7 @@ fn parse_character<'s>(c: char, tokens: &mut Vec>, prefix: &mut Opti tokens.pop(); IrToken::Break(Break::AntiCond) }, - _ => return Err(IrError::UnexpectedNot), + _ => IrToken::Negative, }; push_phone_and(c, token, tokens, slice, prefix, tokenization_data, lazy_expansions)?; @@ -230,7 +232,7 @@ fn push_phone_and<'s>(c: char, token: IrToken<'s>, tokens: &mut Vec> /// Pushes the slice as a phone and prepares it to start the next slice /// -/// Handles escape validity and input pattern and gap generation +/// Handles escape validity and input pattern generation /// /// If there is a prefix, it either expands the phone as a definition or /// inserts a selection token and resets the prefix to None @@ 
-248,7 +250,6 @@ fn push_phone<'s>(tokens: &mut Vec>, slice: &mut SubString<'s>, pref match (&prefix, literal) { (None, INPUT_PATTERN_STR) => tokens.push(IrToken::CondType(CondType::Pattern)), - (None, GAP_STR) => tokens.push(IrToken::Gap), (None, "") => (), (None, _) => tokens.push(IrToken::Phone(Phone::Symbol(literal))), (Some(Prefix::Definition), _) => tokenization_data.get_definition(literal, tokens, lazy_expansions)?, diff --git a/src/ir/tokens.rs b/src/ir/tokens.rs index 397792f..8eecc64 100644 --- a/src/ir/tokens.rs +++ b/src/ir/tokens.rs @@ -1,9 +1,9 @@ use std::fmt::Display; use crate::{ - keywords::{ANY_CHAR, ARG_SEP_CHAR, COND_CHAR, GAP_STR, LABEL_PREFIX}, + keywords::{ANY_CHAR, ARG_SEP_CHAR, COND_CHAR, LABEL_PREFIX, NOT_CHAR}, phones::Phone, - tokens::{ScopeType, Shift, CondType, AndType} + tokens::{AndType, CondType, ScopeType, Shift} }; /// Tokens that make up the intermediate representation of sound shifts @@ -19,14 +19,14 @@ pub enum IrToken<'s> { Any, /// An item seperator for selection scopes ArgSep, - /// A gap of size 0 or greater that does not contain a word boundery - Gap, /// The main focus and type of a condition or anti-condition CondType(CondType), /// The start of a scope ScopeStart(ScopeType), /// The end of a scope ScopeEnd(ScopeType), + /// Repetition negator + Negative, } impl Display for IrToken<'_> { @@ -35,12 +35,12 @@ impl Display for IrToken<'_> { Self::Any => write!(f, "{ANY_CHAR}"), Self::ArgSep => write!(f, "{ARG_SEP_CHAR}"), Self::Break(r#break) => write!(f, "{break}"), - Self::Gap => write!(f, "{GAP_STR}"), Self::CondType(focus) => write!(f, "{focus}"), Self::Phone(phone) => write!(f, "{phone}"), Self::ScopeEnd(kind) => write!(f, "{}", kind.fmt_end()), Self::ScopeStart(kind) => write!(f, "{}", kind.fmt_start()), Self::Label(name) => write!(f, "{LABEL_PREFIX}{name}"), + Self::Negative => write!(f, "{NOT_CHAR}"), } } } diff --git a/src/keywords.rs b/src/keywords.rs index 5ca9e33..ce0ad21 100644 --- a/src/keywords.rs +++ 
b/src/keywords.rs @@ -72,6 +72,10 @@ const_list! { SELECTION_START_CHAR = '{'; /// the end of an selection scope SELECTION_END_CHAR = '}'; + /// the start of a repetition scope + REPETITION_START_CHAR = '['; + /// the end of a repetition scope + REPETITION_END_CHAR = ']'; // Cond foci /// The seperator in a match condition @@ -91,11 +95,6 @@ const_list! { const_list! { /// Special characters that are not used by themselves UNUSED_CHARS: [pub char]; - - /// Used when duplicated for a gap - DOT_CHAR = '.'; - SQUARE_START_CHAR = '['; - SQUARE_END_CHAR = ']'; } const_list! { @@ -110,8 +109,6 @@ const_list! { /// Strings that act like special characters when isolated pub(crate) SPECIAL_STRS: [pub &str]; - /// A gap - GAP_STR = ".."; /// The input in a pattern condition INPUT_PATTERN_STR = "_"; } diff --git a/src/matcher/choices.rs b/src/matcher/choices.rs index e64d031..8359d14 100644 --- a/src/matcher/choices.rs +++ b/src/matcher/choices.rs @@ -7,7 +7,7 @@ use crate::{phones::Phone, tokens::ScopeId}; pub struct Choices<'c, 's> { pub(super) selection: Cow<'c, HashMap, usize>>, pub(super) optional: Cow<'c, HashMap, bool>>, - pub(super) gap: Cow<'c, HashMap<&'s str, usize>>, + pub(super) repetition: Cow<'c, HashMap<&'s str, usize>>, pub(super) any: Cow<'c, HashMap, Phone<'s>>>, } @@ -22,9 +22,9 @@ impl<'c, 's> Choices<'c, 's> { &self.optional } - /// Gets the gap choices - pub fn gap(&self) -> &HashMap<&'s str, usize> { - &self.gap + /// Gets the repetition choices + pub fn repetition(&self) -> &HashMap<&'s str, usize> { + &self.repetition } /// Gets the any phone choices @@ -37,7 +37,7 @@ impl<'c, 's> Choices<'c, 's> { Self { selection: Cow::Borrowed(&*self.selection), optional: Cow::Borrowed(&*self.optional), - gap: Cow::Borrowed(&*self.gap), + repetition: Cow::Borrowed(&*self.repetition), any: Cow::Borrowed(&*self.any), } } @@ -47,7 +47,7 @@ impl<'c, 's> Choices<'c, 's> { OwnedChoices { selection: take_owned_from_cow(self.selection), optional: 
take_owned_from_cow(self.optional), - gap: take_owned_from_cow(self.gap), + repetition: take_owned_from_cow(self.repetition), any: take_owned_from_cow(self.any), } } @@ -62,8 +62,8 @@ impl<'c, 's> Choices<'c, 's> { self.optional = Cow::Owned(optional); } - if let Some(gap) = owned.gap { - self.gap = Cow::Owned(gap); + if let Some(repetition) = owned.repetition { + self.repetition = Cow::Owned(repetition); } if let Some(any) = owned.any { @@ -79,7 +79,7 @@ impl<'c, 's> Choices<'c, 's> { pub struct OwnedChoices<'s> { selection: Option, usize>>, optional: Option, bool>>, - gap: Option>, + repetition: Option>, any: Option, Phone<'s>>>, } diff --git a/src/matcher/patterns/gap.rs b/src/matcher/patterns/gap.rs deleted file mode 100644 index 735e147..0000000 --- a/src/matcher/patterns/gap.rs +++ /dev/null @@ -1,65 +0,0 @@ -use crate::{keywords::GAP_STR, matcher::{choices::{Choices, OwnedChoices}, match_state::MatchState, phones::Phones}}; - -/// A pattern that represents some non-negative number (possibly zero) of non-boundary phones -#[derive(Debug, Clone, PartialEq, Eq, Default)] -pub struct Gap<'s> { - pub(super) len: usize, - pub(super) checked_at_zero: bool, - pub(super) id: Option<&'s str>, -} - -impl<'s> MatchState<'s> for Gap<'s> { - fn matches<'p>(&self, phones: &mut Phones<'_, 'p>, choices: &Choices<'_, 'p>) -> Option> where 's: 'p { - for _ in 0..self.len() { - if phones.next().is_bound() { - // returns `None` if a bound is crossed - return None; - } - } - - let mut new_choices = choices.partial_clone(); - - if let Some(id) = self.id { - if let Some(max_len) = choices.gap.get(id).copied() { - if self.len > max_len { - // if the max len is exceeded the match fails and the gap should exaust - return None; - } - } else { - // sets the choice if it is the first gap with the id - new_choices.gap.to_mut().insert(id, self.len); - } - } - - Some(new_choices.owned_choices()) - } - - fn next_match<'p>(&mut self, phones: &Phones<'_, 'p>, choices: &Choices<'_, 'p>) -> 
Option> where 's: 'p { - if self.checked_at_zero { - self.len += 1; - } else { - self.checked_at_zero = true; - } - - self.matches(&mut phones.clone(), choices) - } - - fn len(&self) -> usize { - self.len - } - - fn reset(&mut self) { - self.len = 0; - self.checked_at_zero = false; - } -} - -impl std::fmt::Display for Gap<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if let Some(id) = self.id { - write!(f, "{id}")?; - } - - write!(f, " {GAP_STR} ") - } -} \ No newline at end of file diff --git a/src/matcher/patterns/ir_to_patterns/mod.rs b/src/matcher/patterns/ir_to_patterns/mod.rs index ace8c30..d55a1fe 100644 --- a/src/matcher/patterns/ir_to_patterns/mod.rs +++ b/src/matcher/patterns/ir_to_patterns/mod.rs @@ -1,17 +1,9 @@ use std::{cell::RefCell, num::NonZero, rc::Rc}; use crate::{ - executor::io_events::{IoEvent, RuntimeIoEvent}, - ir::{tokens::{Break, IrToken}, IrLine}, - keywords::GAP_STR, - matcher::patterns::{ - cond::CondPattern, - list::PatternList, - rule::{RulePattern, SoundChangeRule}, - Pattern, - }, - tokens::{ScopeType, Shift, CondType, AndType, ScopeId, LabelType}, - ONE, + ONE, executor::io_events::{IoEvent, RuntimeIoEvent}, ir::{IrLine, tokens::{Break, IrToken}}, matcher::patterns::{ + Pattern, cond::CondPattern, list::PatternList, rule::{RulePattern, SoundChangeRule} + }, tokens::{AndType, CondType, LabelType, ScopeId, ScopeType, Shift} }; #[cfg(test)] @@ -150,7 +142,6 @@ fn ir_tokens_to_patterns<'ir, 's: 'ir>(ir: &mut impl Iterator Pattern::new_phone(*phone), IrToken::Any => Pattern::new_any(any_id(default_scope_ids, parent_scope.cloned())), - IrToken::Gap => Pattern::new_gap(None), // starts a default labeled option scope IrToken::ScopeStart(ScopeType::Optional) => { let id = optional_id(default_scope_ids, parent_scope.cloned()); @@ -167,6 +158,10 @@ fn ir_tokens_to_patterns<'ir, 's: 'ir>(ir: &mut impl Iterator { + let(inclusive, exclusive) = ir_to_repetition(ir)?; + Pattern::new_repetition(None, inclusive, 
exclusive) + }, // ensures a label is proceeding a labelable token then creates that token with the label IrToken::Label(name) => { let next = ir.next(); @@ -178,11 +173,13 @@ fn ir_tokens_to_patterns<'ir, 's: 'ir>(ir: &mut impl Iterator Pattern::new_optional(ir_tokens_to_patterns(ir, child_ids, id.as_ref(), Some(ScopeType::Optional))?, id), ScopeType::Selection => Pattern::new_selection(selection_contents_to_patterns(ir, child_ids, id.as_ref())?, id), + ScopeType::Repetition => { + let(inclusive, exclusive) = ir_to_repetition(ir)?; + Pattern::new_repetition(Some(*name), inclusive, exclusive) + }, } } else if let Some(IrToken::Any) = next { Pattern::new_any(id) - } else if let Some(IrToken::Gap) = next { - Pattern::new_gap(Some(name)) } else { return Err(RuleStructureError::LabelNotFollowedByScope(name)); } @@ -197,6 +194,10 @@ fn ir_tokens_to_patterns<'ir, 's: 'ir>(ir: &mut impl Iterator return Err(RuleStructureError::ArgSepOutOfSelection), IrToken::CondType(r#type) => return Err(RuleStructureError::UnexpectedCondType(*r#type)), + IrToken::Negative if end_at == Some(ScopeType::Repetition) => { + patterns.push(Pattern::List(PatternList::default())); // signals negative + return Ok(patterns); + } // these tokens should be removed in checking _ => return Err(RuleStructureError::UnexpectedToken(*ir_token)), }; @@ -207,6 +208,36 @@ fn ir_tokens_to_patterns<'ir, 's: 'ir>(ir: &mut impl Iterator(ir: &mut impl Iterator>) -> Result<(PatternList<'s>, Option>), RuleStructureError<'s>> { + let followed_by_exclusive = |pat: &Pattern<'_>| pat == &Pattern::List(PatternList::default()); + + let mut inclusive_patterns = ir_tokens_to_patterns(ir, None, None, Some(ScopeType::Repetition))?; + + let has_exclusive = inclusive_patterns.pop_if(|pat| followed_by_exclusive(pat)).is_some(); + + if inclusive_patterns.is_empty() { + return Err(RuleStructureError::EmptyRepetition); + } + + let exclusive = if has_exclusive { + let exclusive_patterns = ir_tokens_to_patterns(ir, None, None, 
Some(ScopeType::Repetition))?; + + match exclusive_patterns.last() { + None => return Err(RuleStructureError::EmptyExclusion), + Some(pat) if followed_by_exclusive(pat) => return Err(RuleStructureError::UnexpectedToken(IrToken::Negative)), + _ => (), + } + + Some(PatternList::new(exclusive_patterns)) + } else { + None + }; + + let inclusive = PatternList::new(inclusive_patterns); + + Ok((inclusive, exclusive)) +} + /// Converts the ir tokens in a selection scope to a list of pattern lists /// where each is an option to be selected by the scope: /// (options are seperated by the `ArgSep` token) @@ -339,7 +370,9 @@ pub enum RuleStructureError<'s> { AndDoesNotFollowCond(AndType), SecondShift(Shift), UnexpectedCondType(CondType), - GapOutOfCond, + RepetitionOutOfCond, + EmptyRepetition, + EmptyExclusion, } impl std::error::Error for RuleStructureError<'_> {} @@ -365,7 +398,9 @@ impl std::fmt::Display for RuleStructureError<'_> { => write!(f, "Found a second shift token '{shift}' after the first"), Self::UnexpectedCondType(r#type) => write!(f, "Found '{type}' either outside of a condition or after '{}' or '{}'", CondType::Pattern, CondType::Match), - Self::GapOutOfCond => write!(f, "Gaps ('{GAP_STR}') are not allowed outside of conditions and anti-conditions"), + Self::RepetitionOutOfCond => write!(f, "Repetitions ('{}...{}') are not allowed outside of conditions and anti-conditions", ScopeType::Repetition.fmt_start(), ScopeType::Repetition.fmt_end()), + Self::EmptyRepetition => write!(f, "A repetition must contain some inclusive pattern"), + Self::EmptyExclusion => write!(f, "A repetition exclusion must contain some pattern"), } } } \ No newline at end of file diff --git a/src/matcher/patterns/ir_to_patterns/tests.rs b/src/matcher/patterns/ir_to_patterns/tests.rs index 98ba92d..a396391 100644 --- a/src/matcher/patterns/ir_to_patterns/tests.rs +++ b/src/matcher/patterns/ir_to_patterns/tests.rs @@ -553,7 +553,7 @@ fn three_conds_and_anti_conds() { } #[test] -fn 
shift_cond_gap_input() { +fn shift_cond_repetition_input() { let shift = Shift { dir: Direction::Ltr, kind: ShiftType::Move}; assert_eq!(Ok(RuleLine::Rule { rule: SoundChangeRule { @@ -564,7 +564,7 @@ fn shift_cond_gap_input() { PatternList::default(), vec![CondPattern::new( CondType::Pattern, - PatternList::new(vec![Pattern::new_gap(None)]), + PatternList::new(vec![Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None)]), PatternList::default(), )], Vec::new(), @@ -573,13 +573,15 @@ fn shift_cond_gap_input() { }, lines: ONE }), build_rule(IrLine::Ir { tokens: vec![ IrToken::Break(Break::Shift(shift)), IrToken::Break(Break::Cond), - IrToken::Gap, + IrToken::ScopeStart(ScopeType::Repetition), + IrToken::Any, + IrToken::ScopeEnd(ScopeType::Repetition), IrToken::CondType(CondType::Pattern), ], lines: ONE })); } #[test] -fn shift_anti_cond_gap_input() { +fn shift_anti_cond_repetition_input() { let shift = Shift { dir: Direction::Ltr, kind: ShiftType::Move}; assert_eq!(Ok(RuleLine::Rule { rule: SoundChangeRule { @@ -591,7 +593,7 @@ fn shift_anti_cond_gap_input() { Vec::new(), vec![CondPattern::new( CondType::Pattern, - PatternList::new(vec![Pattern::new_gap(None)]), + PatternList::new(vec![Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None)]), PatternList::default(), )], ).expect("pattern construction should be valid") @@ -599,13 +601,15 @@ fn shift_anti_cond_gap_input() { }, lines: ONE }), build_rule(IrLine::Ir { tokens: vec![ IrToken::Break(Break::Shift(shift)), IrToken::Break(Break::AntiCond), - IrToken::Gap, + IrToken::ScopeStart(ScopeType::Repetition), + IrToken::Any, + IrToken::ScopeEnd(ScopeType::Repetition), IrToken::CondType(CondType::Pattern), ], lines: ONE })); } #[test] -fn shift_cond_label_gap_input() { +fn shift_cond_label_repetition_input() { let shift = Shift { dir: Direction::Ltr, kind: ShiftType::Move}; assert_eq!(Ok(RuleLine::Rule { rule: SoundChangeRule { @@ -616,7 +620,7 @@ fn 
shift_cond_label_gap_input() { PatternList::default(), vec![CondPattern::new( CondType::Pattern, - PatternList::new(vec![Pattern::new_gap(Some("label"))]), + PatternList::new(vec![Pattern::new_repetition(Some("label"), PatternList::new(vec![Pattern::new_any(None)]), None)]), PatternList::default(), )], Vec::new(), @@ -626,7 +630,71 @@ fn shift_cond_label_gap_input() { IrToken::Break(Break::Shift(shift)), IrToken::Break(Break::Cond), IrToken::Label("label"), - IrToken::Gap, + IrToken::ScopeStart(ScopeType::Repetition), + IrToken::Any, + IrToken::ScopeEnd(ScopeType::Repetition), + IrToken::CondType(CondType::Pattern), + ], lines: ONE })); +} + +#[test] +fn repetition_with_exclusive() { + let shift = Shift { dir: Direction::Ltr, kind: ShiftType::Move}; + + assert_eq!(Err(RuleStructureError::EmptyRepetition), build_rule(IrLine::Ir { tokens: vec![ + IrToken::Break(Break::Shift(shift)), + IrToken::Break(Break::Cond), + IrToken::ScopeStart(ScopeType::Repetition), + IrToken::Negative, + IrToken::Phone(Phone::Bound), + IrToken::ScopeEnd(ScopeType::Repetition), + IrToken::CondType(CondType::Pattern), + ], lines: ONE })); + + assert_eq!(Ok(RuleLine::Rule { rule: SoundChangeRule { + kind: shift, + output: Vec::new(), + pattern: RefCell::new( + RulePattern::new( + PatternList::default(), + vec![CondPattern::new( + CondType::Pattern, + PatternList::new(vec![Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), Some(PatternList::new(vec![Pattern::new_phone(Phone::Bound)])))]), + PatternList::default(), + )], + Vec::new(), + ).expect("pattern construction should be valid") + ), + }, lines: ONE }), build_rule(IrLine::Ir { tokens: vec![ + IrToken::Break(Break::Shift(shift)), + IrToken::Break(Break::Cond), + IrToken::ScopeStart(ScopeType::Repetition), + IrToken::Any, + IrToken::Negative, + IrToken::Phone(Phone::Bound), + IrToken::ScopeEnd(ScopeType::Repetition), + IrToken::CondType(CondType::Pattern), + ], lines: ONE })); + + 
assert_eq!(Err(RuleStructureError::EmptyExclusion), build_rule(IrLine::Ir { tokens: vec![ + IrToken::Break(Break::Shift(shift)), + IrToken::Break(Break::Cond), + IrToken::ScopeStart(ScopeType::Repetition), + IrToken::Any, + IrToken::Negative, + IrToken::ScopeEnd(ScopeType::Repetition), + IrToken::CondType(CondType::Pattern), + ], lines: ONE })); + + assert_eq!(Err(RuleStructureError::UnexpectedToken(IrToken::Negative)), build_rule(IrLine::Ir { tokens: vec![ + IrToken::Break(Break::Shift(shift)), + IrToken::Break(Break::Cond), + IrToken::ScopeStart(ScopeType::Repetition), + IrToken::Any, + IrToken::Negative, + IrToken::Phone(Phone::Bound), + IrToken::Negative, + IrToken::ScopeEnd(ScopeType::Repetition), IrToken::CondType(CondType::Pattern), ], lines: ONE })); } diff --git a/src/matcher/patterns/list.rs b/src/matcher/patterns/list.rs index 6168d27..f64b82b 100644 --- a/src/matcher/patterns/list.rs +++ b/src/matcher/patterns/list.rs @@ -3,7 +3,7 @@ use crate::{ matcher::{ choices::{Choices, OwnedChoices}, match_state::MatchState, - patterns::{check_box::CheckBox, gap::Gap, non_bound::NonBound, optional::Optional, selection::Selection, Pattern}, + patterns::{check_box::CheckBox, repetition::Repetition, non_bound::NonBound, optional::Optional, selection::Selection, Pattern}, phones::Phones }, phones::Phone, @@ -30,6 +30,10 @@ impl<'s> PatternList<'s> { &self.patterns } + pub fn push(&mut self, pat: Pattern<'s>) { + self.patterns.push(pat); + } + /// Sets the flag marking the list as checked at its current position to `false` pub const fn checked_flag_reset(&mut self) { self.checked_at_initial = false; @@ -50,8 +54,8 @@ impl<'s> PatternList<'s> { return Err(ApplicationError::PatternCannotBeConvertedToPhones(pattern.clone())); }, - Pattern::Gap(Gap { id: Some(id), .. }) => - match choices.gap.get(id) { + Pattern::Repetition(Repetition { id: Some(id), .. 
}) => + match choices.repetition.get(id) { Some(0) => (), _ => return Err(ApplicationError::PatternCannotBeConvertedToPhones(pattern.clone())), } @@ -102,7 +106,7 @@ impl<'s> PatternList<'s> { new_choices.take_owned(pat_choices); // creates the phones for the remaining patterns - let mut next_phones = phones.clone(); + let mut next_phones = *phones; next_phones.skip(pat.len()); if let Some(next_choices) = self.next_sub_match(index + 1, &next_phones, &new_choices) { diff --git a/src/matcher/patterns/mod.rs b/src/matcher/patterns/mod.rs index d2fbd94..b9cab20 100644 --- a/src/matcher/patterns/mod.rs +++ b/src/matcher/patterns/mod.rs @@ -1,8 +1,10 @@ +use std::cell::RefCell; + use crate::{ matcher::{ choices::{Choices, OwnedChoices}, match_state::MatchState, - patterns::{check_box::CheckBox, gap::Gap, list::PatternList, non_bound::NonBound, optional::Optional, selection::Selection}, + patterns::{check_box::CheckBox, repetition::Repetition, list::PatternList, non_bound::NonBound, optional::Optional, selection::Selection}, phones::Phones }, phones::Phone, @@ -13,7 +15,7 @@ pub mod list; pub mod cond; pub mod rule; pub mod non_bound; -pub mod gap; +pub mod repetition; pub mod optional; pub mod selection; pub mod check_box; @@ -28,9 +30,10 @@ mod tests; pub enum Pattern<'s> { Phone(CheckBox<'s, Phone<'s>>), NonBound(CheckBox<'s, NonBound<'s>>), - Gap(Gap<'s>), + Repetition(Repetition<'s>), Optional(Optional<'s>), Selection(Selection<'s>), + List(PatternList<'s>), } impl<'s> Pattern<'s> { @@ -42,8 +45,14 @@ impl<'s> Pattern<'s> { Self::NonBound(CheckBox::new(NonBound { id })) } - pub const fn new_gap(id: Option<&'s str>) -> Self { - Self::Gap(Gap { len: 0, checked_at_zero: false, id }) + pub fn new_repetition(id: Option<&'s str>, inclusive: PatternList<'s>, exclusive: Option>) -> Self { + Self::Repetition(Repetition { + checked_at_zero: false, + inclusive, exclusive: exclusive.map(RefCell::new), + included: PatternList::default(), + len: 0, + id, + }) } pub const fn 
new_optional(content: Vec>, id: Option>) -> Self { @@ -76,9 +85,10 @@ impl<'s> MatchState<'s> for Pattern<'s> { match self { Self::Phone(phone) => phone.matches(phones, choices), Self::NonBound(any) => any.matches(phones, choices), - Self::Gap(gap) => gap.matches(phones, choices), + Self::Repetition(repetition) => repetition.matches(phones, choices), Self::Optional(option) => option.matches(phones, choices), Self::Selection(selection) => selection.matches(phones, choices), + Self::List(list) => list.matches(phones, choices), } } @@ -86,9 +96,10 @@ impl<'s> MatchState<'s> for Pattern<'s> { match self { Self::Phone(phone) => phone.next_match(phones, choices), Self::NonBound(any) => any.next_match(phones, choices), - Self::Gap(gap) => gap.next_match(phones, choices), + Self::Repetition(repetition) => repetition.next_match(phones, choices), Self::Optional(option) => option.next_match(phones, choices), Self::Selection(selection) => selection.next_match(phones, choices), + Self::List(list) => list.next_match(phones, choices), } } @@ -96,9 +107,10 @@ impl<'s> MatchState<'s> for Pattern<'s> { match self { Self::Phone(phone) => phone.len(), Self::NonBound(any) => any.len(), - Self::Gap(gap) => gap.len(), + Self::Repetition(repetition) => repetition.len(), Self::Optional(option) => option.len(), Self::Selection(selection) => selection.len(), + Self::List(list) => list.len(), } } @@ -106,9 +118,10 @@ impl<'s> MatchState<'s> for Pattern<'s> { match self { Self::Phone(phone) => phone.reset(), Self::NonBound(any) => any.reset(), - Self::Gap(gap) => gap.reset(), + Self::Repetition(repetition) => repetition.reset(), Self::Optional(option) => option.reset(), Self::Selection(selection) => selection.reset(), + Self::List(list) => list.reset(), } } } @@ -118,9 +131,10 @@ impl std::fmt::Display for Pattern<'_> { match self { Self::Phone(phone) => write!(f, "{}", phone.unit_state.as_symbol()), Self::NonBound(any) => write!(f, "{}", any.unit_state), - Self::Gap(gap) => write!(f, 
"{gap}"), + Self::Repetition(repetition) => write!(f, "{repetition}"), Self::Optional(option) => write!(f, "{option}"), Self::Selection(selection) => write!(f, "{selection}"), + Self::List(list) => write!(f, "{list}"), } } } \ No newline at end of file diff --git a/src/matcher/patterns/repetition.rs b/src/matcher/patterns/repetition.rs new file mode 100644 index 0000000..8f86997 --- /dev/null +++ b/src/matcher/patterns/repetition.rs @@ -0,0 +1,130 @@ +use std::cell::RefCell; + +use crate::{keywords::{REPETITION_END_CHAR, REPETITION_START_CHAR, NOT_CHAR}, matcher::{choices::{Choices, OwnedChoices}, match_state::MatchState, patterns::{Pattern, list::PatternList}, phones::Phones}}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Repetition<'s> { + pub(super) checked_at_zero: bool, + pub(super) inclusive: PatternList<'s>, + pub(super) exclusive: Option>>, + pub(super) included: PatternList<'s>, + pub(super) len: usize, + pub(super) id: Option<&'s str>, +} + +impl<'s> MatchState<'s> for Repetition<'s> { + fn matches<'p>(&self, phones: &mut Phones<'_, 'p>, choices: &Choices<'_, 'p>) -> Option> where 's: 'p { + if let Some(mut exclusive) = self.exclusive.as_ref().map(RefCell::borrow_mut) { + let mut phones2 = *phones; + + let len = self.id.as_ref() + .and_then(|id| choices.repetition.get(id)) + .copied() + .unwrap_or(self.len); + + for _ in 0..len { + if exclusive.next_match(&phones2, choices).is_some() { + exclusive.reset(); + return None; + } + + exclusive.reset(); + phones2.next(); + } + } + + if self.included.len() == self.len && let Some(new_choices) = self.included.matches(phones, choices) { + Some(new_choices) + } else { + None + } + } + + fn next_match<'p>(&mut self, phones: &Phones<'_, 'p>, choices: &Choices<'_, 'p>) -> Option> where 's: 'p { + if self.checked_at_zero || self.id.as_ref().map(|id| choices.repetition.contains_key(id)).is_some_and(|exists| exists) { + let mut new_choices = choices.partial_clone(); + + let mut max_len = phones.rem_len(); + + if 
let Some(id) = &self.id && let Some(max) = choices.repetition.get(id).copied() { + max_len = max.min(max_len); + } + + loop { + loop { + if let Some(indcluded_choices) = self.included.next_match(phones, &new_choices) { + let mut choices = new_choices.partial_clone(); + choices.take_owned(indcluded_choices); + + if let Some(match_choices) = self.matches(&mut phones.clone(), &choices) { + choices.take_owned(match_choices); + + if let Some(id) = &self.id && !choices.repetition.contains_key(id) { + choices.repetition.to_mut().insert(id, self.len); + } + + new_choices.take_owned(choices.owned_choices()); + + return Some(new_choices.owned_choices()); + } + } else { + self.included.reset(); + self.included.push(Pattern::List(self.inclusive.clone())); + + if self.included.inner().len() > max_len { + break; + } + } + } + + if self.len > max_len { + break; + } + + self.len += 1; + self.included = PatternList::default(); + } + + None + } else { + self.checked_at_zero = true; + self.len = 0; + self.included = PatternList::default(); + + if let Some(id) = self.id { + let mut new_choices = choices.partial_clone(); + new_choices.repetition.to_mut().insert(id, self.len); + + Some(new_choices.owned_choices()) + } else { + Some(OwnedChoices::default()) + } + } + } + + fn len(&self) -> usize { + self.len + } + + fn reset(&mut self) { + self.checked_at_zero = false; + self.len = 0; + self.included = PatternList::default(); + } +} + +impl std::fmt::Display for Repetition<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(id) = &self.id { + write!(f, "{id}")?; + } + + write!(f, "{REPETITION_START_CHAR} {} ", self.inclusive)?; + + if let Some(exclusive) = &self.exclusive { + write!(f, "{NOT_CHAR} {} ", exclusive.borrow())?; + } + + write!(f, "{REPETITION_END_CHAR}") + } +} \ No newline at end of file diff --git a/src/matcher/patterns/rule.rs b/src/matcher/patterns/rule.rs index aba5943..906a28f 100644 --- a/src/matcher/patterns/rule.rs +++ 
b/src/matcher/patterns/rule.rs @@ -27,12 +27,12 @@ pub struct RulePattern<'s> { anti_conds: Vec>, } -fn contains_gap(tokens: &PatternList<'_>) -> bool { +fn contains_repetition(tokens: &PatternList<'_>) -> bool { for token in tokens.inner() { match token { - Pattern::Gap { .. } => return true, - Pattern::Optional(Optional { option, ..}) if contains_gap(option) => return true, - Pattern::Selection(Selection { options, .. }) if options.iter().any(|tokens| contains_gap(tokens)) => return true, + Pattern::Repetition { .. } => return true, + Pattern::Optional(Optional { option, ..}) if contains_repetition(option) => return true, + Pattern::Selection(Selection { options, .. }) if options.iter().any(|tokens| contains_repetition(tokens)) => return true, _ => (), } } @@ -42,8 +42,8 @@ fn contains_gap(tokens: &PatternList<'_>) -> bool { impl<'s> RulePattern<'s> { pub fn new(input: PatternList<'s>, mut conds: Vec>, anti_conds: Vec>) -> Result> { - if contains_gap(&input) { - return Err(RuleStructureError::GapOutOfCond); + if contains_repetition(&input) { + return Err(RuleStructureError::RepetitionOutOfCond); } if conds.is_empty() { @@ -83,7 +83,7 @@ impl<'s> RulePattern<'s> { new_choices.take_owned(input_choices); // prepares to create condition phones - let mut after_input_phones = phones.clone(); + let mut after_input_phones = *phones; after_input_phones.skip(self.input.len()); // creates the phone iterators for the conditions diff --git a/src/matcher/patterns/tests/ltr_pattern_tests.rs b/src/matcher/patterns/tests/ltr_pattern_tests.rs index 5d3d01e..105f9a8 100644 --- a/src/matcher/patterns/tests/ltr_pattern_tests.rs +++ b/src/matcher/patterns/tests/ltr_pattern_tests.rs @@ -147,22 +147,22 @@ fn agreeing_non_bounds() { } #[test] -fn unbounded_gap() { +fn unbounded_repetition() { let choices = Choices::default(); - let pattern = Pattern::new_gap(None); + let pattern = Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None); let mut match_phones = 
Phones::new(&[], 0, Direction::Ltr); assert!(pattern.matches(&mut match_phones, &choices).is_some()); } #[test] -fn bounded_gap() { +fn bounded_repetition() { let choices = Choices::default(); let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(None), + Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("b")], 0, Direction::Ltr); @@ -171,7 +171,7 @@ fn bounded_gap() { let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(None), + Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("-"), Phone::Symbol("b")], 0, Direction::Ltr); @@ -180,7 +180,7 @@ fn bounded_gap() { let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(None), + Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("-"), Phone::Symbol("-"), Phone::Symbol("-"), Phone::Symbol("b")], 0, Direction::Ltr); @@ -189,7 +189,7 @@ fn bounded_gap() { let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(None), + Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Bound, Phone::Symbol("b")], 0, Direction::Ltr); @@ -198,16 +198,54 @@ fn bounded_gap() { } #[test] -fn agreeing_gaps() { +fn bounded_repetition_with_exclusion() { + let choices = Choices::default(); + + let mut patterns = PatternList::new(vec![ + Pattern::new_phone(Phone::Symbol("a")), + 
Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), Some(PatternList::new(vec![Pattern::new_phone(Phone::Symbol("c"))]))), + Pattern::new_phone(Phone::Symbol("b")), + ]); + let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("b")], 0, Direction::Ltr); + + assert!(patterns.next_match(&mut match_phones, &choices).is_some()); + + patterns.reset(); + + let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("c"), Phone::Symbol("b")], 0, Direction::Ltr); + + assert!(patterns.next_match(&mut match_phones, &choices).is_none()); + + patterns.reset(); + + let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("c"), Phone::Symbol("z"), Phone::Symbol("b")], 0, Direction::Ltr); + + assert!(patterns.next_match(&mut match_phones, &choices).is_none()); + + patterns.reset(); + + let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("z"), Phone::Symbol("c"), Phone::Symbol("b")], 0, Direction::Ltr); + + assert!(patterns.next_match(&mut match_phones, &choices).is_none()); + + patterns.reset(); + + let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("z"), Phone::Symbol("b")], 0, Direction::Ltr); + + assert!(patterns.next_match(&mut match_phones, &choices).is_some()); +} + +#[test] +fn agreeing_repetitions() { let choices = Choices::default(); let label = "label"; let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("c")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("-"), Phone::Symbol("b"), Phone::Symbol("c"),], 0, Direction::Ltr); @@ -217,9 +255,9 @@ fn agreeing_gaps() { let mut patterns = 
PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("c")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("-"), Phone::Symbol("b"), Phone::Symbol("-"), Phone::Symbol("c"),], 0, Direction::Ltr); @@ -229,9 +267,9 @@ fn agreeing_gaps() { let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("c")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("-"), Phone::Symbol("b"), Phone::Symbol("-"), Phone::Symbol("-"), Phone::Symbol("c"),], 0, Direction::Ltr); diff --git a/src/matcher/patterns/tests/ltr_rules_tests.rs b/src/matcher/patterns/tests/ltr_rules_tests.rs index 1564617..fd921eb 100644 --- a/src/matcher/patterns/tests/ltr_rules_tests.rs +++ b/src/matcher/patterns/tests/ltr_rules_tests.rs @@ -485,4 +485,4 @@ fn zero_input() { assert!(rule_pattern.next_match(&Phones::new(&[], 0, Direction::Ltr)).expect("next match should not error").is_none()); } -// todo: conds, anti-conds, &, &!, with gaps, non phone conds \ No newline at end of file +// todo: conds, anti-conds, &, &!, with repetitions, non phone conds \ No newline at end of file diff --git a/src/matcher/patterns/tests/rtl_pattern_tests.rs b/src/matcher/patterns/tests/rtl_pattern_tests.rs index 0db1430..90cdc40 100644 --- a/src/matcher/patterns/tests/rtl_pattern_tests.rs +++ 
b/src/matcher/patterns/tests/rtl_pattern_tests.rs @@ -147,22 +147,22 @@ fn agreeing_non_bounds() { } #[test] -fn unbounded_gap() { +fn unbounded_repetition() { let choices = Choices::default(); - let pattern = Pattern::new_gap(None); + let pattern = Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None); let mut match_phones = Phones::new(&[], 0, Direction::Rtl); assert!(pattern.matches(&mut match_phones, &choices).is_some()); } #[test] -fn bounded_gap() { +fn bounded_repetition() { let choices = Choices::default(); let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(None), + Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("b")], 1, Direction::Rtl); @@ -171,7 +171,7 @@ fn bounded_gap() { let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(None), + Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("-"), Phone::Symbol("b")], 2, Direction::Rtl); @@ -180,7 +180,7 @@ fn bounded_gap() { let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(None), + Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("-"), Phone::Symbol("-"), Phone::Symbol("-"), Phone::Symbol("b")], 4, Direction::Rtl); @@ -189,7 +189,7 @@ fn bounded_gap() { let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(None), + Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), ]); 
let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Bound, Phone::Symbol("b")], 2, Direction::Rtl); @@ -198,16 +198,54 @@ fn bounded_gap() { } #[test] -fn agreeing_gaps() { +fn bounded_repetition_with_exclusion() { + let choices = Choices::default(); + + let mut patterns = PatternList::new(vec![ + Pattern::new_phone(Phone::Symbol("a")), + Pattern::new_repetition(None, PatternList::new(vec![Pattern::new_any(None)]), Some(PatternList::new(vec![Pattern::new_phone(Phone::Symbol("c"))]))), + Pattern::new_phone(Phone::Symbol("b")), + ]); + let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("b")], 1, Direction::Rtl); + + assert!(patterns.next_match(&mut match_phones, &choices).is_some()); + + patterns.reset(); + + let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("c"), Phone::Symbol("b")], 2, Direction::Rtl); + + assert!(patterns.next_match(&mut match_phones, &choices).is_none()); + + patterns.reset(); + + let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("c"), Phone::Symbol("z"), Phone::Symbol("b")], 3, Direction::Rtl); + + assert!(patterns.next_match(&mut match_phones, &choices).is_none()); + + patterns.reset(); + + let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("z"), Phone::Symbol("c"), Phone::Symbol("b")], 3, Direction::Rtl); + + assert!(patterns.next_match(&mut match_phones, &choices).is_none()); + + patterns.reset(); + + let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("z"), Phone::Symbol("b")], 2, Direction::Rtl); + + assert!(patterns.next_match(&mut match_phones, &choices).is_some()); +} + +#[test] +fn agreeing_repetitions() { let choices = Choices::default(); let label = "label"; let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), - 
Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("c")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("-"), Phone::Symbol("b"), Phone::Symbol("c"),], 3, Direction::Rtl); @@ -216,9 +254,9 @@ fn agreeing_gaps() { let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("c")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("-"), Phone::Symbol("b"), Phone::Symbol("-"), Phone::Symbol("c"),], 4, Direction::Rtl); @@ -228,9 +266,9 @@ fn agreeing_gaps() { let mut patterns = PatternList::new(vec![ Pattern::new_phone(Phone::Symbol("a")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("b")), - Pattern::new_gap(Some(label)), + Pattern::new_repetition(Some(label), PatternList::new(vec![Pattern::new_any(None)]), None), Pattern::new_phone(Phone::Symbol("c")), ]); let mut match_phones = Phones::new(&[Phone::Symbol("a"), Phone::Symbol("-"), Phone::Symbol("b"), Phone::Symbol("-"), Phone::Symbol("-"), Phone::Symbol("c"),], 5, Direction::Rtl); diff --git a/src/matcher/patterns/tests/rtl_rules_tests.rs b/src/matcher/patterns/tests/rtl_rules_tests.rs index fb4ff79..a823e19 100644 --- a/src/matcher/patterns/tests/rtl_rules_tests.rs +++ b/src/matcher/patterns/tests/rtl_rules_tests.rs @@ -485,4 +485,4 @@ fn zero_input() { assert!(rule_pattern.next_match(&Phones::new(&[], 0, Direction::Rtl)).expect("next match should not error").is_none()); } -// todo: conds, anti-conds, 
&, &!, with gaps, non phone conds \ No newline at end of file +// todo: conds, anti-conds, &, &!, with repetitions, non phone conds \ No newline at end of file diff --git a/src/matcher/phones.rs b/src/matcher/phones.rs index ec43eec..220ea81 100644 --- a/src/matcher/phones.rs +++ b/src/matcher/phones.rs @@ -1,9 +1,7 @@ use crate::{phones::Phone, tokens::Direction}; /// A directional `Iterator` over a list of phones -/// -/// Note: does not implement `Copy` to avoid confusion with implicit copies via the `Iterator` trait -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Phones<'b, 'p> { phone_list: &'b [Phone<'p>], /// the index of the next phone LTR, @@ -33,6 +31,18 @@ impl<'p, 's> Phones<'p, 's> { } } + // gets the number of phones left in the iterator + pub fn rem_len(&self) -> usize { + if let Some(index) = self.index { + 1 + match self.direction { + Direction::Ltr => self.phone_list.len() - index, + Direction::Rtl => index, + } + } else { + 0 + } + } + /// Gets the next phone pub fn next(&mut self) -> &'p Phone<'s> { if let Some(i) = self.index { diff --git a/src/tests/failure_tests.rs b/src/tests/failure_tests.rs index 4cf8bee..f249fd6 100644 --- a/src/tests/failure_tests.rs +++ b/src/tests/failure_tests.rs @@ -15,10 +15,10 @@ fn time_out_of_infinte_loop() { } #[io_test(pollster::block_on)] -fn gap_out_of_cond() { - assert!(await_io! { apply_fallible("abc", "a .. >> b / _ #") }.is_err()); - assert!(await_io! { apply_fallible("a", "a >> b .. c") }.is_err()); - assert!(await_io! { apply_fallible("a", "a $gap .. # >> b $gap .. c") }.is_err()); +fn repetition_out_of_cond() { + assert!(await_io! { apply_fallible("abc", "a [*] >> b / _ #") }.is_err()); + assert!(await_io! { apply_fallible("a", "a >> b [*] c") }.is_err()); + assert!(await_io! 
{ apply_fallible("a", "a $rep [*] # >> b $rep [*] c") }.is_err()); } #[io_test(pollster::block_on)] diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 0836418..445fdd6 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -73,12 +73,7 @@ fn escape() { #[io_test(pollster::block_on)] fn input_escape() { - assert_eq!("bb", await_io! { apply("..", "\\. >> b") }); -} - -#[io_test(pollster::block_on)] -fn reserved_chars() { - assert!(await_io! { apply_fallible("..", ". >> b") }.is_err()); + assert_eq!("bb", await_io! { apply("//", "\\/ >> b") }); } #[io_test(pollster::block_on)] diff --git a/src/tokens.rs b/src/tokens.rs index 80166b3..a0c9945 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,4 +1,4 @@ -use crate::{ir::tokens::IrToken, keywords::{AND_CHAR, ANY_CHAR, INPUT_PATTERN_STR, LTR_CHAR, MATCH_CHAR, NOT_CHAR, OPTIONAL_END_CHAR, OPTIONAL_START_CHAR, RTL_CHAR, SELECTION_END_CHAR, SELECTION_START_CHAR}}; +use crate::{ir::tokens::IrToken, keywords::{AND_CHAR, ANY_CHAR, REPETITION_END_CHAR, REPETITION_START_CHAR, INPUT_PATTERN_STR, LTR_CHAR, MATCH_CHAR, NOT_CHAR, OPTIONAL_END_CHAR, OPTIONAL_START_CHAR, RTL_CHAR, SELECTION_END_CHAR, SELECTION_START_CHAR}}; use std::{fmt::Display, rc::Rc}; @@ -78,6 +78,8 @@ pub enum ScopeType { Optional, /// A scope that adds one of its items to the phone list Selection, + /// A scope that represents a repetition of phones + Repetition, } impl ScopeType { @@ -85,6 +87,7 @@ impl ScopeType { match self { ScopeType::Optional => OPTIONAL_START_CHAR, ScopeType::Selection => SELECTION_START_CHAR, + ScopeType::Repetition => REPETITION_START_CHAR, } } @@ -92,6 +95,7 @@ impl ScopeType { match self { ScopeType::Optional => OPTIONAL_END_CHAR, ScopeType::Selection => SELECTION_END_CHAR, + ScopeType::Repetition => REPETITION_END_CHAR, } } }