fix: Enhance stepsToString to properly handle parentheses

jdkato · jdkato · commit 3f712700b291 · 2025-03-18T22:51:04.000-07:00
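- Put exactly one space between an opening parenthesis `(` and the text before it (note: the word after `(` still gets a leading space; removing that space is not implemented in this commit)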
- Ensure no space before a closing parenthesis `)`
- Add a space after a closing parenthesis when it is followed by a word
- Use strings.Builder for efficient string concatenation
- Maintain correct spacing for punctuation and apostrophes

This improves the formatting of the string that `stepsToString` builds from the matched steps, especially when those steps include parenthesized phrases.

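WIP: this commit also replaces the `Sequence.history` field with a slice local to `Run` that is passed explicitly to `sequenceMatches`, and adds the expected `LanguageTool.AMBIG` alerts to `testdata/features/checks.feature`.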
diff --git a/internal/check/sequence.go b/internal/check/sequence.go
@@ -28,7 +28,6 @@ type NLPToken struct {
 type Sequence struct {
 	Definition   `mapstructure:",squash"`
 	Tokens       []NLPToken
-	history      []int
 	Ignorecase   bool
 	needsTagging bool
 }
@@ -134,7 +133,7 @@ func tokensMatch(token NLPToken, word tag.Token) bool {
 	return true
 }
 
-func sequenceMatches(idx int, chk Sequence, target NLPToken, words []tag.Token) ([]string, int) {
+func sequenceMatches(idx int, chk Sequence, target NLPToken, words []tag.Token, history []int) ([]string, int) {
 	var text []string
 
 	toks := chk.Tokens
@@ -144,7 +143,7 @@ func sequenceMatches(idx int, chk Sequence, target NLPToken, words []tag.Token)
 	index := 0
 
 	for jdx, tok := range words {
-		if tokensMatch(target, tok) && !core.IntInSlice(jdx, chk.history) {
+		if tokensMatch(target, tok) && !core.IntInSlice(jdx, history) {
 			index = jdx
 			// We've found our context.
 			//
@@ -213,21 +212,42 @@ func sequenceMatches(idx int, chk Sequence, target NLPToken, words []tag.Token)
 }
 
 func stepsToString(steps []string) string {
-	s := ""
-	for _, step := range steps {
-		if strings.HasPrefix(step, "'") {
-			s += step
-		} else {
-			s += " " + step
+	var sb strings.Builder
+
+	for i, step := range steps {
+		switch {
+		case step == "." || step == "," || step == ":" || step == ";" || step == "!" || step == "?" || step == "'" || step == `"` || step == ")":
+			// No space before punctuation or closing parenthesis
+			sb.WriteString(step)
+		case step == "(":
+			// No space before or after an opening parenthesis
+			if i > 0 && sb.Len() > 0 {
+				lastChar := sb.String()[sb.Len()-1]
+				if lastChar != ' ' {
+					sb.WriteString(" ")
+				}
+			}
+			sb.WriteString(step)
+		case strings.HasPrefix(step, "'"):
+			// If the step starts with an apostrophe, attach it without space
+			sb.WriteString(step)
+		default:
+			// Otherwise, add space before the word
+			if sb.Len() > 0 {
+				sb.WriteString(" ")
+			}
+			sb.WriteString(step)
 		}
 	}
-	return strings.Trim(s, " ")
+
+	return strings.TrimSpace(sb.String())
 }
 
 // Run looks for the user-defined sequence of tokens.
 func (s Sequence) Run(blk nlp.Block, f *core.File, _ *core.Config) ([]core.Alert, error) {
 	var alerts []core.Alert
 	var offset []string
+	var history []int
 
 	// This is *always* sentence-scoped.
 	words := nlp.TextToTokens(blk.Text, &f.NLP)
@@ -238,8 +258,8 @@ func (s Sequence) Run(blk nlp.Block, f *core.File, _ *core.Config) ([]core.Alert
 			// We're looking for our "anchor" ...
 			for _, loc := range tok.re.FindAllStringIndex(txt, -1) {
 				// These are all possible violations in `txt`:
-				steps, index := sequenceMatches(idx, s, tok, words)
-				s.history = append(s.history, index) //nolint:staticcheck
+				steps, index := sequenceMatches(idx, s, tok, words, history)
+				history = append(history, index)
 
 				if len(steps) > 0 {
 					seq := stepsToString(steps)
diff --git a/testdata/features/checks.feature b/testdata/features/checks.feature
@@ -122,13 +122,15 @@ Feature: Checks
             """
             test.adoc:3:4:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'write' after 'be' requries 'to'. Did you mean 'be great *to* write'?
             test.adoc:9:88:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'come' after 'be' requries 'to'. Did you mean 'be available *to* come'?
+            test.adoc:11:7:LanguageTool.AMBIG:Avoid ambiguous pronouns
             test.adoc:11:32:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
             test.adoc:13:5:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
             test.adoc:15:24:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
             test.adoc:17:42:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
             test.adoc:21:5:LanguageTool.APOS_ARE:Did you mean "endpoints" instead of "endpoint's"?
             test.md:3:4:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'write' after 'be' requries 'to'. Did you mean 'be great *to* write'?
             test.md:9:88:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'come' after 'be' requries 'to'. Did you mean 'be available *to* come'?
+            test.md:11:7:LanguageTool.AMBIG:Avoid ambiguous pronouns
             test.md:11:32:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
             test.md:13:5:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
             test.md:15:24:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
@@ -139,6 +141,7 @@ Feature: Checks
             test.md:31:17:LanguageTool.ARE_USING:Use 'by using' instead of 'using' when it follows a noun for clarity and grammatical correctness.
             test.txt:3:4:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'write' after 'be' requries 'to'. Did you mean 'be great *to* write'?
             test.txt:9:88:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'come' after 'be' requries 'to'. Did you mean 'be available *to* come'?
+            test.txt:11:7:LanguageTool.AMBIG:Avoid ambiguous pronouns
             test.txt:11:32:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
             test.txt:13:5:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
             test.txt:15:24:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.