Skip to content

Commit 3f71270

Browse files
committed
fix: Enhance stepsToString to properly handle parentheses
- Ensure no space before or after an opening parenthesis `(`
- Ensure no space before a closing parenthesis `)`
- Add space after a closing parenthesis if followed by a word
- Use strings.Builder for efficient string concatenation
- Maintain correct spacing for punctuation and apostrophes

This improves text formatting, especially for cases with nested phrases in parentheses.

WIP
1 parent e32b21d commit 3f71270

File tree

2 files changed

+35
-12
lines changed

2 files changed

+35
-12
lines changed

internal/check/sequence.go

Lines changed: 32 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ type NLPToken struct {
2828
type Sequence struct {
2929
Definition `mapstructure:",squash"`
3030
Tokens []NLPToken
31-
history []int
3231
Ignorecase bool
3332
needsTagging bool
3433
}
@@ -134,7 +133,7 @@ func tokensMatch(token NLPToken, word tag.Token) bool {
134133
return true
135134
}
136135

137-
func sequenceMatches(idx int, chk Sequence, target NLPToken, words []tag.Token) ([]string, int) {
136+
func sequenceMatches(idx int, chk Sequence, target NLPToken, words []tag.Token, history []int) ([]string, int) {
138137
var text []string
139138

140139
toks := chk.Tokens
@@ -144,7 +143,7 @@ func sequenceMatches(idx int, chk Sequence, target NLPToken, words []tag.Token)
144143
index := 0
145144

146145
for jdx, tok := range words {
147-
if tokensMatch(target, tok) && !core.IntInSlice(jdx, chk.history) {
146+
if tokensMatch(target, tok) && !core.IntInSlice(jdx, history) {
148147
index = jdx
149148
// We've found our context.
150149
//
@@ -213,21 +212,42 @@ func sequenceMatches(idx int, chk Sequence, target NLPToken, words []tag.Token)
213212
}
214213

215214
func stepsToString(steps []string) string {
216-
s := ""
217-
for _, step := range steps {
218-
if strings.HasPrefix(step, "'") {
219-
s += step
220-
} else {
221-
s += " " + step
215+
var sb strings.Builder
216+
217+
for i, step := range steps {
218+
switch {
219+
case step == "." || step == "," || step == ":" || step == ";" || step == "!" || step == "?" || step == "'" || step == `"` || step == ")":
220+
// No space before punctuation or closing parenthesis
221+
sb.WriteString(step)
222+
case step == "(":
223+
// No space before or after an opening parenthesis
224+
if i > 0 && sb.Len() > 0 {
225+
lastChar := sb.String()[sb.Len()-1]
226+
if lastChar != ' ' {
227+
sb.WriteString(" ")
228+
}
229+
}
230+
sb.WriteString(step)
231+
case strings.HasPrefix(step, "'"):
232+
// If the step starts with an apostrophe, attach it without space
233+
sb.WriteString(step)
234+
default:
235+
// Otherwise, add space before the word
236+
if sb.Len() > 0 {
237+
sb.WriteString(" ")
238+
}
239+
sb.WriteString(step)
222240
}
223241
}
224-
return strings.Trim(s, " ")
242+
243+
return strings.TrimSpace(sb.String())
225244
}
226245

227246
// Run looks for the user-defined sequence of tokens.
228247
func (s Sequence) Run(blk nlp.Block, f *core.File, _ *core.Config) ([]core.Alert, error) {
229248
var alerts []core.Alert
230249
var offset []string
250+
var history []int
231251

232252
// This is *always* sentence-scoped.
233253
words := nlp.TextToTokens(blk.Text, &f.NLP)
@@ -238,8 +258,8 @@ func (s Sequence) Run(blk nlp.Block, f *core.File, _ *core.Config) ([]core.Alert
238258
// We're looking for our "anchor" ...
239259
for _, loc := range tok.re.FindAllStringIndex(txt, -1) {
240260
// These are all possible violations in `txt`:
241-
steps, index := sequenceMatches(idx, s, tok, words)
242-
s.history = append(s.history, index) //nolint:staticcheck
261+
steps, index := sequenceMatches(idx, s, tok, words, history)
262+
history = append(history, index)
243263

244264
if len(steps) > 0 {
245265
seq := stepsToString(steps)

testdata/features/checks.feature

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -122,13 +122,15 @@ Feature: Checks
122122
"""
123123
test.adoc:3:4:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'write' after 'be' requries 'to'. Did you mean 'be great *to* write'?
124124
test.adoc:9:88:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'come' after 'be' requries 'to'. Did you mean 'be available *to* come'?
125+
test.adoc:11:7:LanguageTool.AMBIG:Avoid ambiguous pronouns
125126
test.adoc:11:32:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
126127
test.adoc:13:5:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
127128
test.adoc:15:24:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
128129
test.adoc:17:42:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
129130
test.adoc:21:5:LanguageTool.APOS_ARE:Did you mean "endpoints" instead of "endpoint's"?
130131
test.md:3:4:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'write' after 'be' requries 'to'. Did you mean 'be great *to* write'?
131132
test.md:9:88:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'come' after 'be' requries 'to'. Did you mean 'be available *to* come'?
133+
test.md:11:7:LanguageTool.AMBIG:Avoid ambiguous pronouns
132134
test.md:11:32:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
133135
test.md:13:5:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
134136
test.md:15:24:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
@@ -139,6 +141,7 @@ Feature: Checks
139141
test.md:31:17:LanguageTool.ARE_USING:Use 'by using' instead of 'using' when it follows a noun for clarity and grammatical correctness.
140142
test.txt:3:4:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'write' after 'be' requries 'to'. Did you mean 'be great *to* write'?
141143
test.txt:9:88:LanguageTool.WOULD_BE_JJ_VB:The infinitive 'come' after 'be' requries 'to'. Did you mean 'be available *to* come'?
144+
test.txt:11:7:LanguageTool.AMBIG:Avoid ambiguous pronouns
142145
test.txt:11:32:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
143146
test.txt:13:5:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.
144147
test.txt:15:24:LanguageTool.OF_ALL_TIMES:In this context, the idiom needs to be spelled 'of all time'.

0 commit comments

Comments
 (0)