Skip to content

Commit f9f5e68

Browse files
fix(core): use byte offsets for position reporting in raw-scoped script rules (#1081)
Script rules with `scope: raw` return begin/end byte offsets in their match arrays, but AddAlert ignores these and performs a text search via FindLoc/initialPosition to determine the alert position. When the matched text appears multiple times in the document, this always reports the position of the first occurrence rather than the intended one. Add locFromByteOffset() to compute line:column directly from the byte offsets the script provides, bypassing the text-search path. The new path activates when the alert carries valid byte offsets within a raw-scope block, falling back to the existing FindLoc path otherwise. Relates to #869, #272. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 034c5f1 commit f9f5e68

4 files changed

Lines changed: 61 additions & 21 deletions

File tree

internal/check/script.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,13 @@ func (s Script) Run(blk nlp.Block, _ *core.File, _ *core.Config) ([]core.Alert,
7878
// don't use our custom regexp2 library, which means the offsets
7979
// (`re2loc`) will be off.
8080
a := core.Alert{
81-
Check: s.Name,
82-
Severity: s.Level,
83-
Span: matchLoc,
84-
Link: s.Link,
85-
Match: matchText,
86-
Action: s.Action}
81+
Check: s.Name,
82+
Severity: s.Level,
83+
Span: matchLoc,
84+
Link: s.Link,
85+
Match: matchText,
86+
Action: s.Action,
87+
HasByteOffsets: true}
8788

8889
if matchMsg, ok := match["message"].(string); ok {
8990
a.Message, a.Description = formatMessages(matchMsg, s.Description, matchText)

internal/core/alert.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,10 @@ type Alert struct {
2727
Message string // the output message
2828
Severity string // 'suggestion', 'warning', or 'error'
2929
Match string // the actual matched text
30-
Line int // the source line
31-
Limit int `json:"-"` // the max times to report
32-
Hide bool `json:"-"` // should we hide this alert?
30+
Line int // the source line
31+
Limit int `json:"-"` // the max times to report
32+
Hide bool `json:"-"` // should we hide this alert?
33+
HasByteOffsets bool `json:"-"` // Span holds byte offsets into the raw document
3334
}
3435

3536
// FormatAlert ensures that all required fields have data.

internal/core/file.go

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,32 @@ func (f *File) assignLoc(ctx string, blk nlp.Block, pad int, a Alert) (int, []in
252252
return blk.Line + 1, a.Span
253253
}
254254

255+
// locFromByteOffset computes a 1-based line number and a [col, col+len] span
256+
// from absolute byte offsets into the raw document text. This avoids the
257+
// text-search approach used by FindLoc/initialPosition, which can report the
258+
// wrong location when the matched text appears more than once.
259+
func locFromByteOffset(ctx string, begin, end, pad int) (int, []int) {
260+
line := 1
261+
lineStart := 0
262+
263+
for i := 0; i < begin && i < len(ctx); i++ {
264+
if ctx[i] == '\n' {
265+
line++
266+
lineStart = i + 1
267+
}
268+
}
269+
270+
col := nlp.StrLen(ctx[lineStart:begin]) + 1 + pad
271+
matchLen := nlp.StrLen(ctx[begin:end])
272+
273+
span := []int{col, col + matchLen - 1}
274+
if span[1] <= 0 {
275+
span[1] = 1
276+
}
277+
278+
return line, span
279+
}
280+
255281
// SetText updates the file's content, lines, and history.
256282
func (f *File) SetText(s string) {
257283
f.Content = s
@@ -271,19 +297,31 @@ func (f *File) AddAlert(a Alert, blk nlp.Block, lines, pad int, lookup bool) {
271297
ctx = old
272298
}
273299

274-
// NOTE: If the `ctx` document is large (as could be the case with
275-
// `scope: raw`) this is *slow*. Thus, the cap at 1k.
300+
// When the alert carries byte offsets from a script rule and falls within
301+
// the document, compute line:column directly from those offsets instead of
302+
// performing a text search. This fixes incorrect position reporting for
303+
// script rules with `scope: raw` when the matched text appears more than
304+
// once.
276305
//
277-
// TODO: Actually fix this.
278-
if len(a.Offset) == 0 && strings.Count(ctx, a.Match) > 1 && len(ctx) < 1000 {
279-
a.Offset = append(a.Offset, strings.Fields(ctx[0:a.Span[0]])...)
280-
}
306+
// We use blk.Context (the original document) rather than ctx, which may
307+
// have been modified by ChkToCtx substitutions from earlier alerts.
308+
if a.HasByteOffsets && a.Span[0] >= 0 && a.Span[1] <= len(blk.Context) {
309+
a.Line, a.Span = locFromByteOffset(blk.Context, a.Span[0], a.Span[1], pad)
310+
} else {
311+
// NOTE: If the `ctx` document is large (as could be the case with
312+
// `scope: raw`) this is *slow*. Thus, the cap at 1k.
313+
//
314+
// TODO: Actually fix this.
315+
if len(a.Offset) == 0 && strings.Count(ctx, a.Match) > 1 && len(ctx) < 1000 {
316+
a.Offset = append(a.Offset, strings.Fields(ctx[0:a.Span[0]])...)
317+
}
281318

282-
if !lookup {
283-
a.Line, a.Span = f.assignLoc(ctx, blk, pad, a)
284-
}
285-
if (!lookup && a.Span[0] < 0) || lookup {
286-
a.Line, a.Span = f.FindLoc(ctx, blk.Text, pad, lines, a)
319+
if !lookup {
320+
a.Line, a.Span = f.assignLoc(ctx, blk, pad, a)
321+
}
322+
if (!lookup && a.Span[0] < 0) || lookup {
323+
a.Line, a.Span = f.FindLoc(ctx, blk.Text, pad, lines, a)
324+
}
287325
}
288326

289327
if a.Span[0] > 0 {

testdata/features/checks.feature

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ Feature: Checks
33
When I test "checks/Script"
44
Then the output should contain exactly:
55
"""
6-
test.md:4:19:Scripts.CustomMsg:Some message
6+
test.md:1:2:Scripts.CustomMsg:Some message
77
test.md:29:1:Checks.ScriptRE:Consider inserting a new section heading at this point.
88
test.md:39:1:Checks.ScriptRE:Consider inserting a new section heading at this point.
99
"""

0 commit comments

Comments
 (0)