-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathword_position.go
More file actions
119 lines (96 loc) · 3.38 KB
/
word_position.go
File metadata and controls
119 lines (96 loc) · 3.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package main
import (
	"fmt"
	"unicode"
	"unicode/utf8"
)
// WordPosition describes where a single word sits inside the string it was
// extracted from.
//
// Start and End are byte offsets into that string (the index space used by Go
// string slicing), so text[Start:End+1] == Word. For ASCII-only text a byte
// offset is also the character index.
type WordPosition struct {
	Start int    // 0-based offset of the word's first byte
	End   int    // 0-based offset of the word's last byte (inclusive)
	Word  string // The word itself, without surrounding whitespace or punctuation
}

// GetWordPositionsSingleLine splits text into words and reports where each one
// lies.
//
// A word is a maximal run of characters that are neither whitespace
// (unicode.IsSpace) nor punctuation (unicode.IsPunct). The text is decoded
// rune by rune, so a multi-byte UTF-8 character is always classified as a
// whole; classifying its individual bytes would, for example, mistake the
// second byte of "ç" (0xA7) for the punctuation mark "§" and cut the word in
// half. Bytes that are not valid UTF-8 decode as U+FFFD, which is neither
// whitespace nor punctuation, and therefore stay inside the surrounding word.
//
// Words are returned in order of appearance. The result is nil when text
// contains no words.
func GetWordPositionsSingleLine(text string) []WordPosition {
	var wordPositions []WordPosition

	// start is the byte offset at which the word currently being scanned
	// began, or -1 while the scanner is between words.
	start := -1
	for i, r := range text {
		if unicode.IsSpace(r) || unicode.IsPunct(r) {
			if start >= 0 {
				// The separator begins at i, so the word's last byte is i-1.
				wordPositions = append(wordPositions, WordPosition{
					Start: start,
					End:   i - 1,
					Word:  text[start:i],
				})
				start = -1
			}
			continue
		}
		if start < 0 {
			start = i
		}
	}

	// A word that runs up to the end of the text has no trailing separator to
	// flush it, so emit it here.
	if start >= 0 {
		wordPositions = append(wordPositions, WordPosition{
			Start: start,
			End:   len(text) - 1,
			Word:  text[start:],
		})
	}

	return wordPositions
}
// GetWordAtIndex finds the word that covers the given position in text.
//
// index is a 0-based byte offset into text. It may point at any byte of a
// multi-byte UTF-8 character; the character is then treated as a whole. A word
// is a maximal run of characters that are neither whitespace (unicode.IsSpace)
// nor punctuation (unicode.IsPunct).
//
// It returns the word together with the byte offsets of its first and last
// byte (both inclusive), so text[startIndex:endIndex+1] == word. If index is
// out of range, or the character at index is whitespace or punctuation, it
// returns "", -1, -1.
func GetWordAtIndex(text string, index int) (word string, startIndex int, endIndex int) {
	if index < 0 || index >= len(text) {
		return "", -1, -1
	}

	isSeparator := func(r rune) bool {
		return unicode.IsSpace(r) || unicode.IsPunct(r)
	}

	// index may land on a continuation byte; step back (at most UTFMax-1
	// bytes) to the first byte of the character that contains it.
	pos := index
	for pos > 0 && index-pos < utf8.UTFMax-1 && !utf8.RuneStart(text[pos]) {
		pos--
	}
	r, size := utf8.DecodeRuneInString(text[pos:])
	if pos+size <= index {
		// Malformed UTF-8: the character found by stepping back does not
		// actually reach index, so treat the byte at index on its own.
		pos = index
		r, size = utf8.DecodeRuneInString(text[pos:])
	}

	if isSeparator(r) {
		return "", -1, -1
	}

	// Grow the word to the left, one whole character at a time.
	startIndex = pos
	for startIndex > 0 {
		prev, width := utf8.DecodeLastRuneInString(text[:startIndex])
		if isSeparator(prev) {
			break
		}
		startIndex -= width
	}

	// Grow the word to the right; end is the exclusive upper bound.
	end := pos + size
	for end < len(text) {
		next, width := utf8.DecodeRuneInString(text[end:])
		if isSeparator(next) {
			break
		}
		end += width
	}

	return text[startIndex:end], startIndex, end - 1
}
// test2 looks up the word at a couple of positions in a sample sentence with
// GetWordAtIndex and prints the outcome of each lookup.
func test2() {
	const sample = "Hello, this is a text with words and some punctuation!"

	// Offset 15 is the one-letter word "a"; offset 5 is the comma after "Hello".
	for _, pos := range []int{15, 5} {
		found, from, to := GetWordAtIndex(sample, pos)
		if found == "" {
			fmt.Printf("At index %d, no word was found.\n", pos)
			continue
		}
		fmt.Printf("At index %d, the word is \"%s\" (from index %d to %d)\n", pos, found, from, to)
	}
}
// test runs GetWordPositionsSingleLine over a sample sentence and prints the
// Start, End and Word of every entry it returns, one per line.
func test() {
	const sample = "Hello, this is a text with words and some punctuation!"

	positions := GetWordPositionsSingleLine(sample)
	for i := 0; i < len(positions); i++ {
		p := positions[i]
		fmt.Printf("Start: %d, End: %d, Word: \"%s\"\n", p.Start, p.End, p.Word)
	}
}