Skip to content

Commit a69ebd9

Browse files
Better globbing support
1 parent 43fecb4 commit a69ebd9

10 files changed

Lines changed: 242 additions & 79 deletions

File tree

README.md

Lines changed: 1 addition & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -50,55 +50,10 @@ cat source.xml | ./unaware -format xml -method deterministic > masked.xml
5050

5151
### Filtering
5252

53-
You can control which fields are masked using the `-include` and `-exclude` flags, which both accept glob patterns (e.g., `user.*`, `session.ip_*`).
53+
You can control which fields are masked using the `-include` and `-exclude` flags, which both accept glob patterns matched against the dot-separated field path (e.g., `user.*`, `session.ip_*`, `**.email`, `user.*.id`). A single `*` matches within one path segment and does not cross a `.`, while `**` matches across any number of nested levels.
5454

5555
- **Default Behavior:** If no flags are used, all fields are masked.
5656
- **Using `-include`:** Specifies which fields *should* be masked. When `-include` patterns are used, only fields matching them will be considered for masking.
5757
- **Using `-exclude`:** Specifies fields that *should not* be masked, creating exceptions.
5858
- **Combining Flags:** When used together, `-exclude` always takes precedence. A field is only masked if it matches an `-include` pattern but does *not* match an `-exclude` pattern. If only `-exclude` is used, all fields are masked *except* for those that match an exclusion pattern.
5959

60-
For example, given `data.json`:
61-
```json
62-
{
63-
"user": {
64-
"id": "aa1",
65-
"personal_info": {
66-
"subscriber": "uuid-123",
67-
"name": "Jane Doe",
68-
"email": "jane.doe@example.com"
69-
}
70-
},
71-
"session": {
72-
"ip_address": "198.51.100.22",
73-
"timestamp": "2025-11-25T10:00:00Z"
74-
}
75-
}
76-
```
77-
78-
**Goal:** Mask all sensitive user details and the session IP address, but leave the subscriber field untouched for reference.
79-
80-
**Command:**
81-
```shell
82-
./unaware -in data.json -include 'user.personal_info.*' -include 'session.ip_address' -exclude 'user.personal_info.subscriber'
83-
```
84-
85-
**Explanation:**
86-
The command first designates all fields under `user.personal_info` and `session.ip_address` for masking with the `-include` flags. Then, the `-exclude` flag creates an exception for `user.personal_info.subscriber`, preventing it from being masked even though it was matched by the include pattern.
87-
88-
**Result:**
89-
```json
90-
{
91-
"user": {
92-
"id": "aa1",
93-
"personal_info": {
94-
"subscriber": "uuid-123",
95-
"name": "Burger Iron",
96-
"email": "kees@friet.nl"
97-
}
98-
},
99-
"session": {
100-
"ip_address": "238.108.102.226",
101-
"timestamp": "2025-11-25T10:00:00Z"
102-
}
103-
}
104-
```

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ require (
1919
github.com/cespare/xxhash/v2 v2.1.1 // indirect
2020
github.com/davecgh/go-spew v1.1.1 // indirect
2121
github.com/dustin/go-humanize v1.0.1 // indirect
22+
github.com/gobwas/glob v0.2.3
2223
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
2324
github.com/pkg/errors v0.9.1 // indirect
2425
github.com/pmezard/go-difflib v1.0.0 // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 h1:fAjc9m62+UWV/WA
1515
github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
1616
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
1717
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
18+
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
19+
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
1820
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
1921
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
2022
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=

main.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,9 @@ func main() {
3333
fmt.Fprintf(out, " unaware -format json -in input.json -out masked.json\n\n")
3434
fmt.Fprintf(out, " # Mask a CSV file, keeping the output consistent between runs\n")
3535
fmt.Fprintf(out, " STATIC_SALT=secret-key unaware -format csv -method deterministic -in data.csv > data_masked.csv\n\n")
36-
fmt.Fprintf(out, " # Mask only email fields in a large JSON file\n")
37-
fmt.Fprintf(out, " cat users.json | unaware -format json -include \"*.email\" > masked.json\n\n")
36+
fmt.Fprintf(out, " # Mask only email fields (using a glob pattern) in a large JSON file\n")
37+
fmt.Fprintf(out, " # Use \"**\" to match across multiple nested levels (e.g., \"**.email\")\n")
38+
fmt.Fprintf(out, " cat users.json | unaware -format json -include \"**.email\" > masked.json\n\n")
3839
fmt.Fprintf(out, "FLAGS:\n")
3940
flag.PrintDefaults()
4041
}

pkg/engine.go

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,34 @@ import (
99
"io"
1010
"net"
1111
"net/url"
12-
"path/filepath"
1312
"regexp"
1413
"strconv"
1514
"strings"
1615
"time"
1716

18-
"github.com/araddon/dateparse"
1917
"github.com/brianvoe/gofakeit/v6"
2018
"github.com/dgraph-io/ristretto"
19+
"github.com/gobwas/glob"
2120
"github.com/google/uuid"
21+
"github.com/jacoelho/banking/iban"
2222
"github.com/nyaruka/phonenumbers"
2323
"github.com/theplant/luhn"
2424
"golang.org/x/text/cases"
2525
"golang.org/x/text/language"
2626

27-
"github.com/jacoelho/banking/iban"
27+
"github.com/araddon/dateparse"
2828
)
2929

3030
// AppConfig holds the complete configuration for a masking operation.
3131
type AppConfig struct {
32-
Format string
33-
CPUCount int
34-
Include []string
35-
Exclude []string
36-
FirstN int
37-
Masker MaskerConfig
32+
Format string `json:"format"`
33+
CPUCount int `json:"cpu_count"`
34+
Include []string `json:"include"`
35+
Exclude []string `json:"exclude"`
36+
FirstN int `json:"first_n"`
37+
Masker MaskerConfig
38+
IncludeGlobs []glob.Glob `json:"-"`
39+
ExcludeGlobs []glob.Glob `json:"-"`
3840
}
3941

4042
type processor interface {
@@ -57,6 +59,23 @@ type MaskerConfig struct {
5759

5860
// Start initiates the masking process based on the provided configuration.
5961
func Start(r io.Reader, w io.Writer, config AppConfig) error {
62+
// Pre-compile glob patterns once at startup for performance during masking.
63+
// This avoids re-parsing the patterns for every key in the input data.
64+
for _, pattern := range config.Include {
65+
g, err := glob.Compile(pattern, '.')
66+
if err != nil {
67+
return fmt.Errorf("invalid include pattern %q: %w", pattern, err)
68+
}
69+
config.IncludeGlobs = append(config.IncludeGlobs, g)
70+
}
71+
for _, pattern := range config.Exclude {
72+
g, err := glob.Compile(pattern, '.')
73+
if err != nil {
74+
return fmt.Errorf("invalid exclude pattern %q: %w", pattern, err)
75+
}
76+
config.ExcludeGlobs = append(config.ExcludeGlobs, g)
77+
}
78+
6079
var p processor
6180
switch config.Format {
6281
case "json":
@@ -74,17 +93,17 @@ func Start(r io.Reader, w io.Writer, config AppConfig) error {
7493
return p.Process(r, w)
7594
}
7695

77-
func shouldMask(key string, include, exclude []string) bool {
96+
func shouldMask(key string, include, exclude []glob.Glob) bool {
7897
if len(exclude) > 0 {
79-
for _, pattern := range exclude {
80-
if matched, _ := filepath.Match(pattern, key); matched {
98+
for _, g := range exclude {
99+
if g.Match(key) {
81100
return false
82101
}
83102
}
84103
}
85104
if len(include) > 0 {
86-
for _, pattern := range include {
87-
if matched, _ := filepath.Match(pattern, key); matched {
105+
for _, g := range include {
106+
if g.Match(key) {
88107
return true
89108
}
90109
}
@@ -155,11 +174,11 @@ func newConcurrentRunner(methodFactory func() *masker, config AppConfig) *concur
155174
}
156175

157176
type masker struct {
158-
faker *gofakeit.Faker
159-
seeder seeder
160-
cache *ristretto.Cache
161-
dateLayouts []string
162-
emailRegex *regexp.Regexp
177+
faker *gofakeit.Faker
178+
seeder seeder
179+
cache *ristretto.Cache
180+
dateLayouts []string
181+
emailRegex *regexp.Regexp
163182
numLikeRegex *regexp.Regexp
164183
ulidRegex *regexp.Regexp
165184
ksuidRegex *regexp.Regexp
@@ -179,8 +198,8 @@ func newMasker(config MaskerConfig) *masker {
179198
"01/02/2006",
180199
time.RFC1123,
181200
},
182-
emailRegex: regexp.MustCompile(`^[^@\s]+@[^@\s]+\.[^@\s]+$`),
183-
numLikeRegex: regexp.MustCompile(`^[\d\s-]+$`),
201+
emailRegex: regexp.MustCompile(`^[^@\s]+@[^@\s]+\.[^@\s]+$`),
202+
numLikeRegex: regexp.MustCompile(`^[\d\s-]+$`),
184203
ulidRegex: regexp.MustCompile(`(?i)^[0-7][0-9a-hjkmnp-tv-z]{25}$`),
185204
ksuidRegex: regexp.MustCompile(`^[a-zA-Z0-9]{27}$`),
186205
creditCardRegex: regexp.MustCompile(`^(?:\d[ -]*?){13,16}$`),

pkg/json.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ func isWhitespace(c byte) bool { return c == ' ' || c == '\n' || c == '\r' || c
180180
func (jp *jsonProcessor) recursiveMask(m *masker, key string, data any) any {
181181
switch v := data.(type) {
182182
case json.Number:
183-
if shouldMask(key, jp.config.Include, jp.config.Exclude) {
183+
if shouldMask(key, jp.config.IncludeGlobs, jp.config.ExcludeGlobs) {
184184
s := v.String()
185185
if strings.Contains(s, ".") {
186186
parts := strings.Split(s, ".")
@@ -191,7 +191,7 @@ func (jp *jsonProcessor) recursiveMask(m *masker, key string, data any) any {
191191
}
192192
return v
193193
case string, bool, nil:
194-
if shouldMask(key, jp.config.Include, jp.config.Exclude) {
194+
if shouldMask(key, jp.config.IncludeGlobs, jp.config.ExcludeGlobs) {
195195
return m.mask(v)
196196
}
197197
return v

pkg/worker.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ func (cr *concurrentRunner) worker(wg *sync.WaitGroup, jobs <-chan job, results
9797
func (cr *concurrentRunner) recursiveMask(m *masker, key string, data any) any {
9898
switch v := data.(type) {
9999
case json.Number, string, bool, nil:
100-
if shouldMask(key, cr.config.Include, cr.config.Exclude) {
100+
if shouldMask(key, cr.config.IncludeGlobs, cr.config.ExcludeGlobs) {
101101
return m.mask(v)
102102
}
103103
return v
@@ -107,7 +107,7 @@ func (cr *concurrentRunner) recursiveMask(m *masker, key string, data any) any {
107107
if k == "#text" {
108108
// This is the text content of the parent element (e.g., the "2002" in <year>2002</year>).
109109
// The key for filtering is the parent's key, which is already in the 'key' variable.
110-
if shouldMask(key, cr.config.Include, cr.config.Exclude) {
110+
if shouldMask(key, cr.config.IncludeGlobs, cr.config.ExcludeGlobs) {
111111
maskedMap[k] = m.mask(value)
112112
} else {
113113
maskedMap[k] = value
@@ -131,7 +131,7 @@ func (cr *concurrentRunner) recursiveMask(m *masker, key string, data any) any {
131131
}
132132
return maskedSlice
133133
default:
134-
if shouldMask(key, cr.config.Include, cr.config.Exclude) {
134+
if shouldMask(key, cr.config.IncludeGlobs, cr.config.ExcludeGlobs) {
135135
return m.mask(v)
136136
}
137137
return v

pkg/xml.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ func (xp *xmlProcessor) processSerially(decoder *xml.Decoder, w io.Writer) error
262262
for i := range startElem.Attr {
263263
attr := &startElem.Attr[i]
264264
fullKey := strings.Join(path, ".") + "." + attr.Name.Local
265-
if shouldMask(fullKey, xp.config.Include, xp.config.Exclude) {
265+
if shouldMask(fullKey, xp.config.IncludeGlobs, xp.config.ExcludeGlobs) {
266266
maskedValue := serialMasker.mask(attr.Value)
267267
attr.Value = fmt.Sprintf("%v", maskedValue)
268268
}
@@ -274,7 +274,7 @@ func (xp *xmlProcessor) processSerially(decoder *xml.Decoder, w io.Writer) error
274274
trimmedData := strings.TrimSpace(string(se))
275275
if len(trimmedData) > 0 {
276276
fullKey := strings.Join(path, ".")
277-
if shouldMask(fullKey, xp.config.Include, xp.config.Exclude) {
277+
if shouldMask(fullKey, xp.config.IncludeGlobs, xp.config.ExcludeGlobs) {
278278
maskedValue := serialMasker.mask(trimmedData)
279279
maskedString := fmt.Sprintf("%v", maskedValue)
280280
if err := encoder.EncodeToken(xml.CharData(maskedString)); err != nil {

test/filtering_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ func TestFilteringScenarios(t *testing.T) {
7575
name: "JSON - Exclude Only (Blacklist)",
7676
format: "json",
7777
input: jsonInput,
78-
exclude: []string{"*.id", "*.ip_address"},
78+
exclude: []string{"**.id", "**.ip_address"},
7979
expected: []string{
8080
`"id": "user-123"`,
8181
`"ip_address": "203.0.113.195"`,
@@ -84,8 +84,7 @@ func TestFilteringScenarios(t *testing.T) {
8484
},
8585
},
8686
{
87-
name: "JSON - Include Only (Whitelist)",
88-
format: "json",
87+
name: "JSON - Include Only (Whitelist)", format: "json",
8988
input: jsonInput,
9089
include: []string{"user.personal.*"},
9190
expected: []string{

0 commit comments

Comments
 (0)