Skip to content

Commit e50f480

Browse files
Scrub sensitive paths and emails from deploy telemetry error messages
Adds a scrubber that runs before error messages are sent to telemetry:
1. Replaces the bundle root path with "." to avoid leaking local paths.
2. Redacts remaining home directory paths (/Users/..., /home/..., C:\Users\...).
3. Redacts email addresses (e.g., in workspace paths).

Inspired by VS Code's telemetry path scrubbing and Sentry's @userpath rule.

Co-authored-by: Isaac
1 parent a2fb5dd commit e50f480

File tree

3 files changed

+210
-0
lines changed

3 files changed

+210
-0
lines changed

bundle/phases/telemetry.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ const maxErrorMessageLength = 500
3838

3939
// LogDeployTelemetry logs a telemetry event for a bundle deploy command.
4040
func LogDeployTelemetry(ctx context.Context, b *bundle.Bundle, errMsg string) {
41+
errMsg = scrubForTelemetry(errMsg, b.BundleRootPath)
42+
4143
if len(errMsg) > maxErrorMessageLength {
4244
errMsg = errMsg[:maxErrorMessageLength]
4345
}

bundle/phases/telemetry_scrub.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package phases
2+
3+
import (
4+
"path/filepath"
5+
"regexp"
6+
"strings"
7+
)
8+
9+
// Scrub sensitive information from error messages before sending to telemetry.
10+
// Inspired by VS Code's telemetry path scrubbing and Sentry's @userpath pattern.
11+
//
12+
// References:
13+
// - VS Code: https://github.com/microsoft/vscode/blob/main/src/vs/platform/telemetry/common/telemetryUtils.ts
14+
// - Sentry: https://github.com/getsentry/relay (PII rule: @userpath)
15+
var (
	// unixHomeDirRegexp matches home directory paths on macOS and Linux.
	// Go's regexp package (RE2) has no lookbehind, so the pattern instead
	// requires the path to start the message or to follow a delimiter. The
	// delimiter is captured as group 1 so the replacement can put it back.
	// This keeps workspace paths like /Workspace/Users/... from matching.
	unixHomeDirRegexp = regexp.MustCompile(`(^|[\s:,"'])/(?:Users|home)/[^\s:,"']+`)

	// windowsHomeDirRegexp matches home directory paths on Windows with either
	// backslashes or forward slashes (C:\Users\xxx\... or C:/Users/xxx/...).
	// The drive letter and the "Users" segment are matched case-insensitively
	// because Windows paths are case-insensitive (c:\users\xxx is the same
	// directory).
	windowsHomeDirRegexp = regexp.MustCompile(`(?i:[a-z]:[/\\]users[/\\])[^\s:,"']+`)

	// emailRegexp matches email addresses. Workspace paths in Databricks often
	// contain emails (e.g., /Workspace/Users/user@example.com/.bundle/dev).
	emailRegexp = regexp.MustCompile(`[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}`)
)

// scrubForTelemetry is a best-effort scrubber that removes sensitive path and
// PII information from error messages before they are sent to telemetry.
// The error message is treated as PII and should not be logged without scrubbing.
//
// It applies three passes, in order:
//  1. Occurrences of bundleRoot are rewritten relative to "." (skipped when
//     bundleRoot is empty or is only a filesystem/volume root).
//  2. Remaining Windows and Unix home directory paths become "[REDACTED_PATH]".
//  3. Email addresses become "[REDACTED_EMAIL]".
//
// The scrubbed message is returned; msg itself is not modified.
func scrubForTelemetry(msg, bundleRoot string) string {
	// Replace the bundle root path first since it's the most specific match.
	// This turns "/Users/shreyas/project/databricks.yml" into "./databricks.yml".
	//
	// Trailing separators are trimmed so that a root written as "/srv/proj/"
	// behaves like "/srv/proj". A root that is nothing but separators ("/") or
	// a bare volume ("C:") is skipped: replacing it would rewrite every slash
	// or drive prefix in the message and defeat the home directory patterns
	// below.
	normalized := strings.TrimRight(filepath.ToSlash(bundleRoot), "/")
	volume := filepath.ToSlash(filepath.VolumeName(bundleRoot))
	if normalized != "" && normalized != volume {
		msg = strings.ReplaceAll(msg, normalized+"/", "./")
		msg = strings.ReplaceAll(msg, normalized, ".")

		// When the OS separator is not "/", the message may also contain the
		// root in its native form, so replace that spelling as well.
		raw := strings.TrimRight(bundleRoot, "/"+string(filepath.Separator))
		if raw != normalized {
			msg = strings.ReplaceAll(msg, raw+string(filepath.Separator), "./")
			msg = strings.ReplaceAll(msg, raw, ".")
		}
	}

	// Redact remaining home directory paths that weren't covered by the
	// bundle root replacement. Run Windows first to avoid partial matches
	// from the Unix regex on paths like C:/Users/...
	msg = windowsHomeDirRegexp.ReplaceAllString(msg, "[REDACTED_PATH]")

	// ${1} restores the delimiter (or empty string at start of message) that
	// the Unix pattern consumed in front of the path.
	msg = unixHomeDirRegexp.ReplaceAllString(msg, "${1}[REDACTED_PATH]")

	// Redact email addresses.
	msg = emailRegexp.ReplaceAllString(msg, "[REDACTED_EMAIL]")

	return msg
}
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
package phases
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestScrubForTelemetry_BundleRootPath(t *testing.T) {
10+
tests := []struct {
11+
name string
12+
msg string
13+
bundleRoot string
14+
expected string
15+
}{
16+
{
17+
name: "replaces bundle root in file path",
18+
msg: "failed to load /home/user/project/databricks.yml: invalid config",
19+
bundleRoot: "/home/user/project",
20+
expected: "failed to load ./databricks.yml: invalid config",
21+
},
22+
{
23+
name: "replaces bundle root without trailing content",
24+
msg: "error at /home/user/project",
25+
bundleRoot: "/home/user/project",
26+
expected: "error at .",
27+
},
28+
{
29+
name: "replaces multiple occurrences",
30+
msg: "path /home/user/project/a.yml and /home/user/project/b.yml",
31+
bundleRoot: "/home/user/project",
32+
expected: "path ./a.yml and ./b.yml",
33+
},
34+
{
35+
name: "empty bundle root is no-op",
36+
msg: "some error",
37+
bundleRoot: "",
38+
expected: "some error",
39+
},
40+
{
41+
name: "empty message",
42+
msg: "",
43+
bundleRoot: "/home/user/project",
44+
expected: "",
45+
},
46+
}
47+
48+
for _, tt := range tests {
49+
t.Run(tt.name, func(t *testing.T) {
50+
assert.Equal(t, tt.expected, scrubForTelemetry(tt.msg, tt.bundleRoot))
51+
})
52+
}
53+
}
54+
55+
func TestScrubForTelemetry_HomeDirPaths(t *testing.T) {
56+
tests := []struct {
57+
name string
58+
msg string
59+
expected string
60+
}{
61+
{
62+
name: "macOS home dir",
63+
msg: "failed to read /Users/shreyas/other-project/file.yml",
64+
expected: "failed to read [REDACTED_PATH]",
65+
},
66+
{
67+
name: "Linux home dir",
68+
msg: "failed to read /home/runner/work/project/file.yml",
69+
expected: "failed to read [REDACTED_PATH]",
70+
},
71+
{
72+
name: "home dir in middle of message",
73+
msg: "error: /Users/jane/project/a.yml: not found, try again",
74+
expected: "error: [REDACTED_PATH]: not found, try again",
75+
},
76+
{
77+
name: "Windows home dir with backslashes",
78+
msg: `error at C:\Users\shreyas\project\file.yml`,
79+
expected: "error at [REDACTED_PATH]",
80+
},
81+
{
82+
name: "Windows home dir with forward slashes",
83+
msg: "error at C:/Users/shreyas/project/file.yml",
84+
expected: "error at [REDACTED_PATH]",
85+
},
86+
{
87+
name: "preserves relative paths",
88+
msg: "failed to load ./resources/job.yml",
89+
expected: "failed to load ./resources/job.yml",
90+
},
91+
{
92+
name: "preserves workspace paths without email",
93+
msg: "uploading to /Workspace/.bundle/dev/files",
94+
expected: "uploading to /Workspace/.bundle/dev/files",
95+
},
96+
}
97+
98+
for _, tt := range tests {
99+
t.Run(tt.name, func(t *testing.T) {
100+
assert.Equal(t, tt.expected, scrubForTelemetry(tt.msg, ""))
101+
})
102+
}
103+
}
104+
105+
func TestScrubForTelemetry_Emails(t *testing.T) {
106+
tests := []struct {
107+
name string
108+
msg string
109+
expected string
110+
}{
111+
{
112+
name: "email in workspace path",
113+
msg: "/Workspace/Users/user@example.com/.bundle/dev should contain current username",
114+
expected: "/Workspace/Users/[REDACTED_EMAIL]/.bundle/dev should contain current username",
115+
},
116+
{
117+
name: "plain email",
118+
msg: "access denied for user@company.io",
119+
expected: "access denied for [REDACTED_EMAIL]",
120+
},
121+
{
122+
name: "no email present",
123+
msg: "some error without emails",
124+
expected: "some error without emails",
125+
},
126+
}
127+
128+
for _, tt := range tests {
129+
t.Run(tt.name, func(t *testing.T) {
130+
assert.Equal(t, tt.expected, scrubForTelemetry(tt.msg, ""))
131+
})
132+
}
133+
}
134+
135+
func TestScrubForTelemetry_Combined(t *testing.T) {
136+
msg := "failed to load /Users/shreyas/myproject/databricks.yml: " +
137+
"workspace /Workspace/Users/shreyas@databricks.com/.bundle is invalid, " +
138+
"also tried /home/other/fallback/config.yml"
139+
140+
got := scrubForTelemetry(msg, "/Users/shreyas/myproject")
141+
142+
assert.Equal(t,
143+
"failed to load ./databricks.yml: "+
144+
"workspace /Workspace/Users/[REDACTED_EMAIL]/.bundle is invalid, "+
145+
"also tried [REDACTED_PATH]",
146+
got,
147+
)
148+
}

0 commit comments

Comments
 (0)