From cf739f5776f09254e3238303f056af52360f09ec Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Fri, 3 Sep 2021 13:19:59 +0200 Subject: [PATCH 1/4] Demonstrate in unit tests how printf truncation works The upstream printf code already has support for string truncation. This commit demonstrates/tests it via unit tests. --- markers_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/markers_test.go b/markers_test.go index 18f216e..e8a8f8b 100644 --- a/markers_test.go +++ b/markers_test.go @@ -204,6 +204,37 @@ func TestPrinter(t *testing.T) { {func(w p) { w.Print(buf) }, "safe ‹unsafe›"}, {func(w p) { w.Printf("%v", &buf) }, "safe ‹unsafe›"}, {func(w p) { w.Print(&buf) }, "safe ‹unsafe›"}, + + // Redaction supports truncation for simple strings like printf. + {func(w p) { w.Printf("tr %.3s %.3s", "hello", Safe("world")) }, `tr ‹hel› wor`}, + {func(w p) { w.Printf("tr %.3q %.3q", "hello", Safe("world")) }, `tr ‹"hel"› "wor"`}, + {func(w p) { w.Printf("tr %.3v %.3v", "hello", Safe("world")) }, `tr ‹hel› wor`}, + {func(w p) { w.Printf("tr %#.3q %#.3q", "hello", Safe("world")) }, "tr ‹`hel`› `wor`"}, + {func(w p) { w.Printf("tr %.1T", 123) }, `tr i`}, + {func(w p) { w.Printf("tr %.3v", map[string]string{"hello": "world"}) }, `tr map[‹hel›:‹wor›]`}, + // Note that the precision affects the amount of input represented, not the size of the output. + // For example, with hexadecimal output. + {func(w p) { w.Printf("tr %.3x", "hello") }, `tr ‹68656c›`}, + // Respect UTF-8 boundaries. + {func(w p) { w.Printf("tr %.1v", "☃☀") }, `tr ‹☃›`}, + // Byte arrays are either represented as byte values or strings. + {func(w p) { w.Printf("tr %.1v %.1v", []byte("hello"), Safe([]byte("world"))) }, + `tr [‹104› ‹101› ‹108› ‹108› ‹111›] [119 111 114 108 100]`}, + {func(w p) { w.Printf("tr %.3s %.3s", []byte("hello"), Safe([]byte("world"))) }, `tr ‹hel› wor`}, + // Special values do not get truncated. + {func(w p) { w.Printf("tr %.1v", nil) }, `tr `}, + // Simple objects like booleans, integers etc do not get truncated. + {func(w p) { w.Printf("tr %.1v %.1v %.1v", 123, true, 11i) }, `tr ‹123› ‹true› (‹0›‹+1e+01›i)`}, + {func(w p) { w.Printf("tr %.1v %.1v %.1v", Safe(123), Safe(true), Safe(11i)) }, `tr 123 true (0+1e+01i)`}, + // Redactable strings do not get truncated by precision, because they have internal structure + // that could be broken by truncation. + {func(w p) { w.Printf("tr %.3v", RedactableString("‹hello› world")) }, `tr ‹hello› world`}, + // By default, complex objects with a SafeFormat function do not get truncated by precision. + // This is because SafeFormat is itself responsible for implementing truncation by recognizing + // the precision field. + {func(w p) { w.Printf("tr %.3v", &safeNil{}) }, `tr hello ‹world›`}, + {func(w p) { w.Printf("tr %.3v", buf) }, "tr safe ‹unsafe›"}, + } var methods = []struct { From 5766a3a613f0e3a7be95a078114b4b220afa3a75 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Fri, 3 Sep 2021 11:05:59 +0200 Subject: [PATCH 2/4] Fix the diff on format.go This should have been done when format.go moved to a new package. --- internal/rfmt/format.go.diff | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/internal/rfmt/format.go.diff b/internal/rfmt/format.go.diff index 7bf918c..4105965 100644 --- a/internal/rfmt/format.go.diff +++ b/internal/rfmt/format.go.diff @@ -1,12 +1,18 @@ ---- format.go.orig 2021-06-18 16:45:51.065116000 +0200 -+++ format.go 2021-06-18 17:30:58.628179000 +0200 -@@ -1,3 +1,6 @@ +--- format.go.orig 2021-09-03 11:01:24.164634000 +0200 ++++ format.go 2021-09-03 11:05:03.341135000 +0200 +@@ -1,8 +1,11 @@ +// Code generated from the Go standard library. DO NOT EDIT +// GENERATED FILE DO NOT EDIT +// // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + +-package fmt ++package rfmt + + import ( + "strconv" @@ -65,25 +68,17 @@ if n <= 0 { // No padding bytes needed. return From 427fc48f2b7ffd75442950ec8ec2f8e503719c56 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Fri, 3 Sep 2021 11:06:29 +0200 Subject: [PATCH 3/4] Refresh the printf code from upstream go 1.17 (Using the refresh.sh script) --- internal/rfmt/fmtsort/sort.go | 2 +- internal/rfmt/format.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/rfmt/fmtsort/sort.go b/internal/rfmt/fmtsort/sort.go index 9d2d580..a64d5cd 100644 --- a/internal/rfmt/fmtsort/sort.go +++ b/internal/rfmt/fmtsort/sort.go @@ -132,7 +132,7 @@ func compare(aVal, bVal reflect.Value) int { default: return -1 } - case reflect.Ptr: + case reflect.Ptr, reflect.UnsafePointer: a, b := aVal.Pointer(), bVal.Pointer() switch { case a < b: diff --git a/internal/rfmt/format.go b/internal/rfmt/format.go index 49296f4..56c81d3 100644 --- a/internal/rfmt/format.go +++ b/internal/rfmt/format.go @@ -315,7 +315,7 @@ func (f *fmt) fmtInteger(u uint64, base int, isSigned bool, verb rune, digits st f.zero = oldZero } -// truncate truncates the string s to the specified precision, if present. +// truncateString truncates the string s to the specified precision, if present. func (f *fmt) truncateString(s string) string { if f.precPresent { n := f.prec From afd883e2c682b5ffcb0a6c4a2dfc2711a105edad Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Fri, 3 Sep 2021 13:29:01 +0200 Subject: [PATCH 4/4] New verb flag '!' to report truncation ellipsis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the flag `!` is passed in combination with a precision (e.g. `%!.3v`), the truncation code now adds an ellipsis (`…`) if truncation has actually occurred. For example: `printf("%!.3s", "hel")` -> `‹hel›` `printf("%!.3s", "hello")` -> `‹hel…›` --- internal/rfmt/format.go | 26 ++++++++++++- internal/rfmt/format.go.diff | 71 ++++++++++++++++++++++++++++++++++-- internal/rfmt/print.go | 2 + internal/rfmt/print.go.diff | 17 +++++++-- markers_test.go | 11 ++++++ 5 files changed, 117 insertions(+), 10 deletions(-) diff --git a/internal/rfmt/format.go b/internal/rfmt/format.go index 56c81d3..7d953cd 100644 --- a/internal/rfmt/format.go +++ b/internal/rfmt/format.go @@ -15,6 +15,8 @@ import ( const ( ldigits = "0123456789abcdefx" udigits = "0123456789ABCDEFX" + + ellipsisS = "…" ) const ( @@ -32,6 +34,9 @@ type fmtFlags struct { space bool zero bool + // ellipsis tells printf to insert an ellipsis after a truncated value. + ellipsis bool + // For the formats %+v %#v, we set the plusV/sharpV flags // and clear the plus/sharp flags since %+v and %#v are in effect // different, flagless formats set at the top level. @@ -322,7 +327,11 @@ func (f *fmt) truncateString(s string) string { for i := range s { n-- if n < 0 { - return s[:i] + res := s[:i] + if f.ellipsis { + res += ellipsisS + } + return res } } } @@ -336,7 +345,11 @@ func (f *fmt) truncate(b []byte) []byte { for i := 0; i < len(b); { n-- if n < 0 { - return b[:i] + res := b[:i] + if f.ellipsis { + res = append(res, ellipsisS...) + } + return res } wid := 1 if b[i] >= utf8.RuneSelf { @@ -368,8 +381,12 @@ func (f *fmt) fmtSbx(s string, b []byte, digits string) { length = len(s) } // Set length to not process more bytes than the precision demands. + addEllipsis := false if f.precPresent && f.prec < length { length = f.prec + if f.ellipsis { + addEllipsis = true + } } // Compute width of the encoding taking into account the f.sharp and f.space flag. width := 2 * length @@ -421,6 +438,11 @@ func (f *fmt) fmtSbx(s string, b []byte, digits string) { f.buf.WriteByte(digits[c>>4]) f.buf.WriteByte(digits[c&0xF]) } + // If an ellipsis was requested, add it. + if addEllipsis { + f.buf.writeString(ellipsisS) + } + // Handle padding to the right. if f.widPresent && f.wid > width && f.minus { f.writePadding(f.wid - width) diff --git a/internal/rfmt/format.go.diff b/internal/rfmt/format.go.diff index 4105965..762e11b 100644 --- a/internal/rfmt/format.go.diff +++ b/internal/rfmt/format.go.diff @@ -1,5 +1,5 @@ --- format.go.orig 2021-09-03 11:01:24.164634000 +0200 -+++ format.go 2021-09-03 11:05:03.341135000 +0200 ++++ format.go 2021-09-03 11:06:51.632979000 +0200 @@ -1,8 +1,11 @@ +// Code generated from the Go standard library. DO NOT EDIT +// GENERATED FILE DO NOT EDIT @@ -13,7 +13,26 @@ import ( "strconv" -@@ -65,25 +68,17 @@ +@@ -12,6 +15,8 @@ + const ( + ldigits = "0123456789abcdefx" + udigits = "0123456789ABCDEFX" ++ ++ ellipsisS = "…" + ) + + const ( +@@ -29,6 +34,9 @@ + space bool + zero bool + ++ // ellipsis tells printf to insert an ellipsis after a truncated value. ++ ellipsis bool ++ + // For the formats %+v %#v, we set the plusV/sharpV flags + // and clear the plus/sharp flags since %+v and %#v are in effect + // different, flagless formats set at the top level. +@@ -65,25 +73,17 @@ if n <= 0 { // No padding bytes needed. return } @@ -42,7 +61,46 @@ } // pad appends b to f.buf, padded on left (!f.minus) or right (f.minus). -@@ -401,19 +396,20 @@ +@@ -327,7 +327,11 @@ + for i := range s { + n-- + if n < 0 { +- return s[:i] ++ res := s[:i] ++ if f.ellipsis { ++ res += ellipsisS ++ } ++ return res + } + } + } +@@ -341,7 +345,11 @@ + for i := 0; i < len(b); { + n-- + if n < 0 { +- return b[:i] ++ res := b[:i] ++ if f.ellipsis { ++ res = append(res, ellipsisS...) ++ } ++ return res + } + wid := 1 + if b[i] >= utf8.RuneSelf { +@@ -373,8 +381,12 @@ + length = len(s) + } + // Set length to not process more bytes than the precision demands. ++ addEllipsis := false + if f.precPresent && f.prec < length { + length = f.prec ++ if f.ellipsis { ++ addEllipsis = true ++ } + } + // Compute width of the encoding taking into account the f.sharp and f.space flag. + width := 2 * length +@@ -401,19 +413,20 @@ f.writePadding(f.wid - width) } // Write the encoding directly into the output buffer. @@ -67,7 +125,7 @@ } } if b != nil { -@@ -422,9 +418,9 @@ +@@ -422,9 +435,14 @@ c = s[i] // Take a byte from the input string. } // Encode each byte as two hexadecimal digits. @@ -76,6 +134,11 @@ + f.buf.WriteByte(digits[c&0xF]) } - *f.buf = buf ++ // If an ellipsis was requested, add it. ++ if addEllipsis { ++ f.buf.writeString(ellipsisS) ++ } ++ // Handle padding to the right. if f.widPresent && f.wid > width && f.minus { f.writePadding(f.wid - width) diff --git a/internal/rfmt/print.go b/internal/rfmt/print.go index 16ba3ce..b105bd0 100644 --- a/internal/rfmt/print.go +++ b/internal/rfmt/print.go @@ -1129,6 +1129,8 @@ formatLoop: p.fmt.zero = false // Do not pad with zeros to the right. case ' ': p.fmt.space = true + case '!': + p.fmt.ellipsis = true default: // Fast path for common case of ascii lower case simple verbs // without precision or width or argument indices. diff --git a/internal/rfmt/print.go.diff b/internal/rfmt/print.go.diff index 997e138..b63f0fa 100644 --- a/internal/rfmt/print.go.diff +++ b/internal/rfmt/print.go.diff @@ -1,5 +1,5 @@ ---- print.go.orig 2021-06-18 16:45:56.623967000 +0200 -+++ print.go 2021-06-19 16:26:32.751061000 +0200 +--- print.go.orig 2021-09-03 11:01:24.167095000 +0200 ++++ print.go 2021-09-03 11:06:51.631705000 +0200 @@ -1,16 +1,27 @@ +// Code generated from print.go.orig. DO NOT EDIT +// GENERATED FILE DO NOT EDIT @@ -471,7 +471,16 @@ end := len(format) argNum := 0 // we process one argument per non-trivial format afterIndex := false // previous item in format was an index like [3]. -@@ -1147,6 +1265,7 @@ +@@ -1011,6 +1129,8 @@ + p.fmt.zero = false // Do not pad with zeros to the right. + case ' ': + p.fmt.space = true ++ case '!': ++ p.fmt.ellipsis = true + default: + // Fast path for common case of ascii lower case simple verbs + // without precision or width or argument indices. +@@ -1147,6 +1267,7 @@ } func (p *pp) doPrint(a []interface{}) { @@ -479,7 +488,7 @@ prevString := false for argNum, arg := range a { isString := arg != nil && reflect.TypeOf(arg).Kind() == reflect.String -@@ -1162,6 +1281,7 @@ +@@ -1162,6 +1283,7 @@ // doPrintln is like doPrint but always adds a space between arguments // and a newline after the last argument. func (p *pp) doPrintln(a []interface{}) { diff --git a/markers_test.go b/markers_test.go index e8a8f8b..d2f390c 100644 --- a/markers_test.go +++ b/markers_test.go @@ -235,6 +235,17 @@ func TestPrinter(t *testing.T) { {func(w p) { w.Printf("tr %.3v", &safeNil{}) }, `tr hello ‹world›`}, {func(w p) { w.Printf("tr %.3v", buf) }, "tr safe ‹unsafe›"}, + // Additionally, it can report when truncation has occurred. + {func(w p) { w.Printf("tre %!.3v %!.3v", "hello", Safe("world")) }, `tre ‹hel…› wor…`}, + {func(w p) { w.Printf("tre %!.3s %!.3s", "hello", Safe("world")) }, `tre ‹hel…› wor…`}, + {func(w p) { w.Printf("tre %!.3q %!.3q", "hello", Safe("world")) }, `tre ‹"hel…"› "wor…"`}, + {func(w p) { w.Printf("tre %!#.3q %!#.3q", "hello", Safe("world")) }, "tre ‹`hel…`› `wor…`"}, + {func(w p) { w.Printf("tre %!.3v", map[string]string{"hello": "world"}) }, `tre map[‹hel…›:‹wor…›]`}, + {func(w p) { w.Printf("tre %!.1T", 123) }, `tre i…`}, + {func(w p) { w.Printf("tre %!.3s %!.3s", []byte("hello"), Safe([]byte("world"))) }, `tre ‹hel…› wor…`}, + {func(w p) { w.Printf("tre %!.3x", "hello") }, `tre ‹68656c…›`}, + {func(w p) { w.Printf("tre %!.1v", "☃☀") }, `tre ‹☃…›`}, + } var methods = []struct {