textar/textar.go at main · ypsu/textar · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
// Package textar encodes a file list (key-value slice) into a human editable text file and vice versa.
// This is inspired by https://pkg.go.dev/golang.org/x/tools/txtar but this format can encode any content perfectly without issues.
// Go's txtar doesn't handle newlines and content containing txtar markers well.
// In textar each file is encoded via "[SEP] [NAME]\n[CONTENT]\n".
// SEP is two or more = signs.
// The first SEP can be arbitrary length, the rest must be the same length.
// The first line beginning with == determines the separator length.
// The dynamic SEP-length makes it possible to encode and decode anything perfectly.
// This is the main advantage over Go's txtar.
// Furthermore anything before the first SEP is a free form comment.
// Example:
//
//	Some comments here.
//
//	=== file1
//	file1 content.
//
//	=== file2
//	file2 content.
//	== file3
//	this is a textar within textar.
//
// The separator here is === so this textar contains file1 and file2.
// file3 is not part of the main textar, it just shows that file2 could be a textar itself.
//
// The [Archive.Format] method automatically picks a separator length that cannot collide with any line inside the file contents.
// Use [Parse] to parse it back.
//
// See https://github.com/ypsu/textar/blob/main/example/seq.textar for a longer example.
// See the testdata directory of https://github.com/ypsu/pkgtrim for a more realistic example.
package textar

import (
	"bytes"
	"fmt"
	"iter"
	"math"
	"os"
	"strings"
	"testing/fstest"
)

// A File is a single file in an archive.
type File struct {
	// Name is the text that follows the separator on the file's header line.
	Name string
	// Data is the file content that follows the header line.
	Data []byte
}

// An Archive is a collection of files.
type Archive struct {
	// Comment is the free-form text that precedes the first separator.
	Comment []byte
	// Files holds the archive entries in the order they appear.
	Files []File
}

// Parse parses the serialized form of an Archive.
//
// Everything before the first line that starts with "==" becomes the Comment.
// That line fixes the separator: its run of '=' signs followed by one space.
// Each later occurrence of a newline followed by that exact separator starts a new file.
// If no such line exists, or the '=' run is not followed by a space,
// the whole input is returned as the Comment and Files stays empty.
// A trailing header line that is not terminated by a newline is ignored.
//
// The returned Archive holds slices of data, nothing is copied.
// The slices extracted from the input have their capacity clipped to their length,
// so appending to one entry reallocates instead of overwriting the entries that follow it.
func Parse(data []byte) *Archive {
	a, p := &Archive{}, data
	if len(data) <= 2 {
		// Too short to hold even "== " so there cannot be any file in it.
		a.Comment = p
		return a
	}

	// Find the separator string: newline, two or more '=' signs, one space.
	sep := make([]byte, 0, 5)
	sep = append(sep, '\n', '=', '=')
	if p[0] == '=' && p[1] == '=' {
		// The archive starts with a separator right away, there is no comment.
		p = p[2:]
	} else {
		var ok bool
		a.Comment, p, ok = bytes.Cut(p, []byte("\n=="))
		if !ok {
			// Empty textar, treat the whole file as a big comment.
			a.Comment = data
			return a
		}
		// Clip so that growing the comment cannot write into the file section.
		a.Comment = a.Comment[:len(a.Comment):len(a.Comment)]
	}
	for len(p) > 0 && p[0] == '=' {
		p, sep = p[1:], append(sep, '=')
	}
	if len(p) == 0 || p[0] != ' ' {
		// Invalid textar, treat the whole file as a big comment.
		a.Comment = data
		return a
	}
	sep, p = append(sep, ' '), p[1:]

	// Populate the Files field.
	for {
		var name, content []byte
		var ok bool
		name, p, ok = bytes.Cut(p, []byte("\n"))
		if !ok {
			// No complete header line is left.
			break
		}
		// When sep is not found the rest of the input is the last file's content.
		content, p, _ = bytes.Cut(p, sep)
		a.Files = append(a.Files, File{
			Name: string(name),
			// Clip so that growing this entry cannot overwrite the next one.
			Data: content[:len(content):len(content)],
		})
	}
	return a
}

// ParseFile reads the named file and parses its content as an archive.
// A read failure is returned wrapped, so the underlying cause stays reachable
// through [errors.Is] and [errors.As]; no archive is returned in that case.
func ParseFile(file string) (*Archive, error) {
	data, err := os.ReadFile(file)
	if err != nil {
		// %w rather than %v keeps the os error in the chain for the caller.
		return nil, fmt.Errorf("textar.ReadFile: %w", err)
	}
	return Parse(data), nil
}

// Format serializes the archive into its textar byte form.
//
// The separator length is derived from the file contents so that no content line can be mistaken for a file header.
// In the comment, a '=' at the start of any line is replaced with 'X'.
// Newlines inside file names are written as the two characters `\n`.
// A nil archive formats to nil.
func (a *Archive) Format() []byte {
	if a == nil {
		return nil
	}

	// Work out how many '=' signs the separator needs.
	// A streak restarts at 1 on every newline, so when a space is reached
	// it is already one larger than the '=' run that began that line.
	eqs := 2
	for i := range a.Files {
		streak := math.MinInt
		for _, b := range a.Files[i].Data {
			switch b {
			case '=':
				streak++
			case ' ':
				eqs = max(eqs, streak)
			case '\n':
				streak = 1
			default:
				streak = math.MinInt
			}
		}
	}
	// The full marker: newline, equal signs, space.
	marker := "\n" + strings.Repeat("=", eqs) + " "

	var out bytes.Buffer

	// Emit the comment, defusing every line that starts with '='.
	lineStart := true
	for _, b := range a.Comment {
		if lineStart && b == '=' {
			out.WriteByte('X')
		} else {
			out.WriteByte(b)
		}
		lineStart = b == '\n'
	}

	// Emit the files, each as a header line followed by the raw content.
	for i, f := range a.Files {
		header := marker
		if i == 0 && len(a.Comment) == 0 {
			// Nothing precedes the very first header, so drop its leading newline.
			header = marker[1:]
		}
		out.WriteString(header)
		out.WriteString(strings.ReplaceAll(f.Name, "\n", `\n`))
		out.WriteByte('\n')
		out.Write(f.Data)
	}
	return out.Bytes()
}

// Range returns an iterator that yields the name and data of every file in archive order.
// Iteration ends early as soon as the consumer's yield function returns false.
func (a *Archive) Range() iter.Seq2[string, []byte] {
	return func(yield func(name string, data []byte) bool) {
		entries := a.Files
		for i := range entries {
			if keepGoing := yield(entries[i].Name, entries[i].Data); !keepGoing {
				break
			}
		}
	}
}

// FS builds an [fstest.MapFS], which implements [io/fs.FS], from the files of the archive.
// One leading "/" is stripped from each name and every entry gets mode 0644.
// When two names map to the same key the later file wins.
// This is a helper function for tests.
func (a *Archive) FS() fstest.MapFS {
	tree := make(fstest.MapFS)
	for name, data := range a.Range() {
		key := strings.TrimPrefix(name, "/")
		entry := &fstest.MapFile{Data: data, Mode: 0644}
		tree[key] = entry
	}
	return tree
}