// ValidationError describes why a path failed validation.
//
// Reason is one of a fixed set of strings: "not found", "is a directory",
// "not a regular file", or "invalid path". Callers that need to branch on the
// failure can compare Reason directly, or use errors.Is with the io/fs
// sentinels exposed through Unwrap.
type ValidationError struct {
	Path   string // The path that failed validation
	Reason string // "not found", "is a directory", "not a regular file", or "invalid path"
}

// Error implements the error interface. The message has the form
// "<reason>: <path>".
func (e *ValidationError) Error() string {
	return fmt.Sprintf("%s: %s", e.Reason, e.Path)
}

// Unwrap exposes the io/fs sentinel that corresponds to the failure reason, so
// that errors.Is(err, fs.ErrNotExist) and errors.Is(err, fs.ErrInvalid) work on
// a *ValidationError (including one wrapped with %w).
//
// "is a directory" and "not a regular file" have no portable standard sentinel
// and unwrap to nil.
func (e *ValidationError) Unwrap() error {
	switch e.Reason {
	case "not found":
		return fs.ErrNotExist
	case "invalid path":
		return fs.ErrInvalid
	default:
		return nil
	}
}
+func (b *Blob) ValidateFiles(paths ...string) ([]string, error) { + normalized := make([]string, len(paths)) + for i, path := range paths { + normalized[i] = NormalizePath(path) + if !fs.ValidPath(normalized[i]) { + return nil, &ValidationError{Path: path, Reason: "invalid path"} + } + + view, ok := b.idx.LookupView(normalized[i]) + if !ok { + // Not a file entry - check if it's a directory + if b.isDir(normalized[i]) { + return nil, &ValidationError{Path: path, Reason: "is a directory"} + } + return nil, &ValidationError{Path: path, Reason: "not found"} + } + if !view.Mode().IsRegular() { + return nil, &ValidationError{Path: path, Reason: "not a regular file"} + } + } + return normalized, nil +} + // ReadFile implements fs.ReadFileFS. // // ReadFile reads and returns the entire contents of the named file. diff --git a/core/blob_test.go b/core/blob_test.go index e73a2fb..39a748e 100644 --- a/core/blob_test.go +++ b/core/blob_test.go @@ -925,3 +925,99 @@ func TestBlob_DirStats_Compressed(t *testing.T) { assert.Equal(t, uint64(1800), stats.TotalBytes) // Original size assert.Less(t, stats.CompressedBytes, stats.TotalBytes) // Compressed should be smaller } + +func TestBlob_ValidateFiles(t *testing.T) { + t.Parallel() + + files := map[string][]byte{ + "file1.txt": []byte("content1"), + "file2.txt": []byte("content2"), + "dir/file.txt": []byte("nested"), + } + b := createTestArchive(t, files, CompressionNone) + + t.Run("all valid", func(t *testing.T) { + t.Parallel() + normalized, err := b.ValidateFiles("file1.txt", "file2.txt") + require.NoError(t, err) + assert.Equal(t, []string{"file1.txt", "file2.txt"}, normalized) + }) + + t.Run("empty list", func(t *testing.T) { + t.Parallel() + normalized, err := b.ValidateFiles() + require.NoError(t, err) + assert.Empty(t, normalized) + }) + + t.Run("single valid file", func(t *testing.T) { + t.Parallel() + normalized, err := b.ValidateFiles("file1.txt") + require.NoError(t, err) + assert.Equal(t, []string{"file1.txt"}, 
normalized) + }) + + t.Run("returns normalized paths", func(t *testing.T) { + t.Parallel() + normalized, err := b.ValidateFiles("/file1.txt", "dir/file.txt/") + require.NoError(t, err) + assert.Equal(t, []string{"file1.txt", "dir/file.txt"}, normalized) + + // Verify normalized paths work with Open + for _, p := range normalized { + f, err := b.Open(p) + require.NoError(t, err) + f.Close() + } + }) + + t.Run("file not found", func(t *testing.T) { + t.Parallel() + normalized, err := b.ValidateFiles("file1.txt", "nonexistent.txt", "file2.txt") + require.Error(t, err) + assert.Nil(t, normalized) + + var valErr *ValidationError + require.ErrorAs(t, err, &valErr) + assert.Equal(t, "nonexistent.txt", valErr.Path) + assert.Equal(t, "not found", valErr.Reason) + assert.Contains(t, err.Error(), "nonexistent.txt") + }) + + t.Run("directory not allowed", func(t *testing.T) { + t.Parallel() + normalized, err := b.ValidateFiles("dir") + require.Error(t, err) + assert.Nil(t, normalized) + + var valErr *ValidationError + require.ErrorAs(t, err, &valErr) + assert.Equal(t, "dir", valErr.Path) + assert.Equal(t, "is a directory", valErr.Reason) + assert.Contains(t, err.Error(), "directory") + }) + + t.Run("invalid path", func(t *testing.T) { + t.Parallel() + normalized, err := b.ValidateFiles("../escape") + require.Error(t, err) + assert.Nil(t, normalized) + + var valErr *ValidationError + require.ErrorAs(t, err, &valErr) + assert.Equal(t, "../escape", valErr.Path) + assert.Equal(t, "invalid path", valErr.Reason) + }) + + t.Run("preserves original path in error", func(t *testing.T) { + t.Parallel() + // Even though path is normalized, error should show original path + normalized, err := b.ValidateFiles("/nonexistent.txt") + require.Error(t, err) + assert.Nil(t, normalized) + + var valErr *ValidationError + require.ErrorAs(t, err, &valErr) + assert.Equal(t, "/nonexistent.txt", valErr.Path) + }) +} diff --git a/types.go b/types.go index f9518a9..3a0e578 100644 --- a/types.go +++ 
// DirStats contains statistics about files under a directory prefix.
type DirStats = blobcore.DirStats

// ValidationError describes why a path failed validation.
//
// It is a type alias for blobcore.ValidationError, so a value declared with
// either name has the same type and an errors.As target of *ValidationError
// matches both.
// NOTE(review): presumably this is the error returned by the core package's
// Blob.ValidateFiles — confirm against blobcore.
type ValidationError = blobcore.ValidationError

// ByteSource provides random access to the data blob.
type ByteSource = blobcore.ByteSource