Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,15 @@ If you wish, you can use multiple flags, like below. It will download objects th
Using a combination of `--include` and `--exclude` is also possible. The command below will only sync objects that end with `.log` or `.txt` but exclude those that start with `access_`. For example, `request.log` and `license.txt` will be included, while `access_log.txt` and `readme.md` are excluded.

s5cmd sync --include "*.log" --exclude "access_*" --include "*.txt" 's3://bucket/logs/*' .

#### Listing objects with pagination

`s5cmd` supports the `--start-after` flag for the `ls` command to list objects starting after a specific key. This is useful for pagination and resuming listings from a specific point:

s5cmd ls --start-after "logs/2024/03/file100.txt" 's3://bucket/logs/*'

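This will list all objects matching `s3://bucket/logs/*` whose keys sort after `logs/2024/03/file100.txt`. S3 orders keys lexicographically by their UTF-8 bytes (not strictly alphabetically), and the key passed to `--start-after` is itself excluded from the results.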

#### Select JSON object content using SQL

`s5cmd` supports the `SelectObjectContent` S3 operation, and will run your
Expand Down
10 changes: 9 additions & 1 deletion command/ls.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ Examples:
11. List all files with their fullpaths
> s5cmd {{.HelpName}} --show-fullpath "s3://bucket/*"

12. List objects starting after a specific key
> s5cmd {{.HelpName}} --start-after "prefix/object5.txt" "s3://bucket/prefix/*"

`

func NewListCommand() *cli.Command {
Expand Down Expand Up @@ -94,6 +97,10 @@ func NewListCommand() *cli.Command {
Name: "show-fullpath",
Usage: "shows only the fullpath names of the object(s)",
},
&cli.StringFlag{
Name: "start-after",
Usage: "start listing after this specified key",
},
},
Before: func(c *cli.Context) error {
err := validateLSCommand(c)
Expand All @@ -115,7 +122,8 @@ func NewListCommand() *cli.Command {
fullCommand := commandFromContext(c)

srcurl, err := url.New(c.Args().First(),
url.WithAllVersions(c.Bool("all-versions")))
url.WithAllVersions(c.Bool("all-versions")),
url.WithStartAfter(c.String("start-after")))
if err != nil {
printError(fullCommand, c.Command.Name, err)
return err
Expand Down
134 changes: 134 additions & 0 deletions e2e/ls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -802,3 +802,137 @@ func TestEmptyBucket(t *testing.T) {

assertLines(t, result.Stdout(), nil)
}

// TestListObjectsWithStartAfter runs `ls --start-after b.txt` against a bucket
// holding a.txt through e.txt and expects only c.txt, d.txt and e.txt back.
func TestListObjectsWithStartAfter(t *testing.T) {
	// The default gofakes3 backend does not support StartAfter, so the test
	// only runs when an endpoint is supplied through the environment.
	if !isEndpointFromEnv() {
		t.Skip("gofakes3 backend doesn't support StartAfter parameter - run with S5CMD_TEST_ENDPOINT_URL to test against real S3")
	}
	t.Parallel()

	s3client, s5cmd := setup(t)

	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	// Seed the bucket with a.txt .. e.txt, in that order.
	for _, letter := range []string{"a", "b", "c", "d", "e"} {
		putFile(t, s3client, bucket, letter+".txt", "content of "+letter)
	}

	// Ask for everything that comes after "b.txt".
	listURL := "s3://" + bucket + "/"
	result := icmd.RunCmd(s5cmd("ls", "--start-after", "b.txt", listURL))

	result.Assert(t, icmd.Success)

	// a.txt and b.txt must be absent; the remaining three keys appear in order.
	expected := map[int]compareFunc{
		0: suffix("c.txt"),
		1: suffix("d.txt"),
		2: suffix("e.txt"),
	}
	assertLines(t, result.Stdout(), expected, strictLineCheck(true), trimMatch(dateRe))
}

// TestListObjectsWithStartAfterAndPrefix runs `ls --start-after` on a prefix
// URL ("logs/2024/") and expects only the keys after logs/2024/file2.txt.
func TestListObjectsWithStartAfterAndPrefix(t *testing.T) {
	// The default gofakes3 backend does not support StartAfter, so the test
	// only runs when an endpoint is supplied through the environment.
	if !isEndpointFromEnv() {
		t.Skip("gofakes3 backend doesn't support StartAfter parameter - run with S5CMD_TEST_ENDPOINT_URL to test against real S3")
	}
	t.Parallel()

	s3client, s5cmd := setup(t)

	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	// Seed logs/2024/file1.txt .. file4.txt, in that order.
	for _, n := range []string{"1", "2", "3", "4"} {
		putFile(t, s3client, bucket, "logs/2024/file"+n+".txt", "content "+n)
	}

	// Ask for everything under the prefix that comes after file2.txt.
	listURL := "s3://" + bucket + "/logs/2024/"
	result := icmd.RunCmd(s5cmd("ls", "--start-after", "logs/2024/file2.txt", listURL))

	result.Assert(t, icmd.Success)

	// Only file3.txt and file4.txt should be listed.
	expected := map[int]compareFunc{
		0: suffix("file3.txt"),
		1: suffix("file4.txt"),
	}
	assertLines(t, result.Stdout(), expected, strictLineCheck(true), trimMatch(dateRe))
}

// TestListObjectsWithStartAfterAndWildcard runs `ls --start-after` together
// with a catch-all wildcard ("logs/2024/*") and expects every key after
// logs/2024/file2.txt, regardless of extension.
func TestListObjectsWithStartAfterAndWildcard(t *testing.T) {
	// The default gofakes3 backend does not support StartAfter, so the test
	// only runs when an endpoint is supplied through the environment.
	if !isEndpointFromEnv() {
		t.Skip("gofakes3 backend doesn't support StartAfter parameter - run with S5CMD_TEST_ENDPOINT_URL to test against real S3")
	}
	t.Parallel()

	s3client, s5cmd := setup(t)

	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	// Seed four .txt objects followed by a single .log object.
	for _, n := range []string{"1", "2", "3", "4"} {
		putFile(t, s3client, bucket, "logs/2024/file"+n+".txt", "content "+n)
	}
	putFile(t, s3client, bucket, "logs/2024/file5.log", "content 5")

	// Wildcard listing that starts after file2.txt.
	listURL := "s3://" + bucket + "/logs/2024/*"
	result := icmd.RunCmd(s5cmd("ls", "--start-after", "logs/2024/file2.txt", listURL))

	result.Assert(t, icmd.Success)

	// file3.txt, file4.txt and file5.log all come after file2.txt.
	expected := map[int]compareFunc{
		0: suffix("file3.txt"),
		1: suffix("file4.txt"),
		2: suffix("file5.log"),
	}
	assertLines(t, result.Stdout(), expected, strictLineCheck(true), trimMatch(dateRe))
}

// TestListObjectsWithStartAfterAndWildcardExtension runs `ls --start-after`
// together with an extension wildcard ("logs/2024/*.txt") and expects only the
// .txt keys after logs/2024/file2.txt.
func TestListObjectsWithStartAfterAndWildcardExtension(t *testing.T) {
	// The default gofakes3 backend does not support StartAfter, so the test
	// only runs when an endpoint is supplied through the environment.
	if !isEndpointFromEnv() {
		t.Skip("gofakes3 backend doesn't support StartAfter parameter - run with S5CMD_TEST_ENDPOINT_URL to test against real S3")
	}
	t.Parallel()

	s3client, s5cmd := setup(t)

	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	// Seed four .txt objects followed by a single .log object.
	for _, n := range []string{"1", "2", "3", "4"} {
		putFile(t, s3client, bucket, "logs/2024/file"+n+".txt", "content "+n)
	}
	putFile(t, s3client, bucket, "logs/2024/file5.log", "content 5")

	// List only *.txt objects, starting after file2.txt.
	listURL := "s3://" + bucket + "/logs/2024/*.txt"
	result := icmd.RunCmd(s5cmd("ls", "--start-after", "logs/2024/file2.txt", listURL))

	result.Assert(t, icmd.Success)

	// file5.log comes after file2.txt but is dropped by the wildcard.
	expected := map[int]compareFunc{
		0: suffix("file3.txt"),
		1: suffix("file4.txt"),
	}
	assertLines(t, result.Stdout(), expected, strictLineCheck(true), trimMatch(dateRe))
}

4 changes: 4 additions & 0 deletions storage/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,10 @@ func (s *S3) listObjectsV2(ctx context.Context, url *url.URL) <-chan *Object {
listInput.SetDelimiter(url.Delimiter)
}

if url.StartAfter != "" {
listInput.SetStartAfter(url.StartAfter)
}

objCh := make(chan *Object)

go func() {
Expand Down
7 changes: 7 additions & 0 deletions storage/url/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ type URL struct {
Prefix string
VersionID string
AllVersions bool
StartAfter string

relativePath string
filter string
Expand Down Expand Up @@ -75,6 +76,12 @@ func WithAllVersions(isAllVersions bool) Option {
}
}

// WithStartAfter returns an Option that stores startAfter in the URL's
// StartAfter field. An empty string is stored as-is.
func WithStartAfter(startAfter string) Option {
	apply := func(target *URL) {
		target.StartAfter = startAfter
	}
	return apply
}

// New creates a new URL from given path string.
func New(s string, opts ...Option) (*URL, error) {
scheme, rest, isFound := strings.Cut(s, "://")
Expand Down
Loading