From 66b2de797716d2342093ca78233f0f193f434f7d Mon Sep 17 00:00:00 2001 From: or-shachar Date: Thu, 16 Nov 2023 19:51:49 +0200 Subject: [PATCH] Added S3 support Signed-off-by: or-shachar --- README.md | 10 +++ cachers/s3.go | 192 ++++++++++++++++++++++++++++++++++++++++ cmd/go-cacher/cacher.go | 81 +++++++++++++++-- go.mod | 26 +++++- go.sum | 39 ++++++++ 5 files changed, 341 insertions(+), 7 deletions(-) create mode 100644 cachers/s3.go diff --git a/README.md b/README.md index e06e42a..0ea4d7b 100644 --- a/README.md +++ b/README.md @@ -40,3 +40,13 @@ $ GOCACHEPROG="$HOME/go/bin/go-cacher --verbose" go install std Defaulting to cache dir /home/bradfitz/.cache/go-cacher ... cacher: closing; 808 gets (808 hits, 0 misses, 0 errors); 0 puts (0 errors) ``` + +## S3 Support +We support S3 backend for caching. +You can connect to S3 backend by setting the following parameters: +- `GOCACHE_S3_BUCKET` - Name of S3 bucket +- `GOCACHE_AWS_REGION` - AWS Region of bucket +- `GOCACHE_AWS_ACCESS_KEY` + `GOCACHE_AWS_SECRET_KEY` / `GOCACHE_AWS_PROFILE` - Direct credentials or creds profile to use. +- `GOCACHE_CACHE_KEY` - (Optional, default `v1`) Unique key + +The cache would be stored to `s3:///cache////` diff --git a/cachers/s3.go b/cachers/s3.go new file mode 100644 index 0000000..963da34 --- /dev/null +++ b/cachers/s3.go @@ -0,0 +1,192 @@ +package cachers + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "log" + "runtime" + "strings" + + "github.com/aws/smithy-go" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +type S3Cache struct { + Bucket string + cfg *aws.Config + // diskCache is where to write the output files to local disk, as required by the + // cache protocol. + diskCache *DiskCache + + prefix string + // verbose optionally specifies whether to log verbose messages. + verbose bool + + s3Client *s3.Client +} + +func NewS3Cache(bucketName string, cfg *aws.Config, cacheKey string, disk *DiskCache, verbose bool) *S3Cache { + // get current architecture + arc := runtime.GOARCH + // get current operating system + os := runtime.GOOS + // get current version of Go + ver := strings.ReplaceAll(strings.ReplaceAll(runtime.Version(), " ", "-"), ":", "-") + prefix := fmt.Sprintf("cache/%s/%s/%s/%s", cacheKey, arc, os, ver) + log.Printf("S3Cache: configured to s3://%s/%s", bucketName, prefix) + return &S3Cache{ + Bucket: bucketName, + cfg: cfg, + diskCache: disk, + prefix: prefix, + verbose: verbose, + } +} + +func (c *S3Cache) client(ctx context.Context) (*s3.Client, error) { + if c.s3Client != nil { + return c.s3Client, nil + } + c.s3Client = s3.NewFromConfig(*c.cfg) + return c.s3Client, nil +} + +func isNotFoundError(err error) bool { + if err != nil { + var ae smithy.APIError + if errors.As(err, &ae) { + code := ae.ErrorCode() + return code == "AccessDenied" || code == "NoSuchKey" + } + } + return false +} + +func (c *S3Cache) Get(ctx context.Context, actionID string) (outputID, diskPath string, err error) { + outputID, diskPath, err = c.diskCache.Get(ctx, actionID) + if err == nil && outputID != "" { + return outputID, diskPath, nil + } + client, err := c.client(ctx) + if err != nil { + if c.verbose { + log.Printf("error getting S3 client: %v", err) + } + return "", "", err + } + actionKey := c.actionKey(actionID) + result, err := client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: &c.Bucket, + Key: &actionKey, + }) + // handle object not found + if isNotFoundError(err) { + return "", "", nil + } else if err != nil { + if c.verbose { + log.Printf("error S3 get for %s: %v", actionKey, err) + } + return "", "", fmt.Errorf("unexpected S3 get for %s: %v", actionKey, err) + } + defer result.Body.Close() + var av ActionValue + body, err := io.ReadAll(result.Body) + if err != nil { + return "", "", err + } + if err := json.Unmarshal(body, &av); err != nil { + if c.verbose { + log.Printf("error unmarshalling JSON for %s: %v", actionKey, err) + } + return "", "", err + } + + outputID = av.OutputID + + var putBody io.Reader + if av.Size == 0 { + putBody = bytes.NewReader(nil) + } else { + outputKey := c.outputKey(outputID) + outputResult, getOutputErr := client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: &c.Bucket, + Key: &outputKey, + }) + if isNotFoundError(getOutputErr) { + // handle object not found + return "", "", nil + } else if getOutputErr != nil { + if c.verbose { + log.Printf("error S3 get for %s: %v", outputKey, getOutputErr) + } + return "", "", fmt.Errorf("unexpected S3 get for %s: %v", outputKey, getOutputErr) + } + defer outputResult.Body.Close() + + putBody = outputResult.Body + } + diskPath, err = c.diskCache.Put(ctx, actionID, outputID, av.Size, putBody) + return outputID, diskPath, err +} +func (c *S3Cache) actionKey(actionID string) string { + return fmt.Sprintf("%s/actions/%s", c.prefix, actionID) +} + +func (c *S3Cache) outputKey(outputID string) string { + return fmt.Sprintf("%s/output/%s", c.prefix, outputID) +} + +func (c *S3Cache) Put(ctx context.Context, actionID, outputID string, size int64, body io.Reader) (diskPath string, _ error) { + // Write to disk locally as we write it remotely, as we need to guarantee + // it's on disk locally for the caller. + var readerForDisk io.Reader + var readerForS3 bytes.Buffer + + if size == 0 { + // Special case the empty file so NewRequest sets "Content-Length: 0", + // as opposed to thinking we didn't set it and not being able to sniff its size + // from the type. + readerForDisk = bytes.NewReader(nil) + } else { + readerForDisk = io.TeeReader(body, &readerForS3) + } + + diskPath, err := c.diskCache.Put(ctx, actionID, outputID, size, readerForDisk) + if err != nil { + return "", err + } + + client, err := c.client(ctx) + if err != nil { + return "", err + } + av := ActionValue{ + OutputID: outputID, + Size: size, + } + avj, err := json.Marshal(av) + if err == nil { + actionKey := c.actionKey(actionID) + _, err = client.PutObject(ctx, &s3.PutObjectInput{ + Bucket: &c.Bucket, + Key: &actionKey, + Body: bytes.NewReader(avj), + }) + } + if size > 0 && err == nil { + outputKey := c.outputKey(outputID) + _, err = client.PutObject(ctx, &s3.PutObjectInput{ + Bucket: &c.Bucket, + Key: &outputKey, + Body: &readerForS3, + ContentLength: size, + }) + } + return +} diff --git a/cmd/go-cacher/cacher.go b/cmd/go-cacher/cacher.go index 81309f0..6f40506 100644 --- a/cmd/go-cacher/cacher.go +++ b/cmd/go-cacher/cacher.go @@ -6,37 +6,97 @@ package main import ( + "context" "flag" "log" "os" "path/filepath" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/bradfitz/go-tool-cache/cacheproc" "github.com/bradfitz/go-tool-cache/cachers" ) +const defaultCacheKey = "v1" + var ( - dir = flag.String("cache-dir", "", "cache directory; empty means automatic") serverBase = flag.String("cache-server", "", "optional cache server HTTP prefix (scheme and authority only); should be low latency. empty means to not use one.") verbose = flag.Bool("verbose", false, "be verbose") ) +func getAwsConfigFromEnv() (*aws.Config, error) { + // read from env + awsRegion, awsRegionOk := os.LookupEnv("GOCACHE_AWS_REGION") + if !awsRegionOk { + return nil, nil + } + accessKey, accessKeyOk := os.LookupEnv("GOCACHE_AWS_ACCESS_KEY") + secretAccessKey, secretKeyOk := os.LookupEnv("GOCACHE_AWS_SECRET_ACCESS_KEY") + if accessKeyOk && secretKeyOk { + cfg, err := config.LoadDefaultConfig(context.TODO(), + config.WithRegion(awsRegion), + config.WithCredentialsProvider(credentials.StaticCredentialsProvider{ + Value: aws.Credentials{ + AccessKeyID: accessKey, + SecretAccessKey: secretAccessKey, + }, + })) + if err != nil { + return nil, err + } + return &cfg, nil + } + credsProfile, credsProfileOk := os.LookupEnv("GOCACHE_CREDS_PROFILE") + if credsProfileOk { + cfg, err := config.LoadDefaultConfig(context.TODO(), config.WithRegion(awsRegion), config.WithSharedConfigProfile(credsProfile)) + if err != nil { + return nil, err + } + return &cfg, nil + } + return nil, nil +} + +func maybeS3Cache(dc *cachers.DiskCache) (*cachers.S3Cache, error) { + awsConfig, err := getAwsConfigFromEnv() + if err != nil { + return nil, err + } + bucket, ok := os.LookupEnv("GOCACHE_S3_BUCKET") + if !ok || awsConfig == nil { + // We need at least name of bucket and valid aws config + return nil, nil + } + cacheKey := os.Getenv("GOCACHE_CACHE_KEY") + if cacheKey == "" { + cacheKey = defaultCacheKey + } + + s3Cache := cachers.NewS3Cache(bucket, awsConfig, cacheKey, dc, *verbose) + return s3Cache, nil +} + func main() { flag.Parse() - if *dir == "" { + dir := os.Getenv("GOCACHE_DISK_DIR") + if dir == "" { d, err := os.UserCacheDir() if err != nil { log.Fatal(err) } d = filepath.Join(d, "go-cacher") - log.Printf("Defaulting to cache dir %v ...", d) - *dir = d + dir = d } - if err := os.MkdirAll(*dir, 0755); err != nil { + log.Printf("cache dir %v ...", dir) + if err := os.MkdirAll(dir, 0755); err != nil { log.Fatal(err) } - dc := &cachers.DiskCache{Dir: *dir} + dc := &cachers.DiskCache{Dir: dir} var p *cacheproc.Process p = &cacheproc.Process{ @@ -61,6 +121,15 @@ func main() { p.Put = hc.Put } + s3Cache, err := maybeS3Cache(dc) + if err != nil { + log.Fatal(err) + } + if s3Cache != nil { + p.Get = s3Cache.Get + p.Put = s3Cache.Put + } + if err := p.Run(); err != nil { log.Fatal(err) } diff --git a/go.mod b/go.mod index 6d31cbe..79d5136 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,27 @@ module github.com/bradfitz/go-tool-cache -go 1.20 +go 1.21 + +require ( + github.com/aws/aws-sdk-go-v2 v1.22.2 + github.com/aws/aws-sdk-go-v2/config v1.23.0 + github.com/aws/aws-sdk-go-v2/credentials v1.15.2 + github.com/aws/aws-sdk-go-v2/service/s3 v1.42.1 +) + +require ( + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.0 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.2 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.2 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.6.0 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.2 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.2 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.2 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.2 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.17.1 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.19.1 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.25.1 // indirect + github.com/aws/smithy-go v1.16.0 // indirect +) diff --git a/go.sum b/go.sum index e69de29..a501c30 100644 --- a/go.sum +++ b/go.sum @@ -0,0 +1,39 @@ +github.com/aws/aws-sdk-go-v2 v1.22.2 h1:lV0U8fnhAnPz8YcdmZVV60+tr6CakHzqA6P8T46ExJI= +github.com/aws/aws-sdk-go-v2 v1.22.2/go.mod h1:Kd0OJtkW3Q0M0lUWGszapWjEvrXDzRW+D21JNsroB+c= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.0 h1:hHgLiIrTRtddC0AKcJr5s7i/hLgcpTt+q/FKxf1Zayk= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.0/go.mod h1:w4I/v3NOWgD+qvs1NPEwhd++1h3XPHFaVxasfY6HlYQ= +github.com/aws/aws-sdk-go-v2/config v1.23.0 h1:kqzEfGGDIrRJpfJckgwuZfFTbU9NB1jZnRcaO9MpOqE= +github.com/aws/aws-sdk-go-v2/config v1.23.0/go.mod h1:p7wbxKXXjS1GGQOss7VXOazVMFF9bjUGq85/4wR/fSw= +github.com/aws/aws-sdk-go-v2/credentials v1.15.2 h1:rKH7khRMxPdD0u3dHecd0Q7NOVw3EUe7AqdkUOkiOGI= +github.com/aws/aws-sdk-go-v2/credentials v1.15.2/go.mod h1:tXM8wmaeAhfC7nZoCxb0FzM/aRaB1m1WQ7x0qlBLq80= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.3 h1:G5KawTAkyHH6WyKQCdHiW4h3PmAXNJpOgwKg3H7sDRE= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.3/go.mod h1:hugKmSFnZB+HgNI1sYGT14BUPZkO6alC/e0AWu+0IAQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.2 h1:AaQsr5vvGR7rmeSWBtTCcw16tT9r51mWijuCQhzLnq8= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.2/go.mod h1:o1IiRn7CWocIFTXJjGKJDOwxv1ibL53NpcvcqGWyRBA= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.2 h1:UZx8SXZ0YtzRiALzYAWcjb9Y9hZUR7MBKaBQ5ouOjPs= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.2/go.mod h1:ipuRpcSaklmxR6C39G187TpBAO132gUfleTGccUPs8c= +github.com/aws/aws-sdk-go-v2/internal/ini v1.6.0 h1:hwZB07/beLiCopuRKF0t+dEHmP39iN4YtDh3X5d3hrg= +github.com/aws/aws-sdk-go-v2/internal/ini v1.6.0/go.mod h1:rdAuXeHWhI/zkpYcO5n8WCpaIgY9MUxFyBsuqq3kjyA= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.2 h1:pyVrNAf7Hwz0u39dLKN5t+n0+K/3rMYKuiOoIum3AsU= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.2/go.mod h1:mydrfOb9uiOYCxuCPR8YHQNQyGQwUQ7gPMZGBKbH8NY= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.0 h1:CJxo7ZBbaIzmXfV3hjcx36n9V87gJsIUPJflwqEHl3Q= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.0/go.mod h1:yjVfjuY4nD1EW9i387Kau+I6V5cBA5YnC/mWNopjZrI= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.2 h1:f2LhPofnjcdOQKRtumKjMvIHkfSQ8aH/rwKUDEQ/SB4= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.2/go.mod h1:q+xX0H4OfuWDuBy7y/LDi4v8IBOWuF+vtp8Z6ex+lw4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.2 h1:h7j73yuAVVjic8pqswh+L/7r2IHP43QwRyOu6zcCDDE= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.2/go.mod h1:H07AHdK5LSy8F7EJUQhoxyiCNkePoHj2D8P2yGTWafo= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.2 h1:gbIaOzpXixUpoPK+js/bCBK1QBDXM22SigsnzGZio0U= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.2/go.mod h1:p+S7RNbdGN8qgHDSg2SCQJ9FeMAmvcETQiVpeGhYnNM= +github.com/aws/aws-sdk-go-v2/service/s3 v1.42.1 h1:o6MCcX1rJW8Y3g+hvg2xpjF6JR6DftuYhfl3Nc1WV9Q= +github.com/aws/aws-sdk-go-v2/service/s3 v1.42.1/go.mod h1:UDtxEWbREX6y4KREapT+jjtjoH0TiVSS6f5nfaY1UaM= +github.com/aws/aws-sdk-go-v2/service/sso v1.17.1 h1:km+ZNjtLtpXYf42RdaDZnNHm9s7SYAuDGTafy6nd89A= +github.com/aws/aws-sdk-go-v2/service/sso v1.17.1/go.mod h1:aHBr3pvBSD5MbzOvQtYutyPLLRPbl/y9x86XyJJnUXQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.19.1 h1:iRFNqZH4a67IqPvK8xxtyQYnyrlsvwmpHOe9r55ggBA= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.19.1/go.mod h1:pTy5WM+6sNv2tB24JNKFtn6EvciQ5k40ZJ0pq/Iaxj0= +github.com/aws/aws-sdk-go-v2/service/sts v1.25.1 h1:txgVXIXWPXyqdiVn92BV6a/rgtpX31HYdsOYj0sVQQQ= +github.com/aws/aws-sdk-go-v2/service/sts v1.25.1/go.mod h1:VAiJiNaoP1L89STFlEMgmHX1bKixY+FaP+TpRFrmyZ4= +github.com/aws/smithy-go v1.16.0 h1:gJZEH/Fqh+RsvlJ1Zt4tVAtV6bKkp3cC+R6FCZMNzik= +github.com/aws/smithy-go v1.16.0/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=