From e7b1c6dd51b210510bdf0ed2b1fc2ce81253cad0 Mon Sep 17 00:00:00 2001 From: Gompei Date: Thu, 18 Nov 2021 17:43:34 +0900 Subject: [PATCH 1/2] Add file difference check function --- Makefile | 7 ++ examples/simple/example.go | 13 ++- files_difference.go | 31 ++++++ files_difference_test.go | 32 ++++++ md5.go | 37 +++++++ md5_test.go | 86 ++++++++++++++++ s3sync.go | 200 +++++++++++++++++++++++++++++++++++++ s3sync_test.go | 152 ++++++++++++++++++++++++++++ 8 files changed, 557 insertions(+), 1 deletion(-) create mode 100644 files_difference.go create mode 100644 files_difference_test.go create mode 100644 md5.go create mode 100644 md5_test.go diff --git a/Makefile b/Makefile index c62735c..99f0006 100644 --- a/Makefile +++ b/Makefile @@ -49,3 +49,10 @@ fixture: aws s3 --endpoint-url http://localhost:4572 mb s3://example-bucket-directory aws s3 --endpoint-url http://localhost:4572 mb s3://example-bucket-mime aws s3api --endpoint-url http://localhost:4572 put-object --bucket example-bucket-directory --key test/ + aws s3 --endpoint-url http://localhost:4572 mb s3://example-bucket-check-file-difference + aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/file_and_directory/foo/ + aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/file_and_directory/foo/bar/ + aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/file_and_directory/foo/bar/baz/ + aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/file_only/ + aws s3api --endpoint-url http://localhost:4572 put-object --bucket example-bucket-check-file-difference --key empty/ + aws s3api --endpoint-url http://localhost:4572 put-object --bucket example-bucket-check-file-difference --key directory_only/foo/ diff --git a/examples/simple/example.go b/examples/simple/example.go index 8622758..5d4d593 100644 --- a/examples/simple/example.go +++ b/examples/simple/example.go @@ -33,8 +33,19 @@ func main() { fmt.Printf("from=%s\n", os.Args[1]) fmt.Printf("to=%s\n", os.Args[2]) - err = s3sync.New(sess).Sync(os.Args[1], os.Args[2]) + syncManager := s3sync.New(sess) + + hasDiff, err := syncManager.HasDifference(os.Args[1], os.Args[2]) if err != nil { panic(err) } + + if hasDiff { + err = syncManager.Sync(os.Args[1], os.Args[2]) + if err != nil { + panic(err) + } + } else { + fmt.Println("There are no differences in the files.") + } } diff --git a/files_difference.go b/files_difference.go new file mode 100644 index 0000000..d0f6e0e --- /dev/null +++ b/files_difference.go @@ -0,0 +1,31 @@ +// Copyright 2021 SEQSENSE, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package s3sync + +import "sync" + +type filesDifference struct { + mu sync.Mutex + hasDifference bool +} + +func (c *filesDifference) Set(b bool) { + c.mu.Lock() + c.hasDifference = b + c.mu.Unlock() +} + +func (c *filesDifference) Get() bool { + return c.hasDifference +} diff --git a/files_difference_test.go b/files_difference_test.go new file mode 100644 index 0000000..8ba6640 --- /dev/null +++ b/files_difference_test.go @@ -0,0 +1,32 @@ +// Copyright 2021 SEQSENSE, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package s3sync + +import "testing" + +func TestFilesDifference(t *testing.T) { + t.Run("false", func(t *testing.T) { + hasDifference := &filesDifference{} + if hasDifference.Get() { + t.Error("filesDifference should return false") + } + }) + t.Run("true", func(t *testing.T) { + hasDifference := &filesDifference{} + hasDifference.Set(true) + if !hasDifference.Get() { + t.Error("filesDifference should return true") + } + }) +} diff --git a/md5.go b/md5.go new file mode 100644 index 0000000..df4e1f5 --- /dev/null +++ b/md5.go @@ -0,0 +1,37 @@ +// Copyright 2021 SEQSENSE, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package s3sync + +import ( + "crypto/md5" + "fmt" + "io" + "os" +) + +// getMd5Hash return the MD5 hash value of the specified file +func getMd5Hash(filePath string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", err + } + defer file.Close() + + hash := md5.New() + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + + return fmt.Sprintf("%x", hash.Sum(nil)), nil +} diff --git a/md5_test.go b/md5_test.go new file mode 100644 index 0000000..b156a6f --- /dev/null +++ b/md5_test.go @@ -0,0 +1,86 @@ +// Copyright 2021 SEQSENSE, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package s3sync + +import ( + "io/ioutil" + "os" + "testing" +) + +func TestGetMd5Hash(t *testing.T) { + // The hash value should be obtained from a file + // with the same content that was created in advance. + tests := []struct { + name string + content string + wantHash string + hasError bool + }{ + { + "files with correct hash values", + "This is a test sentence.", + "4d6a0c4cf3f07eadd5ba147c67c6896f", + false, + }, + { + "files with correct hash values", + "column1,column2,column3\ntest,test,test", + "1ed7dedaf1bfac6642b52a19a4a1988c", + false, + }, + { + "not existed file", + "", + "", + true, + }, + } + + for _, tt := range tests { + temp, err := ioutil.TempDir("", "s3synctest") + defer os.RemoveAll(temp) + + if err != nil { + t.Fatal("Failed to create temp dir") + } + + t.Run(tt.name, func(t *testing.T) { + var testFile *os.File + var testFileName string + + if !tt.hasError { + testFile, err = ioutil.TempFile(temp, "s3synctest-") + if err != nil { + t.Fatal("Failed to create temp file") + } + testFileName = testFile.Name() + defer os.Remove(testFileName) + + if _, err = testFile.Write([]byte(tt.content)); err != nil { + t.Fatal("Failed to write temp file") + } + } + + got, err := getMd5Hash(testFileName) + if !tt.hasError && err != nil { + t.Errorf("got err. err:%v, want: err==nil", err) + } + + if got != tt.wantHash { + t.Fatalf("Unexpected hash. got: %v, want: %v", got, tt.wantHash) + } + }) + } +} diff --git a/s3sync.go b/s3sync.go index 282dc7e..7024a18 100644 --- a/s3sync.go +++ b/s3sync.go @@ -14,6 +14,7 @@ package s3sync import ( "errors" + "fmt" "net/url" "os" "path/filepath" @@ -57,6 +58,7 @@ type fileInfo struct { lastModified time.Time singleFile bool existsInSource bool + hash string } type fileOp struct { @@ -64,6 +66,13 @@ type fileOp struct { op operation } +type checkFiles struct { + sourceFile *fileInfo + destFile *fileInfo + destFileExists bool + err error +} + // New returns a new Manager. func New(sess *session.Session, options ...Option) *Manager { m := &Manager{ @@ -131,6 +140,60 @@ func (m *Manager) Sync(source, dest string) error { return errors.New("local to local sync is not supported") } +// HasDifference checks for differences with the sync destination based on the files present in the sync source. +func (m *Manager) HasDifference(source, dest string) (bool, error) { + sourceURL, err := url.Parse(source) + if err != nil { + return false, err + } + + destURL, err := url.Parse(dest) + if err != nil { + return false, err + } + + chJob := make(chan func()) + var wg sync.WaitGroup + for i := 0; i < m.nJobs; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for job := range chJob { + job() + } + }() + } + defer func() { + close(chJob) + wg.Wait() + }() + + if isS3URL(sourceURL) { + sourceS3Path, err := urlToS3Path(sourceURL) + if err != nil { + return false, err + } + if isS3URL(destURL) { + destS3Path, err := urlToS3Path(destURL) + if err != nil { + return false, err + } + return m.hasDifferenceS3ToS3(chJob, sourceS3Path, destS3Path) + } + return m.hasDifferenceS3ToLocal(chJob, sourceS3Path, dest) + } + + if isS3URL(destURL) { + destS3Path, err := urlToS3Path(destURL) + if err != nil { + return false, err + } + return m.hasDifferenceLocalToS3(chJob, source, destS3Path) + } + + return false, errors.New("local to local file differences check is not implemented") +} + func isS3URL(url *url.URL) bool { return url.Scheme == "s3" } @@ -202,6 +265,78 @@ func (m *Manager) syncS3ToLocal(chJob chan func(), sourcePath *s3Path, destPath return errs.ErrOrNil() } +func (m *Manager) hasDifferenceS3ToS3(chJob chan func(), sourcePath, destPath *s3Path) (bool, error) { + return false, errors.New("S3 to S3 file differences check feature is not implemented") +} + +func (m *Manager) hasDifferenceLocalToS3(chJob chan func(), sourcePath string, destPath *s3Path) (bool, error) { + wg := &sync.WaitGroup{} + hasDifference := &filesDifference{} + errs := &multiErr{} + + for checkFiles := range filterFilesForHasDifference( + listLocalFiles(sourcePath), m.listS3Files(destPath), + ) { + wg.Add(1) + checkFiles := checkFiles + chJob <- func() { + defer wg.Done() + if checkFiles.err != nil { + errs.Append(checkFiles.err) + return + } + + if !checkFiles.destFileExists { + hasDifference.Set(true) + return + } + + if isEqual, err := isEqualFile(checkFiles.sourceFile, checkFiles.destFile); err != nil { + errs.Append(err) + } else if !isEqual { + hasDifference.Set(true) + } + } + } + wg.Wait() + + return hasDifference.Get(), errs.ErrOrNil() +} + +func (m *Manager) hasDifferenceS3ToLocal(chJob chan func(), sourcePath *s3Path, destPath string) (bool, error) { + wg := &sync.WaitGroup{} + hasDifference := &filesDifference{} + errs := &multiErr{} + + for checkFiles := range filterFilesForHasDifference( + m.listS3Files(sourcePath), listLocalFiles(destPath), + ) { + wg.Add(1) + checkFiles := checkFiles + chJob <- func() { + defer wg.Done() + if checkFiles.err != nil { + errs.Append(checkFiles.err) + return + } + + if !checkFiles.destFileExists { + hasDifference.Set(true) + return + } + + if isEqual, err := isEqualFile(checkFiles.sourceFile, checkFiles.destFile); err != nil { + errs.Append(err) + } else if !isEqual { + hasDifference.Set(true) + } + } + } + wg.Wait() + + return hasDifference.Get(), errs.ErrOrNil() +} + func (m *Manager) download(file *fileInfo, sourcePath *s3Path, destPath string) error { var targetFilename string if !strings.HasSuffix(destPath, "/") && file.singleFile { @@ -394,6 +529,7 @@ func (m *Manager) listS3FileWithToken(c chan *fileInfo, path *s3Path, token *str size: *object.Size, lastModified: *object.LastModified, singleFile: true, + hash: strings.Replace(*object.ETag, "\"", "", -1), } } else { c <- &fileInfo{ @@ -401,6 +537,7 @@ func (m *Manager) listS3FileWithToken(c chan *fileInfo, path *s3Path, token *str path: *object.Key, size: *object.Size, lastModified: *object.LastModified, + hash: strings.Replace(*object.ETag, "\"", "", -1), } } } @@ -510,6 +647,37 @@ func filterFilesForSync(sourceFileChan, destFileChan chan *fileInfo, del bool) c return c } +// filterFilesForHasDifference filters the source files from the given destination files, and returns +// another channel which includes the file information necessary to be file differences check. +func filterFilesForHasDifference(sourceFileChan, destFileChan chan *fileInfo) chan *checkFiles { + c := make(chan *checkFiles) + + destFiles, err := fileInfoChanToMap(destFileChan) + + go func() { + defer close(c) + if err != nil { + c <- &checkFiles{err: err} + return + } + for sourceInfo := range sourceFileChan { + destInfo, ok := destFiles[sourceInfo.name] + if ok { + c <- &checkFiles{ + sourceFile: sourceInfo, + destFile: destInfo, + destFileExists: true, + } + } else { + println(fmt.Sprintf("The %s file does not exist in the destination to be compared", sourceInfo.name)) + c <- &checkFiles{destFileExists: false} + } + } + }() + + return c +} + // fileInfoChanToMap accumulates the fileInfos from the given channel and returns a map. // It retruns an error if the channel contains an error. func fileInfoChanToMap(files chan *fileInfo) (map[string]*fileInfo, error) { @@ -523,3 +691,35 @@ func fileInfoChanToMap(files chan *fileInfo) (map[string]*fileInfo, error) { } return result, nil } + +// isEqualFile use file size and hash value to check for identical files. +func isEqualFile(sourceFile, destFile *fileInfo) (bool, error) { + println(fmt.Sprintf("Checking File source:%s dest:%s", sourceFile.path, destFile.path)) + + // check file size + if sourceFile.size != destFile.size { + println(fmt.Sprintf("The size of the %s file does not match", sourceFile.name)) + return false, nil + } + + // check file hash + if sourceFile.hash == "" { + h, err := getMd5Hash(sourceFile.path) + if err != nil { + return false, err + } + sourceFile.hash = h + } else if destFile.hash == "" { + h, err := getMd5Hash(destFile.path) + if err != nil { + return false, err + } + destFile.hash = h + } + if sourceFile.hash != destFile.hash { + println(fmt.Sprintf("The hash values in the %s file do not match", sourceFile.name)) + return false, nil + } + + return true, nil +} diff --git a/s3sync_test.go b/s3sync_test.go index 1e04ed7..1236bdf 100644 --- a/s3sync_test.go +++ b/s3sync_test.go @@ -37,6 +37,14 @@ func TestS3syncNotImplemented(t *testing.T) { if err := m.Sync("s3://foo", "s3://bar"); err == nil { t.Fatal("s3 to s3 sync is not implemented yet") } + + if _, err := m.HasDifference("foo", "bar"); err == nil { + t.Fatal("local to local file differences check is not supported") + } + + if _, err := m.HasDifference("s3://foo", "s3://bar"); err == nil { + t.Fatal("s3 to s3 file differences check is not implemented yet") + } } func TestS3sync(t *testing.T) { @@ -238,6 +246,150 @@ func TestS3sync(t *testing.T) { }) } +func TestHasDifference(t *testing.T) { + t.Run("FileEmpty", func(t *testing.T) { + temp, err := ioutil.TempDir("", "s3synctest") + defer os.RemoveAll(temp) + + if err != nil { + t.Fatal("Failed to create temp dir") + } + + hasDiff, err := New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/empty") + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if hasDiff { + t.Fatal("There should be no difference in the files") + } + + hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/empty", temp) + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if hasDiff { + t.Fatal("There should be no difference in the files") + } + }) + + t.Run("FileOnly", func(t *testing.T) { + temp, err := ioutil.TempDir("", "s3synctest") + defer os.RemoveAll(temp) + + if err != nil { + t.Fatal("Failed to create temp dir") + } + + hasDiff, err := New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/file_only") + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if hasDiff { + t.Fatal("There should be no difference in the files") + } + hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/file_only", temp) + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if !hasDiff { + t.Fatal("There should be difference in the files") + } + + // Create a file in a temporary directory with the same contents as the dummy file. + data, err := ioutil.ReadFile(dummyFilename) + if err != nil { + t.Fatal("Failed to read", dummyFilename) + } + if err := ioutil.WriteFile(filepath.Join(temp, dummyFilename), data, 0644); err != nil { + t.Fatal("Failed to write", err) + } + + hasDiff, err = New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/file_only") + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if hasDiff { + t.Fatal("There should be no difference in the files") + } + hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/file_only", temp) + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if hasDiff { + t.Fatal("There should be no difference in the files") + } + }) + + t.Run("DirectoryOnly", func(t *testing.T) { + temp, err := ioutil.TempDir("", "s3synctest") + defer os.RemoveAll(temp) + + if err != nil { + t.Fatal("Failed to create temp dir") + } + + hasDiff, err := New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/directory_only") + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if hasDiff { + t.Fatal("There should be no difference in the files") + } + hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/directory_only", temp) + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if hasDiff { + t.Fatal("There should be no difference in the files") + } + }) + + t.Run("FileAndDirectory", func(t *testing.T) { + temp, err := ioutil.TempDir("", "s3synctest") + defer os.RemoveAll(temp) + + if err != nil { + t.Fatal("Failed to create temp dir") + } + + hasDiff, err := New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/file_and_directory") + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if hasDiff { + t.Fatal("There should be no difference in the files") + } + hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/file_and_directory", temp) + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if !hasDiff { + t.Fatal("There should be difference in the files") + } + + // Set the file structure to the same state as the destination. + if err := os.MkdirAll(filepath.Join(temp, "foo", "bar", "baz"), 0755); err != nil { + t.Fatal("Failed to mkdir", err) + } + data, err := ioutil.ReadFile(dummyFilename) + if err != nil { + t.Fatal("Failed to read", dummyFilename) + } + for _, file := range []string{ + filepath.Join(temp, "foo", dummyFilename), + filepath.Join(temp, "foo", "bar", dummyFilename), + filepath.Join(temp, "foo", "bar", "baz", dummyFilename), + } { + if err := ioutil.WriteFile(file, data, 0644); err != nil { + t.Fatal("Failed to write", err) + } + } + + hasDiff, err = New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/file_and_directory") + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if hasDiff { + t.Fatal("There should be no difference in the files") + } + hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/file_and_directory", temp) + if err != nil { + t.Fatal("HasDifference should be successful", err) + } else if hasDiff { + t.Fatal("There should be no difference in the files") + } + }) +} + func TestDelete(t *testing.T) { data, err := ioutil.ReadFile(dummyFilename) if err != nil { From 79bb8467e00e0525bb8e98c0a5960a5ae41f64e8 Mon Sep 17 00:00:00 2001 From: Gompei Date: Tue, 21 Dec 2021 17:04:12 +0900 Subject: [PATCH 2/2] Fix file difference check function --- Makefile | 11 +- files_difference.go | 31 ---- files_difference_test.go | 32 ---- md5.go | 37 ----- md5_test.go | 86 ---------- s3sync.go | 124 +++++++-------- s3sync_test.go | 333 +++++++++++++++++++++++++++++---------- 7 files changed, 308 insertions(+), 346 deletions(-) delete mode 100644 files_difference.go delete mode 100644 files_difference_test.go delete mode 100644 md5.go delete mode 100644 md5_test.go diff --git a/Makefile b/Makefile index 99f0006..6c5b3a8 100644 --- a/Makefile +++ b/Makefile @@ -50,9 +50,10 @@ fixture: aws s3 --endpoint-url http://localhost:4572 mb s3://example-bucket-mime aws s3api --endpoint-url http://localhost:4572 put-object --bucket example-bucket-directory --key test/ aws s3 --endpoint-url http://localhost:4572 mb s3://example-bucket-check-file-difference - aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/file_and_directory/foo/ - aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/file_and_directory/foo/bar/ - aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/file_and_directory/foo/bar/baz/ - aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/file_only/ aws s3api --endpoint-url http://localhost:4572 put-object --bucket example-bucket-check-file-difference --key empty/ - aws s3api --endpoint-url http://localhost:4572 put-object --bucket example-bucket-check-file-difference --key directory_only/foo/ + aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/equal/ + aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/equal/foo/ + aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/equal/foo/bar/ + aws s3 --endpoint-url http://localhost:4572 cp Makefile s3://example-bucket-check-file-difference/difference/ + aws s3 --endpoint-url http://localhost:4572 cp Makefile s3://example-bucket-check-file-difference/difference/foo/ + aws s3 --endpoint-url http://localhost:4572 cp Makefile s3://example-bucket-check-file-difference/difference/foo/bar/ diff --git a/files_difference.go b/files_difference.go deleted file mode 100644 index d0f6e0e..0000000 --- a/files_difference.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2021 SEQSENSE, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package s3sync - -import "sync" - -type filesDifference struct { - mu sync.Mutex - hasDifference bool -} - -func (c *filesDifference) Set(b bool) { - c.mu.Lock() - c.hasDifference = b - c.mu.Unlock() -} - -func (c *filesDifference) Get() bool { - return c.hasDifference -} diff --git a/files_difference_test.go b/files_difference_test.go deleted file mode 100644 index 8ba6640..0000000 --- a/files_difference_test.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2021 SEQSENSE, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package s3sync - -import "testing" - -func TestFilesDifference(t *testing.T) { - t.Run("false", func(t *testing.T) { - hasDifference := &filesDifference{} - if hasDifference.Get() { - t.Error("filesDifference should return false") - } - }) - t.Run("true", func(t *testing.T) { - hasDifference := &filesDifference{} - hasDifference.Set(true) - if !hasDifference.Get() { - t.Error("filesDifference should return true") - } - }) -} diff --git a/md5.go b/md5.go deleted file mode 100644 index df4e1f5..0000000 --- a/md5.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2021 SEQSENSE, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package s3sync - -import ( - "crypto/md5" - "fmt" - "io" - "os" -) - -// getMd5Hash return the MD5 hash value of the specified file -func getMd5Hash(filePath string) (string, error) { - file, err := os.Open(filePath) - if err != nil { - return "", err - } - defer file.Close() - - hash := md5.New() - if _, err := io.Copy(hash, file); err != nil { - return "", err - } - - return fmt.Sprintf("%x", hash.Sum(nil)), nil -} diff --git a/md5_test.go b/md5_test.go deleted file mode 100644 index b156a6f..0000000 --- a/md5_test.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2021 SEQSENSE, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package s3sync - -import ( - "io/ioutil" - "os" - "testing" -) - -func TestGetMd5Hash(t *testing.T) { - // The hash value should be obtained from a file - // with the same content that was created in advance. - tests := []struct { - name string - content string - wantHash string - hasError bool - }{ - { - "files with correct hash values", - "This is a test sentence.", - "4d6a0c4cf3f07eadd5ba147c67c6896f", - false, - }, - { - "files with correct hash values", - "column1,column2,column3\ntest,test,test", - "1ed7dedaf1bfac6642b52a19a4a1988c", - false, - }, - { - "not existed file", - "", - "", - true, - }, - } - - for _, tt := range tests { - temp, err := ioutil.TempDir("", "s3synctest") - defer os.RemoveAll(temp) - - if err != nil { - t.Fatal("Failed to create temp dir") - } - - t.Run(tt.name, func(t *testing.T) { - var testFile *os.File - var testFileName string - - if !tt.hasError { - testFile, err = ioutil.TempFile(temp, "s3synctest-") - if err != nil { - t.Fatal("Failed to create temp file") - } - testFileName = testFile.Name() - defer os.Remove(testFileName) - - if _, err = testFile.Write([]byte(tt.content)); err != nil { - t.Fatal("Failed to write temp file") - } - } - - got, err := getMd5Hash(testFileName) - if !tt.hasError && err != nil { - t.Errorf("got err. err:%v, want: err==nil", err) - } - - if got != tt.wantHash { - t.Fatalf("Unexpected hash. got: %v, want: %v", got, tt.wantHash) - } - }) - } -} diff --git a/s3sync.go b/s3sync.go index 7024a18..a5f251f 100644 --- a/s3sync.go +++ b/s3sync.go @@ -58,7 +58,6 @@ type fileInfo struct { lastModified time.Time singleFile bool existsInSource bool - hash string } type fileOp struct { @@ -67,10 +66,9 @@ type fileOp struct { } type checkFiles struct { - sourceFile *fileInfo - destFile *fileInfo - destFileExists bool - err error + sourceFile *fileInfo + destFile *fileInfo + err error } // New returns a new Manager. @@ -271,70 +269,62 @@ func (m *Manager) hasDifferenceS3ToS3(chJob chan func(), sourcePath, destPath *s func (m *Manager) hasDifferenceLocalToS3(chJob chan func(), sourcePath string, destPath *s3Path) (bool, error) { wg := &sync.WaitGroup{} - hasDifference := &filesDifference{} + var hasDifference bool errs := &multiErr{} - for checkFiles := range filterFilesForHasDifference( + for files := range filterFilesForHasDifference( listLocalFiles(sourcePath), m.listS3Files(destPath), ) { wg.Add(1) - checkFiles := checkFiles + files := files chJob <- func() { defer wg.Done() - if checkFiles.err != nil { - errs.Append(checkFiles.err) + if files.err != nil { + errs.Append(files.err) return } - - if !checkFiles.destFileExists { - hasDifference.Set(true) + notEqual, err := hasDifferenceFile(files) + if err != nil { + errs.Append(files.err) return - } - - if isEqual, err := isEqualFile(checkFiles.sourceFile, checkFiles.destFile); err != nil { - errs.Append(err) - } else if !isEqual { - hasDifference.Set(true) + } else if notEqual { + hasDifference = true } } } wg.Wait() - return hasDifference.Get(), errs.ErrOrNil() + return hasDifference, errs.ErrOrNil() } func (m *Manager) hasDifferenceS3ToLocal(chJob chan func(), sourcePath *s3Path, destPath string) (bool, error) { wg := &sync.WaitGroup{} - hasDifference := &filesDifference{} + var hasDifference bool errs := &multiErr{} - for checkFiles := range filterFilesForHasDifference( + for files := range filterFilesForHasDifference( m.listS3Files(sourcePath), listLocalFiles(destPath), ) { wg.Add(1) - checkFiles := checkFiles + files := files chJob <- func() { defer wg.Done() - if checkFiles.err != nil { - errs.Append(checkFiles.err) + if files.err != nil { + errs.Append(files.err) return } - - if !checkFiles.destFileExists { - hasDifference.Set(true) + notEqual, err := hasDifferenceFile(files) + if err != nil { + errs.Append(files.err) return - } - - if isEqual, err := isEqualFile(checkFiles.sourceFile, checkFiles.destFile); err != nil { - errs.Append(err) - } else if !isEqual { - hasDifference.Set(true) + } else if notEqual { + hasDifference = true } } } wg.Wait() - return hasDifference.Get(), errs.ErrOrNil() + return hasDifference, errs.ErrOrNil() } func (m *Manager) download(file *fileInfo, sourcePath *s3Path, destPath string) error { @@ -529,7 +519,6 @@ func (m *Manager) listS3FileWithToken(c chan *fileInfo, path *s3Path, token *str size: *object.Size, lastModified: *object.LastModified, singleFile: true, - hash: strings.Replace(*object.ETag, "\"", "", -1), } } else { c <- &fileInfo{ @@ -537,7 +526,6 @@ func (m *Manager) listS3FileWithToken(c chan *fileInfo, path *s3Path, token *str path: *object.Key, size: *object.Size, lastModified: *object.LastModified, - hash: strings.Replace(*object.ETag, "\"", "", -1), } } } @@ -627,7 +615,7 @@ func filterFilesForSync(sourceFileChan, destFileChan chan *fileInfo, del bool) c // 1. The dest doesn't exist // 2. The dest doesn't have the same size as the source // 3. The dest is older than the source - if !ok || sourceInfo.size != destInfo.size || sourceInfo.lastModified.After(destInfo.lastModified) { + if !ok || isSyncTargetFile(sourceInfo, destInfo) { c <- &fileOp{fileInfo: sourceInfo} } if ok { @@ -664,13 +652,13 @@ func filterFilesForHasDifference(sourceFileChan, destFileChan chan *fileInfo) ch destInfo, ok := destFiles[sourceInfo.name] if ok { c <- &checkFiles{ - sourceFile: sourceInfo, - destFile: destInfo, - destFileExists: true, + sourceFile: sourceInfo, + destFile: destInfo, } } else { - println(fmt.Sprintf("The %s file does not exist in the destination to be compared", sourceInfo.name)) - c <- &checkFiles{destFileExists: false} + c <- &checkFiles{ + sourceFile: sourceInfo, + } } } }() @@ -692,34 +680,28 @@ func fileInfoChanToMap(files chan *fileInfo) (map[string]*fileInfo, error) { return result, nil } -// isEqualFile use file size and hash value to check for identical files. -func isEqualFile(sourceFile, destFile *fileInfo) (bool, error) { - println(fmt.Sprintf("Checking File source:%s dest:%s", sourceFile.path, destFile.path)) - - // check file size - if sourceFile.size != destFile.size { - println(fmt.Sprintf("The size of the %s file does not match", sourceFile.name)) - return false, nil - } +// hasDifferenceFile checks for differences in the specified file. +func hasDifferenceFile(checkFiles *checkFiles) (bool, error) { + switch { + case checkFiles == nil || checkFiles.sourceFile == nil: + return false, errors.New("missing information needed to compare files") + case checkFiles.destFile == nil: + println(fmt.Sprintf("file %s does not exist at the comparison destination", checkFiles.sourceFile.name)) + return true, nil + case isSyncTargetFile(checkFiles.sourceFile, checkFiles.destFile): + println(fmt.Sprintf("the %s file statuses of the source and destination files do not match", checkFiles.sourceFile.name)) + return true, nil + } + return false, nil +} - // check file hash - if sourceFile.hash == "" { - h, err := getMd5Hash(sourceFile.path) - if err != nil { - return false, err - } - sourceFile.hash = h - } else if destFile.hash == "" { - h, err := getMd5Hash(destFile.path) - if err != nil { - return false, err - } - destFile.hash = h +// isSyncTargetFile determines if the target file should be updated based on the source file to be compared. +// If either file to be compared is nil, return false. +func isSyncTargetFile(sourceFile, destFile *fileInfo) bool { + if sourceFile == nil || destFile == nil { + return false + } else if sourceFile.size != destFile.size || sourceFile.lastModified.After(destFile.lastModified) { + return true } - if sourceFile.hash != destFile.hash { - println(fmt.Sprintf("The hash values in the %s file do not match", sourceFile.name)) - return false, nil - } - - return true, nil + return false } diff --git a/s3sync_test.go b/s3sync_test.go index 1236bdf..b485001 100644 --- a/s3sync_test.go +++ b/s3sync_test.go @@ -38,13 +38,13 @@ func TestS3syncNotImplemented(t *testing.T) { t.Fatal("s3 to s3 sync is not implemented yet") } - if _, err := m.HasDifference("foo", "bar"); err == nil { - t.Fatal("local to local file differences check is not supported") - } - if _, err := m.HasDifference("s3://foo", "s3://bar"); err == nil { t.Fatal("s3 to s3 file differences check is not implemented yet") } + + if _, err := m.HasDifference("foo", "bar"); err == nil { + t.Fatal("local to local file differences check is not supported") + } } func TestS3sync(t *testing.T) { @@ -247,22 +247,30 @@ func TestS3sync(t *testing.T) { } func TestHasDifference(t *testing.T) { - t.Run("FileEmpty", func(t *testing.T) { - temp, err := ioutil.TempDir("", "s3synctest") - defer os.RemoveAll(temp) + data, err := ioutil.ReadFile(dummyFilename) + if err != nil { + t.Fatal("Failed to read", dummyFilename) + } + dummyFileSize := len(data) + + t.Run("Empty Dir", func(t *testing.T) { + s3URL := "s3://example-bucket-check-file-difference/empty" + + localTempDir, err := ioutil.TempDir("", "s3synctest") + defer os.RemoveAll(localTempDir) if err != nil { t.Fatal("Failed to create temp dir") } - hasDiff, err := New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/empty") + hasDiff, err := New(getSession()).HasDifference(localTempDir, s3URL) if err != nil { t.Fatal("HasDifference should be successful", err) } else if hasDiff { t.Fatal("There should be no difference in the files") } - hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/empty", temp) + hasDiff, err = New(getSession()).HasDifference(s3URL, localTempDir) if err != nil { t.Fatal("HasDifference should be successful", err) } else if hasDiff { @@ -270,65 +278,51 @@ func TestHasDifference(t *testing.T) { } }) - t.Run("FileOnly", func(t *testing.T) { - temp, err := ioutil.TempDir("", "s3synctest") - defer os.RemoveAll(temp) + t.Run("Equal Files", func(t *testing.T) { + s3URL := "s3://example-bucket-check-file-difference/equal" - if err != nil { - t.Fatal("Failed to create temp dir") - } + localTempDir, err := ioutil.TempDir("", "s3synctest") + defer os.RemoveAll(localTempDir) - hasDiff, err := New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/file_only") if err != nil { - t.Fatal("HasDifference should be successful", err) - } else if hasDiff { - t.Fatal("There should be no difference in the files") - } - hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/file_only", temp) - if err != nil { - t.Fatal("HasDifference should be successful", err) - } else if !hasDiff { - t.Fatal("There should be difference in the files") + t.Fatal("Failed to create temp dir") } - // Create a file in a temporary directory with the same contents as the dummy file. - data, err := ioutil.ReadFile(dummyFilename) - if err != nil { - t.Fatal("Failed to read", dummyFilename) + // Set the file structure to the same state as the comparison target. + if err := os.MkdirAll(filepath.Join(localTempDir, "foo", "bar"), 0755); err != nil { + t.Fatal("Failed to mkdir", err) } - if err := ioutil.WriteFile(filepath.Join(temp, dummyFilename), data, 0644); err != nil { - t.Fatal("Failed to write", err) + for _, file := range []string{ + filepath.Join(localTempDir, dummyFilename), + filepath.Join(localTempDir, "foo", dummyFilename), + filepath.Join(localTempDir, "foo", "bar", dummyFilename), + } { + if err := ioutil.WriteFile(file, make([]byte, dummyFileSize), 0644); err != nil { + t.Fatal("Failed to write", err) + } } - hasDiff, err = New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/file_only") + hasDiff, err := New(getSession()).HasDifference(s3URL, localTempDir) if err != nil { t.Fatal("HasDifference should be successful", err) } else if hasDiff { t.Fatal("There should be no difference in the files") } - hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/file_only", temp) - if err != nil { - t.Fatal("HasDifference should be successful", err) - } else if hasDiff { - t.Fatal("There should be no difference in the files") - } - }) - t.Run("DirectoryOnly", func(t *testing.T) { - temp, err := ioutil.TempDir("", "s3synctest") - defer os.RemoveAll(temp) - - if err != nil { - t.Fatal("Failed to create temp dir") + // If the update time is newer than the comparison target file, + // the last update time is changed to earlier than the comparison target file because the file is to be updated. + oldTime := time.Date(1980, time.January, 1, 0, 0, 0, 0, time.UTC) + for _, file := range []string{ + filepath.Join(localTempDir, dummyFilename), + filepath.Join(localTempDir, "foo", dummyFilename), + filepath.Join(localTempDir, "foo", "bar", dummyFilename), + } { + if err := os.Chtimes(file, oldTime, oldTime); err != nil { + t.Fatal("Failed to changes the access and modification times", err) + } } - hasDiff, err := New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/directory_only") - if err != nil { - t.Fatal("HasDifference should be successful", err) - } else if hasDiff { - t.Fatal("There should be no difference in the files") - } - hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/directory_only", temp) + hasDiff, err = New(getSession()).HasDifference(localTempDir, s3URL) if err != nil { t.Fatal("HasDifference should be successful", err) } else if hasDiff { @@ -336,56 +330,41 @@ func TestHasDifference(t *testing.T) { } }) - t.Run("FileAndDirectory", func(t *testing.T) { - temp, err := ioutil.TempDir("", "s3synctest") - defer os.RemoveAll(temp) + t.Run("Different Files", func(t *testing.T) { + s3URL := "s3://example-bucket-check-file-difference/difference" - if err != nil { - t.Fatal("Failed to create temp dir") - } + localTempDir, err := ioutil.TempDir("", "s3synctest") + defer os.RemoveAll(localTempDir) - hasDiff, err := New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/file_and_directory") - if err != nil { - t.Fatal("HasDifference should be successful", err) - } else if hasDiff { - t.Fatal("There should be no difference in the files") - } - hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/file_and_directory", temp) if err != nil { - t.Fatal("HasDifference should be successful", err) - } else if !hasDiff { - t.Fatal("There should be difference in the files") + t.Fatal("Failed to create temp dir") } - // Set the file structure to the same state as the destination. - if err := os.MkdirAll(filepath.Join(temp, "foo", "bar", "baz"), 0755); err != nil { + if err := os.MkdirAll(filepath.Join(localTempDir, "foo", "bar"), 0755); err != nil { t.Fatal("Failed to mkdir", err) } - data, err := ioutil.ReadFile(dummyFilename) - if err != nil { - t.Fatal("Failed to read", dummyFilename) - } for _, file := range []string{ - filepath.Join(temp, "foo", dummyFilename), - filepath.Join(temp, "foo", "bar", dummyFilename), - filepath.Join(temp, "foo", "bar", "baz", dummyFilename), + filepath.Join(localTempDir, dummyFilename), + filepath.Join(localTempDir, "foo", dummyFilename), + filepath.Join(localTempDir, "foo", "bar", dummyFilename), } { - if err := ioutil.WriteFile(file, data, 0644); err != nil { + if err := ioutil.WriteFile(file, make([]byte, dummyFileSize), 0644); err != nil { t.Fatal("Failed to write", err) } } - hasDiff, err = New(getSession()).HasDifference(temp, "s3://example-bucket-check-file-difference/file_and_directory") + hasDiff, err := New(getSession()).HasDifference(localTempDir, s3URL) if err != nil { t.Fatal("HasDifference should be successful", err) - } else if hasDiff { - t.Fatal("There should be no difference in the files") + } else if !hasDiff { + t.Fatal("There should be difference in the files") } - hasDiff, err = New(getSession()).HasDifference("s3://example-bucket-check-file-difference/file_and_directory", temp) + + hasDiff, err = New(getSession()).HasDifference(s3URL, localTempDir) if err != nil { t.Fatal("HasDifference should be successful", err) - } else if hasDiff { - t.Fatal("There should be no difference in the files") + } else if !hasDiff { + t.Fatal("There should be difference in the files") } }) } @@ -840,6 +819,192 @@ func TestS3sync_GuessMime(t *testing.T) { } } +func TestHasDifferenceFile(t *testing.T) { + nowTime := time.Now() + + testCases := map[string]struct { + files *checkFiles + wantErr bool + expected bool + }{ + "Equal File": { + files: &checkFiles{ + sourceFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + destFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + }, + expected: false, + }, + "Not Equal File Size": { + files: &checkFiles{ + sourceFile: &fileInfo{ + size: 20, + lastModified: nowTime, + }, + destFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + }, + expected: true, + }, + "Old Last Modified Dest File": { + files: &checkFiles{ + sourceFile: &fileInfo{ + size: 10, + lastModified: nowTime.Add(1 * time.Hour), + }, + destFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + }, + expected: true, + }, + "Old Last Modified Source File": { + files: &checkFiles{ + sourceFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + destFile: &fileInfo{ + size: 10, + lastModified: nowTime.Add(1 * time.Hour), + }, + }, + expected: false, + }, + "No comparison source file information": { + files: &checkFiles{ + sourceFile: nil, + destFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + }, + expected: false, + wantErr: true, + }, + "No comparison dest file information": { + files: &checkFiles{ + sourceFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + destFile: nil, + }, + expected: true, + }, + "No comparison file information": { + files: &checkFiles{ + sourceFile: nil, + destFile: nil, + }, + expected: false, + wantErr: true, + }, + } + for name, tt := range testCases { + t.Run(name, func(t *testing.T) { + got, err := hasDifferenceFile(tt.files) + if err != nil && !tt.wantErr { + t.Fatal("hasDifferenceFile should be successful", err) + } + if got != tt.expected { + t.Fatalf("expected to be %t, actual %t", tt.expected, !tt.expected) + } + }) + } +} + +func TestIsSyncTargetFile(t *testing.T) { + nowTime := time.Now() + + testCases := map[string]struct { + sourceFile *fileInfo + destFile *fileInfo + expected bool + }{ + "Equal File": { + sourceFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + destFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + expected: false, + }, + "Not Equal File Size": { + sourceFile: &fileInfo{ + size: 20, + lastModified: nowTime, + }, + destFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + expected: true, + }, + "Old Last Modified Dest File": { + sourceFile: &fileInfo{ + size: 10, + lastModified: nowTime.Add(1 * time.Hour), + }, + destFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + expected: true, + }, + "Old Last Modified Source File": { + sourceFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + destFile: &fileInfo{ + size: 10, + lastModified: nowTime.Add(1 * time.Hour), + }, + expected: false, + }, + "No comparison source file information": { + sourceFile: nil, + destFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + expected: false, + }, + "No comparison dest file information": { + sourceFile: &fileInfo{ + size: 10, + lastModified: nowTime, + }, + destFile: nil, + expected: false, + }, + "No comparison file information": { + sourceFile: nil, + destFile: nil, + expected: false, + }, + } + for name, tt := range testCases { + t.Run(name, func(t *testing.T) { + if tt.expected != isSyncTargetFile(tt.sourceFile, tt.destFile) { + t.Fatalf("expected to be %t, actual %t", tt.expected, !tt.expected) + } + }) + } +} + type dummyLogger struct { log func(...interface{}) }