From 9c50b14fd3b1ee605298cb176c316089c083c4ea Mon Sep 17 00:00:00 2001
From: Ben Boyter
Date: Wed, 10 Dec 2025 08:46:35 +1100
Subject: [PATCH 1/4] working on hashignore support and other exclusions

---
 go.mod | 3 +
 go.sum | 4 +
 main.go | 40 +
 processor/file.go | 49 ++
 processor/processor.go | 18 +
 .../github.com/boyter/gocodewalker/.gitignore | 18 +
 .../boyter/gocodewalker/.goreleaser.yml | 40 +
 vendor/github.com/boyter/gocodewalker/.ignore | 2 +
 vendor/github.com/boyter/gocodewalker/LICENSE | 7 +
 .../github.com/boyter/gocodewalker/Makefile | 32 +
 .../github.com/boyter/gocodewalker/README.md | 131 ++++
 .../boyter/gocodewalker/dir_suffix.go | 30 +
 vendor/github.com/boyter/gocodewalker/file.go | 727 ++++++++++++++++++
 .../boyter/gocodewalker/gitmodule.go | 23 +
 .../boyter/gocodewalker/go-gitignore/LICENSE | 21 +
 .../gocodewalker/go-gitignore/README.md | 94 +++
 .../boyter/gocodewalker/go-gitignore/cache.go | 62 ++
 .../boyter/gocodewalker/go-gitignore/doc.go | 10 +
 .../boyter/gocodewalker/go-gitignore/error.go | 32 +
 .../gocodewalker/go-gitignore/errors.go | 13 +
 .../gocodewalker/go-gitignore/exclude.go | 42 +
 .../gocodewalker/go-gitignore/gitignore.go | 338 ++++++++
 .../boyter/gocodewalker/go-gitignore/lexer.go | 477 ++++++++++++
 .../boyter/gocodewalker/go-gitignore/match.go | 25 +
 .../gocodewalker/go-gitignore/parser.go | 446 +++++++++++
 .../gocodewalker/go-gitignore/pattern.go | 288 +++++++
 .../gocodewalker/go-gitignore/position.go | 37 +
 .../gocodewalker/go-gitignore/repository.go | 276 +++++++
 .../boyter/gocodewalker/go-gitignore/rune.go | 17 +
 .../boyter/gocodewalker/go-gitignore/token.go | 45 ++
 .../gocodewalker/go-gitignore/tokenset.go | 17 +
 .../gocodewalker/go-gitignore/tokentype.go | 44 ++
 .../github.com/boyter/gocodewalker/hidden.go | 19 +
 .../boyter/gocodewalker/hidden_windows.go | 30 +
 .../danwakefield/fnmatch/.gitignore | 24 +
 .../github.com/danwakefield/fnmatch/LICENSE | 23 +
 .../github.com/danwakefield/fnmatch/README.md | 4 +
 .../danwakefield/fnmatch/fnmatch.go | 219 ++++++
 vendor/golang.org/x/sync/LICENSE | 27 +
 vendor/golang.org/x/sync/PATENTS | 22 +
 vendor/golang.org/x/sync/errgroup/errgroup.go | 151 ++++
 vendor/modules.txt | 10 +
 42 files changed, 3937 insertions(+)
 create mode 100644 vendor/github.com/boyter/gocodewalker/.gitignore
 create mode 100644 vendor/github.com/boyter/gocodewalker/.goreleaser.yml
 create mode 100644 vendor/github.com/boyter/gocodewalker/.ignore
 create mode 100644 vendor/github.com/boyter/gocodewalker/LICENSE
 create mode 100644 vendor/github.com/boyter/gocodewalker/Makefile
 create mode 100644 vendor/github.com/boyter/gocodewalker/README.md
 create mode 100644 vendor/github.com/boyter/gocodewalker/dir_suffix.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/file.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/gitmodule.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/LICENSE
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/README.md
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/cache.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/doc.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/error.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/errors.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/exclude.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/gitignore.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/lexer.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/match.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/parser.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/pattern.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/position.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/repository.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/rune.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/token.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/tokenset.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/go-gitignore/tokentype.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/hidden.go
 create mode 100644 vendor/github.com/boyter/gocodewalker/hidden_windows.go
 create mode 100644 vendor/github.com/danwakefield/fnmatch/.gitignore
 create mode 100644 vendor/github.com/danwakefield/fnmatch/LICENSE
 create mode 100644 vendor/github.com/danwakefield/fnmatch/README.md
 create mode 100644 vendor/github.com/danwakefield/fnmatch/fnmatch.go
 create mode 100644 vendor/golang.org/x/sync/LICENSE
 create mode 100644 vendor/golang.org/x/sync/PATENTS
 create mode 100644 vendor/golang.org/x/sync/errgroup/errgroup.go

diff --git a/go.mod b/go.mod
index 91bcc8f..7832656 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/boyter/hashit
 go 1.25.0
 require (
+	github.com/boyter/gocodewalker v1.5.1
 	github.com/cespare/xxhash/v2 v2.3.0
 	github.com/djherbis/times v1.6.0
 	github.com/gosuri/uiprogress v0.0.1
@@ -15,6 +16,7 @@ require (
+	github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/gosuri/uilive v0.0.4 // indirect
@@ -25,6 +27,7 @@ require (
 	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
 	golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
+	golang.org/x/sync v0.16.0 // indirect
 	golang.org/x/sys v0.36.0 // indirect
 	modernc.org/libc v1.66.10 // indirect
 	modernc.org/mathutil v1.7.1 // indirect
diff --git a/go.sum b/go.sum
index 0bb31d9..e856015 100644
--- a/go.sum
+++ b/go.sum
@@ -1,6 +1,10 @@
+github.com/boyter/gocodewalker v1.5.1 h1:0YeK2QAkd+ymW5MsagMZapIXD3v9/vrZl0HkFSLpKsw=
+github.com/boyter/gocodewalker v1.5.1/go.mod h1:9k+yM6+fIx61F0xI9ChXEGE5DYoLhggw8AxSOtW+kKo=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 h1:y5HC9v93H5EPKqaS1UYVg1uYah5Xf51mBfIoWehClUQ=
+github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964/go.mod h1:Xd9hchkHSWYkEqJwUGisez3G1QY8Ryz0sdWrLPMGjLk=
 github.com/djherbis/times v1.6.0 h1:w2ctJ92J8fBvWPxugmXIv7Nz7Q3iDMKNx9v5ocVH20c=
 github.com/djherbis/times v1.6.0/go.mod h1:gOHeRAz2h+VJNZ5Gmc/o7iD9k4wW7NMVqieYCY99oc0=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
diff --git a/main.go b/main.go
index 86b95f7..7141fe0 100644
--- a/main.go
+++ b/main.go
@@ -3,6 +3,7 @@
 package main
 import (
+	"fmt"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -33,6 +34,7 @@ func main() {
 			}
 		}
+		fmt.Println(processor.GitIgnore, processor.GitModuleIgnore)
 		processor.DirFilePaths = filePaths
 		processor.Process()
 	},
 }
@@ -145,6 +147,44 @@ func main() {
 		"input file of newline seperated file locations to process",
 	)
+	flags.BoolVar(
+		&processor.GitIgnore,
+		"gitignore",
+		false,
+		"enable .gitignore file logic",
+	)
+	flags.BoolVar(
+		&processor.GitModuleIgnore,
+		"gitmodule",
+		false,
+		"enable .gitmodules file logic",
+	)
+	flags.StringSliceVar(
+		&processor.PathDenyList,
+		"exclude-dir",
+		[]string{},
+		"directories to exclude",
+	)
+	flags.BoolVar(
+		&processor.Ignore,
+		"ignore",
+		false,
+		"enable .ignore file logic",
+	)
+	flags.BoolVar(
+		&processor.HashIgnore,
+		"hashignore",
+		false,
+		"enable .hashignore file logic",
+	)
+	flags.StringArrayVarP(
+		&processor.Exclude,
+		"not-match",
+		`M`,
+		[]string{},
+		"ignore files and directories matching regular expression",
+	)
+
 	if err := rootCmd.Execute(); err != nil {
 		os.Exit(1)
 	}
diff --git a/processor/file.go b/processor/file.go
index 01d5de7..71198b3 100644
--- a/processor/file.go
+++ b/processor/file.go
@@ -4,8 +4,10 @@ package processor
 import (
 	"fmt"
+	"github.com/boyter/gocodewalker"
 	"os"
 	"path/filepath"
+	"regexp"
 )
 func walkDirectory(toWalk string, output chan string) {
@@ -28,3 +30,50 @@
 		}
 	}
 }
+
+func walkDirectoryWithIgnore(toWalk string, output chan string) {
+	fileListQueue := make(chan *gocodewalker.File, 1000)
+	fileWalker := gocodewalker.NewFileWalker(toWalk, fileListQueue)
+
+	// we only want to have a custom ignore file
+	fileWalker.IgnoreGitIgnore = true
+	fileWalker.IgnoreIgnoreFile = true
+
+	fileWalker.IgnoreGitIgnore = GitIgnore
+	fileWalker.IgnoreIgnoreFile = Ignore
+	fileWalker.IgnoreGitModules = GitModuleIgnore
+	fileWalker.IncludeHidden = true
+	fileWalker.ExcludeDirectory = PathDenyList
+
+	if HashIgnore {
+		fileWalker.CustomIgnore = []string{".hashignore"}
+	}
+
+	// handle the errors by printing them out and then ignore
+	errorHandler := func(err error) bool {
+		printError(err.Error())
+		return true
+	}
+	fileWalker.SetErrorHandler(errorHandler)
+
+	for _, exclude := range Exclude {
+		regexpResult, err := regexp.Compile(exclude)
+		if err == nil {
+			fileWalker.ExcludeFilenameRegex = append(fileWalker.ExcludeFilenameRegex, regexpResult)
+			fileWalker.ExcludeDirectoryRegex = append(fileWalker.ExcludeDirectoryRegex, regexpResult)
+		} else {
+			printError(err.Error())
+		}
+	}
+
+	go func() {
+		err := fileWalker.Start()
+		if err != nil {
+			printError(err.Error())
+		}
+	}()
+
+	for f := range fileListQueue {
+		fmt.Println(f.Location)
+	}
+}
diff --git a/processor/processor.go b/processor/processor.go
index 733c86b..cc3ca1f 100644
--- a/processor/processor.go
+++ b/processor/processor.go
@@ -71,6 +71,24 @@ var StreamSize int64 = 1_000_000
 // FileInput indicates we have a file passed in which consists of a
 var FileInput = ""
+// GitIgnore set to true to enable .gitignore checks
+var GitIgnore = false
+
+// GitModuleIgnore set to true to enable .gitmodules checks
+var GitModuleIgnore = false
+
+// Ignore set to true to enable ignore file checks
+var Ignore = false
+
+// HashIgnore set to true to enable hashignore file checks
+var HashIgnore = true
+
+// PathDenyList sets the paths that should be skipped
+var PathDenyList = []string{}
+
+// Exclude is a list of regular expressions used to exclude files from being processed
+var Exclude = []string{}
+
 var NoThreads = runtime.NumCPU()
 // String mapping for hash names
diff --git a/vendor/github.com/boyter/gocodewalker/.gitignore b/vendor/github.com/boyter/gocodewalker/.gitignore
new file
mode 100644 index 0000000..eb24029 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/.gitignore @@ -0,0 +1,18 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +.idea +dist/ diff --git a/vendor/github.com/boyter/gocodewalker/.goreleaser.yml b/vendor/github.com/boyter/gocodewalker/.goreleaser.yml new file mode 100644 index 0000000..cb5c981 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/.goreleaser.yml @@ -0,0 +1,40 @@ +# This is an example .goreleaser.yml file with some sensible defaults. +# Make sure to check the documentation at https://goreleaser.com +before: + hooks: + # You may remove this if you don't use go modules. + - go mod tidy + # you may remove this if you don't need go generate + - go generate ./... +builds: + - skip: true + +archives: + - format: tar.gz + # this name template makes the OS and Arch compatible with the results of uname. + name_template: >- + {{ .ProjectName }}_ + {{- title .Os }}_ + {{- if eq .Arch "amd64" }}x86_64 + {{- else if eq .Arch "386" }}i386 + {{- else }}{{ .Arch }}{{ end }} + {{- if .Arm }}v{{ .Arm }}{{ end }} + # use zip for windows archives + format_overrides: + - goos: windows + format: zip +checksum: + name_template: 'checksums.txt' +snapshot: + name_template: "{{ incpatch .Version }}-next" +changelog: + sort: asc + filters: + exclude: + - '^docs:' + - '^test:' + +# The lines beneath this are called `modelines`. See `:help modeline` +# Feel free to remove those if you don't want/use them. +# yaml-language-server: $schema=https://goreleaser.com/static/schema.json +# vim: set ts=2 sw=2 tw=0 fo=cnqoj diff --git a/vendor/github.com/boyter/gocodewalker/.ignore b/vendor/github.com/boyter/gocodewalker/.ignore new file mode 100644 index 0000000..5ef06f7 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/.ignore @@ -0,0 +1,2 @@ +/ +go-gitignore \ No newline at end of file diff --git a/vendor/github.com/boyter/gocodewalker/LICENSE b/vendor/github.com/boyter/gocodewalker/LICENSE new file mode 100644 index 0000000..f3374c9 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/LICENSE @@ -0,0 +1,7 @@ +MIT License Copyright (c) 2021 Ben Boyter + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/vendor/github.com/boyter/gocodewalker/Makefile b/vendor/github.com/boyter/gocodewalker/Makefile new file mode 100644 index 0000000..d779648 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/Makefile @@ -0,0 +1,32 @@ +# Some people have gotestsum installed and like it so use it if it exists +HAS_GOTESTSUM := $(shell which gotestsum) +ifdef HAS_GOTESTSUM + TEST_CMD = gotestsum --format testname --packages="./..." -- -count=1 -tags=integration -v -p 1 +else + TEST_CMD = go test ./... --count=1 -tags=integration +endif + +lint: + @golangci-lint run --fix + @golangci-lint run + +test: + @$(TEST_CMD) + +test-run: + @$(TEST_CMD) -run=$(RUN) + +fuzz: + go test -fuzz=FuzzTestGitIgnore -fuzztime 30s + +test-coverage: + go test ./... -coverprofile coverage.out && go tool cover -html=coverage.out -o coverage.html + +mod: + @go mod tidy + @go mod vendor + +clean: + go clean -modcache + +all: mod lint test fuzz diff --git a/vendor/github.com/boyter/gocodewalker/README.md b/vendor/github.com/boyter/gocodewalker/README.md new file mode 100644 index 0000000..08e2563 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/README.md @@ -0,0 +1,131 @@ +# gocodewalker + +[![Go Report Card](https://goreportcard.com/badge/github.com/boyter/gocodewalker)](https://goreportcard.com/report/github.com/boyter/gocodewalker) +[![Str Count Badge](https://sloc.xyz/github/boyter/gocodewalker/)](https://github.com/boyter/gocodewalker/) + +Library to help with walking of code directories in Go. + +The problem. You want to walk the directories of a code repository. You want to respect .gitignore and .ignore files, and +some are nested. This library is the answer. + + - Designed to walk code repositories or find the root of them. + - By default, respects both .gitignore and .ignore files (can be disabled) and nested ones for accuracy + - Has configurable options for skipping files based on regex, extension or general match + - Uses readdir to provide as fast as possible file walking + +NB this was moved from go-code-walker due to the name being annoying and to ensure it has a unique package name. Should still be drop in replaceable +so long as you refer to the new package name. + +https://pkg.go.dev/github.com/boyter/gocodewalker + +Package provides file operations specific to code repositories such as walking the file tree obeying .ignore and .gitignore files +or looking for the root directory assuming already in a git project. + +Example of usage, + +```go +fileListQueue := make(chan *gocodewalker.File, 100) + +fileWalker := gocodewalker.NewFileWalker(".", fileListQueue) + +// restrict to only process files that have the .go extension +fileWalker.AllowListExtensions = append(fileWalker.AllowListExtensions, "go") + +// handle the errors by printing them out and then ignore +errorHandler := func(e error) bool { + fmt.Println("ERR", e.Error()) + return true +} +fileWalker.SetErrorHandler(errorHandler) + +go fileWalker.Start() + +for f := range fileListQueue { + fmt.Println(f.Location) +} +``` + +The above by default will recursively add files to the fileListQueue respecting both .ignore and .gitignore files if found, and +only adding files with the go extension into the queue. 
+ +You can also run the walker in parallel with the results intermixed if required, + +```go +fileListQueue := make(chan *gocodewalker.File, 100) + +fileWalker := gocodewalker.NewParallelFileWalker([]string{".", "someotherdir"}, fileListQueue) +go fileWalker.Start() + +for f := range fileListQueue { + fmt.Println(f.Location) +} +``` + +All code is licenced as MIT. + +### Error Handler + +You can supply your own error handler when walking. This allows you to perform an action when there is an error +and decide if the walker should continue to process, or return. + +The simplest handler is the below, which if set will swallow all errors and continue as best it can. + +```go +errorHandler := func(e error) bool { + return true +} +fileWalker.SetErrorHandler(errorHandler) +``` + +If you wanted to return on errors you could use the following. + +```go +errorHandler := func(e error) bool { + return false +} +fileWalker.SetErrorHandler(errorHandler) +``` + +If you wanted to terminate walking on an error you could use the following, which would cause it to return the error, +and then terminate all walking. This might be desirable where any error indicates a total failure. + +```go +errorHandler := func(e error) bool { + fileWalker.Terminate() + return false +} +fileWalker.SetErrorHandler(errorHandler) +``` + +### Binary Checking + +You can ask it to ignore binary files for you by setting `IgnoreBinaryFiles` to true and optionally +`IgnoreBinaryFileBytes` to the number of bytes you want to check which by default is set to 1,000. + +This will have a performance impact as gocodewalker will open each file, so you may want to do this check yourself +if performance is your goal. + +```go +fileListQueue := make(chan *gocodewalker.File, 100) + +fileWalker := gocodewalker.NewFileWalker(".", fileListQueue) + +// set to ignore binary files +fileWalker.IgnoreBinaryFiles = true +fileWalker.IgnoreBinaryFileBytes = 500 +``` + +The check itself looks for a null byte `if b == 0 {` which is a fast mostly accurate way of checking for +a binary file. + +### Testing + +Done through unit/integration tests. Otherwise see https://github.com/svent/gitignore-test + +See `./cmd/gocodewalker/main.go` for an example of how to implement and validate + +### Info + +Details on how gitignores work + +https://stackoverflow.com/questions/71735516/proper-way-to-setup-multiple-gitignore-files-in-nested-folders-of-a-repository diff --git a/vendor/github.com/boyter/gocodewalker/dir_suffix.go b/vendor/github.com/boyter/gocodewalker/dir_suffix.go new file mode 100644 index 0000000..0c0dc9f --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/dir_suffix.go @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: MIT + +package gocodewalker + +import ( + "path/filepath" + "strings" +) + +// isSuffixDir returns true if base ends with suffix. Suffix "/" will be trimmed. +// suffix must be a valid sub dir of base. 
+// For examples: +// - isSuffixDir("a", "a") returns true +// - isSuffixDir("a/b/c", "c") returns true +// - isSuffixDir("a/b/c", "b/c") returns true +// - isSuffixDir("a/b/c", "b") returns false +// - isSuffixDir("a/b/c", "a/b") returns false, "a/b" is a valid sub dir but not at the end of "a/b/c" +// - isSuffixDir("a/bb/c", "b/c") returns false +func isSuffixDir(base string, suffix string) bool { + if base == "" || suffix == "" { + return false + } + base = strings.TrimSuffix(filepath.ToSlash(base), "/") + suffix = strings.TrimSuffix(filepath.ToSlash(suffix), "/") + newBase := strings.TrimSuffix(base, suffix) + if newBase == base { + return false + } + return strings.HasSuffix(newBase, "/") || newBase == "" +} diff --git a/vendor/github.com/boyter/gocodewalker/file.go b/vendor/github.com/boyter/gocodewalker/file.go new file mode 100644 index 0000000..1ded287 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/file.go @@ -0,0 +1,727 @@ +// Package file provides file operations specific to code repositories +// such as walking the file tree obeying .ignore and .gitignore files +// or looking for the root directory assuming already in a git project + +// SPDX-License-Identifier: MIT + +package gocodewalker + +import ( + "bytes" + "errors" + "io" + "io/fs" + "os" + "path" + "path/filepath" + "regexp" + "slices" + "strings" + "sync" + + "github.com/boyter/gocodewalker/go-gitignore" + "golang.org/x/sync/errgroup" +) + +const ( + GitIgnore = ".gitignore" + Ignore = ".ignore" + GitModules = ".gitmodules" + IgnoreBinaryFileBytes = 1000 +) + +// ErrTerminateWalk error which indicates that the walker was terminated +var ErrTerminateWalk = errors.New("gocodewalker terminated") + +// File is a struct returned which contains the location and the filename of the file that passed all exclusion rules +type File struct { + Location string + Filename string +} + +var semaphoreCount = 8 + +type FileWalker struct { + fileListQueue chan<- *File + errorsHandler func(error) bool // If returns true will continue to process where possible, otherwise returns if possible + directory string + directories []string + LocationExcludePattern []string // Case-sensitive patterns which exclude directory/file matches + IncludeDirectory []string + ExcludeDirectory []string // Paths to always ignore such as .git,.svn and .hg + IncludeFilename []string + ExcludeFilename []string + IncludeDirectoryRegex []*regexp.Regexp // Must match regex as logical OR IE can match any of them + ExcludeDirectoryRegex []*regexp.Regexp + IncludeFilenameRegex []*regexp.Regexp + ExcludeFilenameRegex []*regexp.Regexp + AllowListExtensions []string // Which extensions should be allowed case sensitive + ExcludeListExtensions []string // Which extensions should be excluded case sensitive + walkMutex sync.Mutex + terminateWalking bool + isWalking bool + IgnoreIgnoreFile bool // Should .ignore files be respected? + IgnoreGitIgnore bool // Should .gitignore files be respected? + IgnoreGitModules bool // Should .gitmodules files be respected? + CustomIgnore []string // Custom ignore files + CustomIgnorePatterns []string //Custom ignore patterns + IncludeHidden bool // Should hidden files and directories be included/walked + osOpen func(name string) (*os.File, error) + osReadFile func(name string) ([]byte, error) + countingSemaphore chan bool + semaphoreCount int + MaxDepth int + IgnoreBinaryFiles bool // Should we open the file and try to determine if it is binary? 
+ IgnoreBinaryFileBytes int // How many bytes should be used +} + +// NewFileWalker constructs a filewalker, which will walk the supplied directory +// and output File results to the supplied queue as it finds them +func NewFileWalker(directory string, fileListQueue chan<- *File) *FileWalker { + return &FileWalker{ + fileListQueue: fileListQueue, + errorsHandler: func(e error) bool { return true }, // a generic one that just swallows everything + directory: directory, + LocationExcludePattern: nil, + IncludeDirectory: nil, + ExcludeDirectory: nil, + IncludeFilename: nil, + ExcludeFilename: nil, + IncludeDirectoryRegex: nil, + ExcludeDirectoryRegex: nil, + IncludeFilenameRegex: nil, + ExcludeFilenameRegex: nil, + AllowListExtensions: nil, + ExcludeListExtensions: nil, + walkMutex: sync.Mutex{}, + terminateWalking: false, + isWalking: false, + IgnoreIgnoreFile: false, + IgnoreGitIgnore: false, + CustomIgnore: []string{}, + CustomIgnorePatterns: []string{}, + IgnoreGitModules: false, + IncludeHidden: false, + osOpen: os.Open, + osReadFile: os.ReadFile, + countingSemaphore: make(chan bool, semaphoreCount), + semaphoreCount: semaphoreCount, + MaxDepth: -1, + IgnoreBinaryFiles: false, + IgnoreBinaryFileBytes: IgnoreBinaryFileBytes, + } +} + +// NewParallelFileWalker constructs a filewalker, which will walk the supplied directories in parallel +// and output File results to the supplied queue as it finds them +func NewParallelFileWalker(directories []string, fileListQueue chan<- *File) *FileWalker { + return &FileWalker{ + fileListQueue: fileListQueue, + errorsHandler: func(e error) bool { return true }, // a generic one that just swallows everything + directories: directories, + LocationExcludePattern: nil, + IncludeDirectory: nil, + ExcludeDirectory: nil, + IncludeFilename: nil, + ExcludeFilename: nil, + IncludeDirectoryRegex: nil, + ExcludeDirectoryRegex: nil, + IncludeFilenameRegex: nil, + ExcludeFilenameRegex: nil, + AllowListExtensions: nil, + ExcludeListExtensions: nil, + walkMutex: sync.Mutex{}, + terminateWalking: false, + isWalking: false, + IgnoreIgnoreFile: false, + IgnoreGitIgnore: false, + CustomIgnore: []string{}, + CustomIgnorePatterns: []string{}, + IgnoreGitModules: false, + IncludeHidden: false, + osOpen: os.Open, + osReadFile: os.ReadFile, + countingSemaphore: make(chan bool, semaphoreCount), + semaphoreCount: semaphoreCount, + MaxDepth: -1, + IgnoreBinaryFiles: false, + IgnoreBinaryFileBytes: IgnoreBinaryFileBytes, + } +} + +// SetConcurrency sets the concurrency when walking +// which controls the number of goroutines that +// walk directories concurrently +// by default it is set to 8 +// must be a whole integer greater than 0 +func (f *FileWalker) SetConcurrency(i int) { + f.walkMutex.Lock() + defer f.walkMutex.Unlock() + if i >= 1 { + f.semaphoreCount = i + } +} + +// Walking gets the state of the file walker and determine +// if we are walking or not +func (f *FileWalker) Walking() bool { + f.walkMutex.Lock() + defer f.walkMutex.Unlock() + return f.isWalking +} + +// Terminate have the walker break out of walking and return as +// soon as it possibly can. 
This is needed because +// this walker needs to work in a TUI interactive mode and +// as such we need to be able to end old processes +func (f *FileWalker) Terminate() { + f.walkMutex.Lock() + defer f.walkMutex.Unlock() + f.terminateWalking = true +} + +// SetErrorHandler sets the function that is called on processing any error +// where if you return true it will attempt to continue processing, and if false +// will return the error instantly +func (f *FileWalker) SetErrorHandler(errors func(error) bool) { + if errors != nil { + f.errorsHandler = errors + } +} + +// Start will start walking the supplied directory with the supplied settings +// and putting files that mach into the supplied channel. +// Returns usual ioutil errors if there is a file issue +// and a ErrTerminateWalk if terminate is called while walking +func (f *FileWalker) Start() error { + f.walkMutex.Lock() + f.isWalking = true + f.walkMutex.Unlock() + + // we now set the counting semaphore based on the count + // done here because it should not change while walking + f.countingSemaphore = make(chan bool, semaphoreCount) + + var err error + if len(f.directories) != 0 { + eg := errgroup.Group{} + for _, directory := range f.directories { + d := directory // capture var + eg.Go(func() error { + return f.walkDirectoryRecursive(0, d, []gitignore.GitIgnore{}, []gitignore.GitIgnore{}, []gitignore.GitIgnore{}, []gitignore.GitIgnore{}) + }) + } + + err = eg.Wait() + } else { + if f.directory != "" { + err = f.walkDirectoryRecursive(0, f.directory, []gitignore.GitIgnore{}, []gitignore.GitIgnore{}, []gitignore.GitIgnore{}, []gitignore.GitIgnore{}) + } + } + + close(f.fileListQueue) + + f.walkMutex.Lock() + f.isWalking = false + f.walkMutex.Unlock() + + return err +} + +func (f *FileWalker) walkDirectoryRecursive(iteration int, + directory string, + gitignores []gitignore.GitIgnore, + ignores []gitignore.GitIgnore, + moduleIgnores []gitignore.GitIgnore, + customIgnores []gitignore.GitIgnore) error { + + // implement max depth option + if f.MaxDepth != -1 && iteration >= f.MaxDepth { + return nil + } + + if iteration == 1 { + f.countingSemaphore <- true + defer func() { + <-f.countingSemaphore + }() + } + + // NB have to call unlock not using defer because method is recursive + // and will deadlock if not done manually + f.walkMutex.Lock() + if f.terminateWalking { + f.walkMutex.Unlock() + return ErrTerminateWalk + } + f.walkMutex.Unlock() + + d, err := f.osOpen(directory) + if err != nil { + // nothing we can do with this so return nil and process as best we can + if f.errorsHandler(err) { + return nil + } + return err + } + defer func(d *os.File) { + err := d.Close() + if err != nil { + f.errorsHandler(err) + } + }(d) + + foundFiles, err := d.ReadDir(-1) + if err != nil { + // nothing we can do with this so return nil and process as best we can + if f.errorsHandler(err) { + return nil + } + return err + } + + files := []fs.DirEntry{} + dirs := []fs.DirEntry{} + + // We want to break apart the files and directories from the + // return as we loop over them differently and this avoids some + // nested if logic at the expense of a "redundant" loop + for _, file := range foundFiles { + if file.IsDir() { + dirs = append(dirs, file) + } else { + files = append(files, file) + } + } + + // Pull out all ignore, gitignore and gitmodule files and add them + // to out collection of gitignores to be applied for this pass + // and any subdirectories + // Since they can apply to the current list of files we need to ensure + // we do this before 
processing files themselves + for _, file := range files { + if !f.IgnoreGitIgnore { + if file.Name() == GitIgnore { + c, err := f.osReadFile(filepath.Join(directory, file.Name())) + if err != nil { + if f.errorsHandler(err) { + continue // if asked to ignore it lets continue + } + return err + } + + abs, err := filepath.Abs(directory) + if err != nil { + if f.errorsHandler(err) { + continue // if asked to ignore it lets continue + } + return err + } + + gitIgnore := gitignore.New(bytes.NewReader(c), abs, nil) + gitignores = append(gitignores, gitIgnore) + } + } + + if !f.IgnoreIgnoreFile { + if file.Name() == Ignore { + c, err := f.osReadFile(filepath.Join(directory, file.Name())) + if err != nil { + if f.errorsHandler(err) { + continue // if asked to ignore it lets continue + } + return err + } + + abs, err := filepath.Abs(directory) + if err != nil { + if f.errorsHandler(err) { + continue // if asked to ignore it lets continue + } + return err + } + + gitIgnore := gitignore.New(bytes.NewReader(c), abs, nil) + ignores = append(ignores, gitIgnore) + } + } + + // this should only happen on the first iteration + // because there should be one .gitmodules file per repository + // however we also need to support someone running in a directory of + // projects that have multiple repositories or in a go vendor + // repository etc... hence check every time + if !f.IgnoreGitModules { + if file.Name() == GitModules { + // now we need to open and parse the file + c, err := f.osReadFile(filepath.Join(directory, file.Name())) + if err != nil { + if f.errorsHandler(err) { + continue // if asked to ignore it lets continue + } + return err + } + + abs, err := filepath.Abs(directory) + if err != nil { + if f.errorsHandler(err) { + continue // if asked to ignore it lets continue + } + return err + } + + for _, gm := range extractGitModuleFolders(string(c)) { + gitIgnore := gitignore.New(strings.NewReader(gm), abs, nil) + moduleIgnores = append(moduleIgnores, gitIgnore) + } + } + } + + for _, ci := range f.CustomIgnore { + if file.Name() == ci { + c, err := f.osReadFile(filepath.Join(directory, file.Name())) + if err != nil { + if f.errorsHandler(err) { + continue // if asked to ignore it lets continue + } + return err + } + + abs, err := filepath.Abs(directory) + if err != nil { + if f.errorsHandler(err) { + continue // if asked to ignore it lets continue + } + return err + } + + gitIgnore := gitignore.New(bytes.NewReader(c), abs, nil) + customIgnores = append(customIgnores, gitIgnore) + } + } + } + + // If we have custom ignore patterns defined we should concatenate them and treat them as a single gitignore file + if len(f.CustomIgnorePatterns) > 0 { + customIgnorePatternsCombined := strings.Join(f.CustomIgnorePatterns, "\n") + gitIgnore := gitignore.New(bytes.NewReader([]byte(customIgnorePatternsCombined)), directory, nil) + customIgnores = append(customIgnores, gitIgnore) + } + + // Process files first to start feeding whatever process is consuming + // the output before traversing into directories for more files + for _, file := range files { + shouldIgnore := false + joined := filepath.Join(directory, file.Name()) + + for _, ignore := range gitignores { + // we have the following situations + // 1. none of the gitignores match + // 2. 
one or more match + // for #1 this means we should include the file + // for #2 this means the last one wins since it should be the most correct + if ignore.MatchIsDir(joined, false) != nil { + shouldIgnore = ignore.Ignore(joined) + } + } + + for _, ignore := range ignores { + // same rules as above + if ignore.MatchIsDir(joined, false) != nil { + shouldIgnore = ignore.Ignore(joined) + } + } + + for _, ignore := range customIgnores { + // same rules as above + if ignore.MatchIsDir(joined, false) != nil { + shouldIgnore = ignore.Ignore(joined) + } + } + + if len(f.IncludeFilename) != 0 { + // include files + shouldIgnore = !slices.ContainsFunc(f.IncludeFilename, func(allow string) bool { + return file.Name() == allow + }) + } + // Exclude comes after include as it takes precedence + for _, deny := range f.ExcludeFilename { + if file.Name() == deny { + shouldIgnore = true + break + } + } + + if len(f.IncludeFilenameRegex) != 0 { + shouldIgnore = !slices.ContainsFunc(f.IncludeFilenameRegex, func(allow *regexp.Regexp) bool { + return allow.MatchString(file.Name()) + }) + } + // Exclude comes after include as it takes precedence + for _, deny := range f.ExcludeFilenameRegex { + if deny.MatchString(file.Name()) { + shouldIgnore = true + break + } + } + + // Ignore hidden files + if !f.IncludeHidden { + s, err := IsHiddenDirEntry(file, directory) + if err != nil { + if !f.errorsHandler(err) { + return err + } + } + + if s { + shouldIgnore = true + } + } + + // Check against extensions + if len(f.AllowListExtensions) != 0 { + ext := GetExtension(file.Name()) + // try again because we could have one of those pesky ones such as something.spec.tsx + // but only if we didn't already find something to save on a bit of processing + if !slices.Contains(f.AllowListExtensions, ext) && !slices.Contains(f.AllowListExtensions, GetExtension(ext)) { + shouldIgnore = true + } + } + + if len(f.ExcludeListExtensions) != 0 { + ext := GetExtension(file.Name()) + shouldIgnore = slices.ContainsFunc(f.ExcludeListExtensions, func(deny string) bool { + return ext == deny || GetExtension(ext) == deny + }) + } + + for _, p := range f.LocationExcludePattern { + if strings.Contains(joined, p) { + shouldIgnore = true + break + } + } + + if f.IgnoreBinaryFiles { + fi, err := os.Open(filepath.Join(directory, file.Name())) + if err != nil { + if !f.errorsHandler(err) { + return err + } + } + defer func(fi *os.File) { + _ = fi.Close() + }(fi) + + buffer := make([]byte, f.IgnoreBinaryFileBytes) + + // Read up to buffer size + _, err = io.ReadFull(fi, buffer) + if err != nil && err != io.EOF && !errors.Is(err, io.ErrUnexpectedEOF) { + if !f.errorsHandler(err) { + return err + } + } + + // cheaply check if is binary file by checking for null byte. 
+ // note that this could be improved later on by checking for magic numbers and the like + // but that should probably be its own package + for _, b := range buffer { + if b == 0 { + shouldIgnore = true + break + } + } + } + + if !shouldIgnore { + f.fileListQueue <- &File{ + Location: joined, + Filename: file.Name(), + } + } + } + + // if we are the 1st iteration IE not the root, we run in parallel + wg := sync.WaitGroup{} + + // Now we process the directories after hopefully giving the + // channel some files to process + for _, dir := range dirs { + var shouldIgnore bool + joined := filepath.Join(directory, dir.Name()) + + // Check against the ignore files we have if the file we are looking at + // should be ignored + // It is safe to always call this because the gitignores will not be added + // in previous steps + for _, ignore := range gitignores { + // we have the following situations + // 1. none of the gitignores match + // 2. one or more match + // for #1 this means we should include the file + // for #2 this means the last one wins since it should be the most correct + if ignore.MatchIsDir(joined, true) != nil { + shouldIgnore = ignore.Ignore(joined) + } + } + for _, ignore := range ignores { + // same rules as above + if ignore.MatchIsDir(joined, true) != nil { + shouldIgnore = ignore.Ignore(joined) + } + } + for _, ignore := range customIgnores { + // same rules as above + if ignore.MatchIsDir(joined, true) != nil { + shouldIgnore = ignore.Ignore(joined) + } + } + for _, ignore := range moduleIgnores { + // same rules as above + if ignore.MatchIsDir(joined, true) != nil { + shouldIgnore = ignore.Ignore(joined) + } + } + + // start by saying we didn't find it then check each possible + // choice to see if we did find it + // if we didn't find it then we should ignore + if len(f.IncludeDirectory) != 0 { + shouldIgnore = !slices.ContainsFunc(f.IncludeDirectory, func(allow string) bool { + return dir.Name() == allow + }) + } + // Confirm if there are any files in the path deny list which usually includes + // things like .git .hg and .svn + // Comes after include as it takes precedence + for _, deny := range f.ExcludeDirectory { + if isSuffixDir(joined, deny) { + shouldIgnore = true + break + } + } + + if len(f.IncludeDirectoryRegex) != 0 { + shouldIgnore = !slices.ContainsFunc(f.IncludeDirectoryRegex, func(allow *regexp.Regexp) bool { + return allow.MatchString(dir.Name()) + }) + } + // Exclude comes after include as it takes precedence + for _, deny := range f.ExcludeDirectoryRegex { + if deny.MatchString(dir.Name()) { + shouldIgnore = true + break + } + } + + // Ignore hidden directories + if !f.IncludeHidden { + s, err := IsHiddenDirEntry(dir, directory) + if err != nil { + if !f.errorsHandler(err) { + return err + } + } + + if s { + shouldIgnore = true + } + } + + for _, p := range f.LocationExcludePattern { + if strings.Contains(joined, p) { + shouldIgnore = true + break + } + } + + if !shouldIgnore { + if iteration == 0 { + wg.Add(1) + go func(iteration int, directory string, gitignores []gitignore.GitIgnore, ignores []gitignore.GitIgnore) { + _ = f.walkDirectoryRecursive(iteration+1, joined, gitignores, ignores, moduleIgnores, customIgnores) + wg.Done() + }(iteration, joined, gitignores, ignores) + } else { + err = f.walkDirectoryRecursive(iteration+1, joined, gitignores, ignores, moduleIgnores, customIgnores) + if err != nil { + return err + } + } + } + } + + wg.Wait() + + return nil +} + +// FindRepositoryRoot given the supplied directory backwards looking for .git or 
.hg +// directories indicating we should start our search from that +// location as it's the root. +// Returns the first directory below supplied with .git or .hg in it +// otherwise the supplied directory +func FindRepositoryRoot(startDirectory string) string { + // Firstly try to determine our real location + curdir, err := os.Getwd() + if err != nil { + return startDirectory + } + + // Check if we have .git or .hg where we are and if + // so just return because we are already there + if checkForGitOrMercurial(curdir) { + return startDirectory + } + + // We did not find something, so now we need to walk the file tree + // backwards in a cross platform way and if we find + // a match we return that + lastIndex := strings.LastIndex(curdir, string(os.PathSeparator)) + for lastIndex != -1 { + curdir = curdir[:lastIndex] + + if checkForGitOrMercurial(curdir) { + return curdir + } + + lastIndex = strings.LastIndex(curdir, string(os.PathSeparator)) + } + + // If we didn't find a good match return the supplied directory + // so that we start the search from where we started at least + // rather than the root + return startDirectory +} + +// Check if there is a .git or .hg folder in the supplied directory +func checkForGitOrMercurial(curdir string) bool { + if stat, err := os.Stat(filepath.Join(curdir, ".git")); err == nil && stat.IsDir() { + return true + } + + if stat, err := os.Stat(filepath.Join(curdir, ".hg")); err == nil && stat.IsDir() { + return true + } + + return false +} + +// GetExtension is a custom version of extracting extensions for a file +// which deals with extensions specific to code such as +// .travis.yml and the like +func GetExtension(name string) string { + name = strings.ToLower(name) + if !strings.Contains(name, ".") { + return name + } + + if strings.LastIndex(name, ".") == 0 { + return name + } + + return path.Ext(name)[1:] +} diff --git a/vendor/github.com/boyter/gocodewalker/gitmodule.go b/vendor/github.com/boyter/gocodewalker/gitmodule.go new file mode 100644 index 0000000..361a780 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/gitmodule.go @@ -0,0 +1,23 @@ +package gocodewalker + +import ( + "regexp" + "strings" +) + +func extractGitModuleFolders(input string) []string { + // Compile a regular expression to match lines starting with "path =" + re := regexp.MustCompile(`^\s*path\s*=\s*(.*)`) + output := []string{} + + for _, line := range strings.Split(input, "\n") { + // Check if the line matches the "path = " pattern + if matches := re.FindStringSubmatch(line); matches != nil { + // Extract the submodule path (which is captured in the regex group) + submodulePath := strings.TrimSpace(matches[1]) + output = append(output, submodulePath) + } + } + + return output +} diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/LICENSE b/vendor/github.com/boyter/gocodewalker/go-gitignore/LICENSE new file mode 100644 index 0000000..7c7d093 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 Denormal Limited + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and 
this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/README.md b/vendor/github.com/boyter/gocodewalker/go-gitignore/README.md new file mode 100644 index 0000000..b9acf36 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/README.md @@ -0,0 +1,94 @@ +# go-gitignore + +Package `go-gitignore` provides an interface for parsing `.gitignore` files, +either individually, or within a repository, and +matching paths against the retrieved patterns. Path matching is done using +[fnmatch](https://github.com/danwakefield/fnmatch) as specified by +[git](https://git-scm.com/docs/gitignore), with +support for recursive matching via the `**` pattern. + +```go +import "github.com/denormal/go-gitignore" + +// match a file against a particular .gitignore +ignore, err := gitignore.NewFromFile("/my/.gitignore") +if err != nil { + panic(err) +} +match := ignore.Match("/my/file/to.check") +if match != nil { + if match.Ignore() { + return true + } +} + +// or match against a repository +// - here we match a directory path relative to the repository +ignore, err := gitignore.NewRepository( "/my/git/repository" ) +if err != nil { + panic(err) +} +match := ignore.Relative("src/examples", true) +if match != nil { + if match.Include() { + fmt.Printf( + "include src/examples/ because of pattern %q at %s", + match, match.Position(), + ) + } +} + +// if it's not important whether a path matches, but whether it is +// ignored or included... +if ignore.Ignore("src/test") { + fmt.Println("ignore src/test") +} else if ignore.Include("src/github.com") { + fmt.Println("include src/github.com") +} +``` + +For more information see `godoc github.com/denormal/go-gitignore`. + +## Patterns + +`go-gitignore` supports the same `.gitignore` pattern format and matching rules as defined by [git](https://git-scm.com/docs/gitignore): + +* A blank line matches no files, so it can serve as a separator for readability. + +* A line starting with `#` serves as a comment. Put a backslash `\` in front of the first hash for patterns that begin with a hash. + +* Trailing spaces are ignored unless they are quoted with backslash `\`. + +* An optional prefix `!` which negates the pattern; any matching file excluded by a previous pattern will become included again. It is not possible to re-include a file if a parent directory of that file is excluded. Git doesn’t list excluded directories for performance reasons, so any patterns on contained files have no effect, no matter where they are defined. Put a backslash `\` in front of the first `!` for patterns that begin with a literal `!`, for example, `\!important!.txt`. + +* If the pattern ends with a slash, it is removed for the purpose of the following description, but it would only find a match with a directory. In other words, `foo/` will match a directory foo and paths underneath it, but will not match a regular file or a symbolic link `foo` (this is consistent with the way how pathspec works in general in Git). 
+ +* If the pattern does not contain a slash `/`, Git treats it as a shell glob pattern and checks for a match against the pathname relative to the location of the `.gitignore` file (relative to the toplevel of the work tree if not from a `.gitignore` file). + +* Otherwise, Git treats the pattern as a shell glob suitable for consumption by `fnmatch(3)` with the `FNM_PATHNAME` flag: wildcards in the pattern will not match a `/` in the pathname. For example, `Documentation/*.html` matches `Documentation/git.html` but not `Documentation/ppc/ppc.html` or `tools/perf/Documentation/perf.html`. + +* A leading slash matches the beginning of the pathname. For example, `/*.c` matches `cat-file.c` but not `mozilla-sha1/sha1.c`. + +Two consecutive asterisks `**` in patterns matched against full pathname may have special meaning: + +* A leading `**` followed by a slash means match in all directories. For example, `**/foo` matches file or directory `foo` anywhere, the same as pattern `foo`. `**/foo/bar` matches file or directory `bar` anywhere that is directly under directory `foo`. + +* A trailing `/**` matches everything inside. For example, `abc/**` matches all files inside directory `abc`, relative to the location of the `.gitignore` file, with infinite depth. + +* A slash followed by two consecutive asterisks then a slash matches zero or more directories. For example, `a/**/b` matches `a/b`, `a/x/b`, `a/x/y/b` and so on. + +* Other consecutive asterisks are considered invalid. + +## Installation + +`go-gitignore` can be installed using the standard Go approach: + +```go +go get github.com/denormal/go-gitignore +``` + +## License + +Copyright (c) 2016 Denormal Limited + +[MIT License](LICENSE) diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/cache.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/cache.go new file mode 100644 index 0000000..f66e352 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/cache.go @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +import ( + "sync" +) + +// Cache is the interface for the GitIgnore cache +type Cache interface { + // Set stores the GitIgnore ignore against its path. + Set(path string, ig GitIgnore) + + // Get attempts to retrieve an GitIgnore instance associated with the given + // path. If the path is not known nil is returned. + Get(path string) GitIgnore +} + +// cache is the default thread-safe cache implementation +type cache struct { + _i map[string]GitIgnore + _lock sync.Mutex +} + +// NewCache returns a Cache instance. This is a thread-safe, in-memory cache +// for GitIgnore instances. +func NewCache() Cache { + return &cache{} +} // Cache() + +// Set stores the GitIgnore ignore against its path. +func (c *cache) Set(path string, ignore GitIgnore) { + if ignore == nil { + return + } + + // ensure the map is defined + if c._i == nil { + c._i = make(map[string]GitIgnore) + } + + // set the cache item + c._lock.Lock() + c._i[path] = ignore + c._lock.Unlock() +} // Set() + +// Get attempts to retrieve an GitIgnore instance associated with the given +// path. If the path is not known nil is returned. 
+func (c *cache) Get(path string) GitIgnore { + c._lock.Lock() + _ignore, _ok := c._i[path] + c._lock.Unlock() + if _ok { + return _ignore + } else { + return nil + } +} // Get() + +// ensure cache supports the Cache interface +var _ Cache = &cache{} diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/doc.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/doc.go new file mode 100644 index 0000000..3882a8e --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/doc.go @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: MIT + +/* +Package gitignore provides an interface for parsing .gitignore files, +either individually, or within a repository, and +matching paths against the retrieved patterns. Path matching is done using +fnmatch as specified by git (see https://git-scm.com/docs/gitignore), with +support for recursive matching via the "**" pattern. +*/ +package gitignore diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/error.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/error.go new file mode 100644 index 0000000..7d94ffc --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/error.go @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +type Error interface { + error + + // Position returns the position of the error within the .gitignore file + // (if any) + Position() Position + + // Underlying returns the underlying error, permitting direct comparison + // against the wrapped error. + Underlying() error +} + +type err struct { + error + _position Position +} // err() + +// NewError returns a new Error instance for the given error e and position p. +func NewError(e error, p Position) Error { + return &err{error: e, _position: p} +} // NewError() + +func (e *err) Position() Position { return e._position } + +func (e *err) Underlying() error { return e.error } + +// ensure err satisfies the Error interface +var _ Error = &err{} diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/errors.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/errors.go new file mode 100644 index 0000000..e8b4db5 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/errors.go @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +import ( + "errors" +) + +var ( + ErrCarriageReturnError = errors.New("unexpected carriage return '\\r'") + ErrInvalidPatternError = errors.New("invalid pattern") + ErrInvalidDirectoryError = errors.New("invalid directory") +) diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/exclude.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/exclude.go new file mode 100644 index 0000000..4018074 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/exclude.go @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +import ( + "os" + "path/filepath" +) + +// exclude attempts to return the GitIgnore instance for the +// $GIT_DIR/info/exclude from the working copy to which path belongs. +func exclude(path string) (GitIgnore, error) { + // attempt to locate GIT_DIR + _gitdir := os.Getenv("GIT_DIR") + if _gitdir == "" { + _gitdir = filepath.Join(path, ".git") + } + _info, _err := os.Stat(_gitdir) + if _err != nil { + if os.IsNotExist(_err) { + return nil, nil + } else { + return nil, _err + } + } else if !_info.IsDir() { + return nil, nil + } + + // is there an info/exclude file within this directory? 
+ _file := filepath.Join(_gitdir, "info", "exclude") + _, _err = os.Stat(_file) + if _err != nil { + if os.IsNotExist(_err) { + return nil, nil + } else { + return nil, _err + } + } + + // attempt to load the exclude file + return NewFromFile(_file) +} // exclude() diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/gitignore.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/gitignore.go new file mode 100644 index 0000000..e627d3e --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/gitignore.go @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +import ( + "io" + "os" + "path/filepath" + "runtime" + "strings" + "sync" +) + +// use an empty GitIgnore for cached lookups +var empty = &ignore{} + +// GitIgnore is the interface to .gitignore files and repositories. It defines +// methods for testing files for matching the .gitignore file, and then +// determining whether a file should be ignored or included. +type GitIgnore interface { + // Base returns the directory containing the .gitignore file. + Base() string + + // Match attempts to match the path against this GitIgnore, and will + // return its Match if successful. Match will invoke the GitIgnore error + // handler (if defined) if it is not possible to determine the absolute + // path of the given path, or if its not possible to determine if the + // path represents a file or a directory. If an error occurs, Match + // returns nil and the error handler (if defined via New, NewWithErrors + // or NewWithCache) will be invoked. + Match(path string) Match + + MatchIsDir(path string, _isdir bool) Match + + // Absolute attempts to match an absolute path against this GitIgnore. If + // the path is not located under the base directory of this GitIgnore, or + // is not matched by this GitIgnore, nil is returned. + Absolute(string, bool) Match + + // Relative attempts to match a path relative to the GitIgnore base + // directory. isdir is used to indicate whether the path represents a file + // or a directory. If the path is not matched by the GitIgnore, nil is + // returned. + Relative(path string, isdir bool) Match + + // Ignore returns true if the path is ignored by this GitIgnore. Paths + // that are not matched by this GitIgnore are not ignored. Internally, + // Ignore uses Match, and will return false if Match() returns nil for path. + Ignore(path string) bool + + // Include returns true if the path is included by this GitIgnore. Paths + // that are not matched by this GitIgnore are always included. Internally, + // Include uses Match, and will return true if Match() returns nil for path. + Include(path string) bool +} + +// ignore is the implementation of a .gitignore file. +type ignore struct { + _base string + _pattern []Pattern + _errors func(Error) bool +} + +// NewGitIgnore creates a new GitIgnore instance from the patterns listed in t, +// representing a .gitignore file in the base directory. If errors is given, it +// will be invoked for every error encountered when parsing the .gitignore +// patterns. Parsing will terminate if errors is called and returns false, +// otherwise, parsing will continue until end of file has been reached. +func New(r io.Reader, base string, errors func(Error) bool) GitIgnore { + // do we have an error handler? 
+ _errors := errors + if _errors == nil { + _errors = func(e Error) bool { return true } + } + + // extract the patterns from the reader + _parser := NewParser(r, _errors) + _patterns := _parser.Parse() + + return &ignore{_base: base, _pattern: _patterns, _errors: _errors} +} // New() + +// NewFromFile creates a GitIgnore instance from the given file. An error +// will be returned if file cannot be opened or its absolute path determined. +func NewFromFile(file string) (GitIgnore, error) { + // define an error handler to catch any file access errors + // - record the first encountered error + var _error Error + _errors := func(e Error) bool { + if _error == nil { + _error = e + } + return true + } + + // attempt to retrieve the GitIgnore represented by this file + _ignore := NewWithErrors(file, _errors) + + // did we encounter an error? + // - if the error has a zero Position then it was encountered + // before parsing was attempted, so we return that error + if _error != nil { + if _error.Position().Zero() { + return nil, _error.Underlying() + } + } + + // otherwise, we ignore the parser errors + return _ignore, nil +} // NewFromFile() + +// NewWithErrors creates a GitIgnore instance from the given file. +// If errors is given, it will be invoked for every error encountered when +// parsing the .gitignore patterns. Parsing will terminate if errors is called +// and returns false, otherwise, parsing will continue until end of file has +// been reached. NewWithErrors returns nil if the .gitignore could not be read. +func NewWithErrors(file string, errors func(Error) bool) GitIgnore { + var _err error + + // do we have an error handler? + _file := file + _errors := errors + if _errors == nil { + _errors = func(e Error) bool { return true } + } else { + // augment the error handler to include the .gitignore file name + // - we do this here since the parser and lexer interfaces are + // not aware of file names + _errors = func(e Error) bool { + // augment the position with the file name + _position := e.Position() + _position.File = _file + + // create a new error with the updated Position + _error := NewError(e.Underlying(), _position) + + // invoke the original error handler + return errors(_error) + } + } + + // we need the absolute path for the GitIgnore base + _file, _err = filepath.Abs(file) + if _err != nil { + _errors(NewError(_err, Position{})) + return nil + } + _base := filepath.Dir(_file) + + // attempt to open the ignore file to create the io.Reader + _fh, _err := os.Open(_file) + if _err != nil { + _errors(NewError(_err, Position{})) + return nil + } + + // return the GitIgnore instance + return New(_fh, _base, _errors) +} // NewWithErrors() + +// NewWithCache returns a GitIgnore instance (using NewWithErrors) +// for the given file. If the file has been loaded before, its GitIgnore +// instance will be returned from the cache rather than being reloaded. If +// cache is not defined, NewWithCache will behave as NewWithErrors +// +// If NewWithErrors returns nil, NewWithCache will store an empty +// GitIgnore (i.e. no patterns) against the file to prevent repeated parse +// attempts on subsequent requests for the same file. Subsequent calls to +// NewWithCache for a file that could not be loaded due to an error will +// return nil. +// +// If errors is given, it will be invoked for every error encountered when +// parsing the .gitignore patterns. Parsing will terminate if errors is called +// and returns false, otherwise, parsing will continue until end of file has +// been reached. 
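A minimal sketch of how the constructors above might be exercised: New builds a GitIgnore from an in-memory reader, NewFromFile from a file on disk. The import path follows the vendor directory layout; the base directory, patterns and paths are illustrative assumptions.

package main

import (
	"fmt"
	"strings"

	gitignore "github.com/boyter/gocodewalker/go-gitignore"
)

func main() {
	// patterns are interpreted relative to the base directory given to New
	content := "*.log\n!keep.log\nbuild/\n"
	ignore := gitignore.New(strings.NewReader(content), "/tmp/project", nil)

	// Relative matches a path against the patterns without touching the filesystem
	for _, p := range []string{"debug.log", "keep.log", "src/main.go"} {
		if m := ignore.Relative(p, false); m != nil {
			fmt.Printf("%s matched %q (ignore=%v)\n", p, m.String(), m.Ignore())
		} else {
			fmt.Printf("%s matched no pattern\n", p)
		}
	}

	// NewFromFile surfaces file access problems as a plain error
	if gi, err := gitignore.NewFromFile("/tmp/project/.gitignore"); err != nil {
		fmt.Println("could not load:", err)
	} else if gi != nil {
		fmt.Println("loaded patterns relative to", gi.Base())
	}
}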
+func NewWithCache(file string, cache Cache, errors func(Error) bool) GitIgnore { + // do we have an error handler? + _errors := errors + if _errors == nil { + _errors = func(e Error) bool { return true } + } + + // use the file absolute path as its key into the cache + _abs, _err := filepath.Abs(file) + if _err != nil { + _errors(NewError(_err, Position{})) + return nil + } + + var _ignore GitIgnore + if cache != nil { + _ignore = cache.Get(_abs) + } + if _ignore == nil { + _ignore = NewWithErrors(file, _errors) + if _ignore == nil { + // if the load failed, cache an empty GitIgnore to prevent + // further attempts to load this file + _ignore = empty + } + if cache != nil { + cache.Set(_abs, _ignore) + } + } + + // return the ignore (if we have it) + if _ignore == empty { + return nil + } else { + return _ignore + } +} // NewWithCache() + +// Base returns the directory containing the .gitignore file for this GitIgnore. +func (i *ignore) Base() string { + return i._base +} // Base() + +// Match attempts to match the path against this GitIgnore, and will +// return its Match if successful. Match will invoke the GitIgnore error +// handler (if defined) if it is not possible to determine the absolute +// path of the given path, or if its not possible to determine if the +// path represents a file or a directory. If an error occurs, Match +// returns nil and the error handler (if defined via New, NewWithErrors +// or NewWithCache) will be invoked. +func (i *ignore) Match(path string) Match { + // ensure we have the absolute path for the given file + _path, _err := filepath.Abs(path) + if _err != nil { + i._errors(NewError(_err, Position{})) + return nil + } + + // is the path a file or a directory? + _info, _err := os.Stat(_path) + if _err != nil { + i._errors(NewError(_err, Position{})) + return nil + } + _isdir := _info.IsDir() + + // attempt to match the absolute path + return i.Absolute(_path, _isdir) +} // Match() + +var matchIsDirMutex = sync.Mutex{} +var matchIsDirCache = map[string]string{} + +func (i *ignore) MatchIsDir(path string, _isdir bool) Match { + + // ensure we have the absolute path for the given file + matchIsDirMutex.Lock() + _path, ok := matchIsDirCache[path] + matchIsDirMutex.Unlock() + if !ok { + var _err error + _path, _err = filepath.Abs(path) + if _err != nil { + i._errors(NewError(_err, Position{})) + return nil + } + matchIsDirMutex.Lock() + matchIsDirCache[path] = _path + matchIsDirMutex.Unlock() + } + + // attempt to match the absolute path + return i.Absolute(_path, _isdir) +} // Match() + +// Absolute attempts to match an absolute path against this GitIgnore. If +// the path is not located under the base directory of this GitIgnore, or +// is not matched by this GitIgnore, nil is returned. +func (i *ignore) Absolute(path string, isdir bool) Match { + // does the file share the same directory as this ignore file? + if !strings.HasPrefix(path, i._base) { + return nil + } + + // extract the relative path of this file + _prefix := len(i._base) + 1 // BOYTERWASHERE + //_prefix := len(i._base) + _rel := string(path[_prefix:]) + return i.Relative(_rel, isdir) +} // Absolute() + +// Relative attempts to match a path relative to the GitIgnore base +// directory. isdir is used to indicate whether the path represents a file +// or a directory. If the path is not matched by the GitIgnore, nil is +// returned. 
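A sketch of sharing one cache across repeated lookups, assuming NewCache (declared in this package's cache.go) returns an empty Cache; the file path is an illustrative assumption.

package main

import (
	"fmt"

	gitignore "github.com/boyter/gocodewalker/go-gitignore"
)

func main() {
	// the first call parses the file; later calls for the same absolute
	// path are served from the cache
	cache := gitignore.NewCache()

	for i := 0; i < 3; i++ {
		gi := gitignore.NewWithCache("/tmp/project/.gitignore", cache, nil)
		if gi == nil {
			// nil means the file could not be loaded; an empty GitIgnore is
			// cached so the parse is not retried next time round
			fmt.Println("no usable .gitignore")
			continue
		}
		fmt.Println("match?", gi.MatchIsDir("/tmp/project/debug.log", false) != nil)
	}
}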
+func (i *ignore) Relative(path string, isdir bool) Match { + // if we are on Windows, then translate the path to Unix form + _rel := path + if runtime.GOOS == "windows" { + _rel = filepath.ToSlash(_rel) + } + + // iterate over the patterns for this ignore file + // - iterate in reverse, since later patterns overwrite earlier + for _i := len(i._pattern) - 1; _i >= 0; _i-- { + _pattern := i._pattern[_i] + if _pattern.Match(_rel, isdir) { + return _pattern + } + } + + // we don't match this file + return nil +} // Relative() + +// Ignore returns true if the path is ignored by this GitIgnore. Paths +// that are not matched by this GitIgnore are not ignored. Internally, +// Ignore uses Match, and will return false if Match() returns nil for path. +func (i *ignore) Ignore(path string) bool { + _match := i.Match(path) + if _match != nil { + return _match.Ignore() + } + + // we didn't match this path, so we don't ignore it + return false +} // Ignore() + +// Include returns true if the path is included by this GitIgnore. Paths +// that are not matched by this GitIgnore are always included. Internally, +// Include uses Match, and will return true if Match() returns nil for path. +func (i *ignore) Include(path string) bool { + _match := i.Match(path) + if _match != nil { + return _match.Include() + } + + // we didn't match this path, so we include it + return true +} // Include() + +// ensure Ignore satisfies the GitIgnore interface +var _ GitIgnore = &ignore{} diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/lexer.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/lexer.go new file mode 100644 index 0000000..db3b041 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/lexer.go @@ -0,0 +1,477 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +import ( + "bufio" + "io" +) + +// +// inspired by https://blog.gopheracademy.com/advent-2014/parsers-lexers/ +// + +// lexer is the implementation of the .gitignore lexical analyser +type lexer struct { + _r *bufio.Reader + _unread []rune + _offset int + _line int + _column int + _previous []int +} // lexer{} + +// Lexer is the interface to the lexical analyser for .gitignore files +type Lexer interface { + // Next returns the next Token from the Lexer reader. If an error is + // encountered, it will be returned as an Error instance, detailing the + // error and its position within the stream. + Next() (*Token, Error) + + // Position returns the current position of the Lexer. + Position() Position + + // String returns the string representation of the current position of the + // Lexer. + String() string +} + +// NewLexer returns a Lexer instance for the io.Reader r. +func NewLexer(r io.Reader) Lexer { + return &lexer{_r: bufio.NewReader(r), _line: 1, _column: 1} +} // NewLexer() + +// Next returns the next Token from the Lexer reader. If an error is +// encountered, it will be returned as an Error instance, detailing the error +// and its position within the stream. +func (l *lexer) Next() (*Token, Error) { + // are we at the beginning of the line? 
+ _beginning := l.beginning() + + // read the next rune + _r, _err := l.read() + if _err != nil { + return nil, _err + } + + switch _r { + // end of file + case _EOF: + return l.token(EOF, nil, nil) + + // whitespace ' ', '\t' + case _SPACE: + fallthrough + case _TAB: + l.unread(_r) + _rtn, _err := l.whitespace() + return l.token(WHITESPACE, _rtn, _err) + + // end of line '\n' or '\r\n' + case _CR: + fallthrough + case _NEWLINE: + l.unread(_r) + _rtn, _err := l.eol() + return l.token(EOL, _rtn, _err) + + // separator '/' + case _SEPARATOR: + return l.token(SEPARATOR, []rune{_r}, nil) + + // '*' or any '**' + case _WILDCARD: + // is the wildcard followed by another wildcard? + // - does this represent the "any" token (i.e. "**") + _next, _err := l.peek() + if _err != nil { + return nil, _err + } else if _next == _WILDCARD { + // we know read() will succeed here since we used peek() above + _, _ = l.read() + return l.token(ANY, []rune{_WILDCARD, _WILDCARD}, nil) + } + + // we have a single wildcard, so treat this as a pattern + l.unread(_r) + _rtn, _err := l.pattern() + return l.token(PATTERN, _rtn, _err) + + // comment '#' + case _COMMENT: + l.unread(_r) + + // if we are at the start of the line, then we treat this as a comment + if _beginning { + _rtn, _err := l.comment() + return l.token(COMMENT, _rtn, _err) + } + + // otherwise, we regard this as a pattern + _rtn, _err := l.pattern() + return l.token(PATTERN, _rtn, _err) + + // negation '!' + case _NEGATION: + if _beginning { + return l.token(NEGATION, []rune{_r}, nil) + } + fallthrough + + // pattern + default: + l.unread(_r) + _rtn, _err := l.pattern() + return l.token(PATTERN, _rtn, _err) + } +} // Next() + +// Position returns the current position of the Lexer. +func (l *lexer) Position() Position { + return Position{"", l._line, l._column, l._offset} +} // Position() + +// String returns the string representation of the current position of the +// Lexer. +func (l *lexer) String() string { + return l.Position().String() +} // String() + +// +// private methods +// + +// read the next rune from the stream. Return an Error if there is a problem +// reading from the stream. If the end of stream is reached, return the EOF +// Token. +func (l *lexer) read() (rune, Error) { + var _r rune + var _err error + + // do we have any unread runes to read? + _length := len(l._unread) + if _length > 0 { + _r = l._unread[_length-1] + l._unread = l._unread[:_length-1] + + // otherwise, attempt to read a new rune + } else { + _r, _, _err = l._r.ReadRune() + if _err == io.EOF { + return _EOF, nil + } + } + + // increment the offset and column counts + l._offset++ + l._column++ + + return _r, l.err(_err) +} // read() + +// unread returns the given runes to the stream, making them eligible to be +// read again. The runes are returned in the order given, so the last rune +// specified will be the next rune read from the stream. +func (l *lexer) unread(r ...rune) { + // ignore EOF runes + _r := make([]rune, 0) + for _, _rune := range r { + if _rune != _EOF { + _r = append(_r, _rune) + } + } + + // initialise the unread rune list if necessary + if l._unread == nil { + l._unread = make([]rune, 0) + } + if len(_r) != 0 { + l._unread = append(l._unread, _r...) 
+ } + + // decrement the offset and column counts + // - we have to take care of column being 0 + // - at present we can only unwind across a single line boundary + _length := len(_r) + for ; _length > 0; _length-- { + l._offset-- + if l._column == 1 { + _length := len(l._previous) + if _length > 0 { + l._column = l._previous[_length-1] + l._previous = l._previous[:_length-1] + l._line-- + } + } else { + l._column-- + } + } +} // unread() + +// peek returns the next rune in the stream without consuming it (i.e. it will +// be returned by the next call to read or peek). peek will return an error if +// there is a problem reading from the stream. +func (l *lexer) peek() (rune, Error) { + // read the next rune + _r, _err := l.read() + if _err != nil { + return _r, _err + } + + // unread & return the rune + l.unread(_r) + return _r, _err +} // peek() + +// newline adjusts the positional counters when an end of line is reached +func (l *lexer) newline() { + // adjust the counters for the new line + if l._previous == nil { + l._previous = make([]int, 0) + } + l._previous = append(l._previous, l._column) + l._column = 1 + l._line++ +} // newline() + +// comment reads all runes until a newline or end of file is reached. An +// error is returned if an error is encountered reading from the stream. +func (l *lexer) comment() ([]rune, Error) { + _comment := make([]rune, 0) + + // read until we reach end of line or end of file + // - as we are in a comment, we ignore escape characters + for { + _next, _err := l.read() + if _err != nil { + return _comment, _err + } + + // read until we have end of line or end of file + switch _next { + case _CR: + fallthrough + case _NEWLINE: + fallthrough + case _EOF: + // return the read run to the stream and stop + l.unread(_next) + return _comment, nil + } + + // otherwise, add this run to the comment + _comment = append(_comment, _next) + } +} // comment() + +// escape attempts to read an escape sequence (e.g. '\ ') form the input +// stream. An error will be returned if there is an error reading from the +// stream. escape returns just the escape rune if the following rune is either +// end of line or end of file (since .gitignore files do not support line +// continuations). +func (l *lexer) escape() ([]rune, Error) { + // attempt to process the escape sequence + _peek, _err := l.peek() + if _err != nil { + return nil, _err + } + + // what is the next rune after the escape? + switch _peek { + // are we at the end of the line or file? + // - we return just the escape rune + case _CR: + fallthrough + case _NEWLINE: + fallthrough + case _EOF: + return []rune{_ESCAPE}, nil + } + + // otherwise, return the escape and the next rune + // - we know read() will succeed here since we used peek() above + _, _ = l.read() + return []rune{_ESCAPE, _peek}, nil +} // escape() + +// eol returns all runes from the current position to the end of the line. An +// error is returned if there is a problem reading from the stream, or if a +// carriage return character '\r' is encountered that is not followed by a +// newline '\n'. 
+func (l *lexer) eol() ([]rune, Error) { + // read the to the end of the line + // - we should only be called here when we encounter an end of line + // sequence + _line := make([]rune, 0, 1) + + // loop until there's nothing more to do + for { + _next, _err := l.read() + if _err != nil { + return _line, _err + } + + // read until we have a newline or we're at end of file + switch _next { + // end of file + case _EOF: + return _line, nil + + // carriage return - we expect to see a newline next + case _CR: + _line = append(_line, _next) + _next, _err = l.read() + if _err != nil { + return _line, _err + } else if _next != _NEWLINE { + l.unread(_next) + return _line, l.err(ErrCarriageReturnError) + } + fallthrough + + // newline + case _NEWLINE: + _line = append(_line, _next) + return _line, nil + } + } +} // eol() + +// whitespace returns all whitespace (i.e. ' ' and '\t') runes in a sequence, +// or an error if there is a problem reading the next runes. +func (l *lexer) whitespace() ([]rune, Error) { + // read until we hit the first non-whitespace rune + _ws := make([]rune, 0, 1) + + // loop until there's nothing more to do + for { + _next, _err := l.read() + if _err != nil { + return _ws, _err + } + + // what is this next rune? + switch _next { + // space or tab is consumed + case _SPACE: + fallthrough + case _TAB: + //nolint:staticcheck // SA4011: ineffective break statement (deliberate) + break + + // non-whitespace rune + default: + // return the rune to the buffer and we're done + l.unread(_next) + return _ws, nil + } + + // add this rune to the whitespace + _ws = append(_ws, _next) + } +} // whitespace() + +// pattern returns all runes representing a file or path pattern, delimited +// either by unescaped whitespace, a path separator '/' or enf of file. An +// error is returned if a problem is encountered reading from the stream. +func (l *lexer) pattern() ([]rune, Error) { + // read until we hit the first whitespace/end of line/eof rune + _pattern := make([]rune, 0, 1) + + // loop until there's nothing more to do + for { + _r, _err := l.read() + if _err != nil { + return _pattern, _err + } + + // what is the next rune? + switch _r { + // whitespace, newline, end of file, separator + // - this is the end of the pattern + case _SPACE: + fallthrough + case _TAB: + fallthrough + case _CR: + fallthrough + case _NEWLINE: + fallthrough + case _SEPARATOR: + fallthrough + case _EOF: + // return what we have + l.unread(_r) + return _pattern, nil + + // a wildcard is the end of the pattern if it is part of any '**' + case _WILDCARD: + _next, _err := l.peek() + if _err != nil { + return _pattern, _err + } else if _next == _WILDCARD { + l.unread(_r) + return _pattern, _err + } else { + _pattern = append(_pattern, _r) + } + + // escape sequence - consume the next rune + case _ESCAPE: + _escape, _err := l.escape() + if _err != nil { + return _pattern, _err + } + + // add the escape sequence as part of the pattern + _pattern = append(_pattern, _escape...) + + // any other character, we add to the pattern + default: + _pattern = append(_pattern, _r) + } + } +} // pattern() + +// token returns a Token instance of the given type_ represented by word runes. 
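To make the token stream concrete, a short sketch that runs the lexer over a small .gitignore fragment and prints each token until EOF; only exported identifiers from this package are used, and the input string is illustrative.

package main

import (
	"fmt"
	"strings"

	gitignore "github.com/boyter/gocodewalker/go-gitignore"
)

func main() {
	src := "# comment\n!/build/**/*.log\n"
	lexer := gitignore.NewLexer(strings.NewReader(src))

	for {
		token, err := lexer.Next()
		if err != nil {
			fmt.Println("lexer error at", err.Position(), "-", err.Underlying())
			return
		}
		// String() reports position, token type and the raw runes
		fmt.Println(token.String())
		if token.Type == gitignore.EOF {
			return
		}
	}
}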
+func (l *lexer) token(type_ TokenType, word []rune, e Error) (*Token, Error) { + // if we have an error, then we return a BAD token + if e != nil { + type_ = BAD + } + + // extract the lexer position + // - the column is taken from the current column position + // minus the length of the consumed "word" + _word := len(word) + _column := l._column - _word + _offset := l._offset - _word + position := Position{"", l._line, _column, _offset} + + // if this is a newline token, we adjust the line & column counts + if type_ == EOL { + l.newline() + } + + // return the Token + return NewToken(type_, word, position), e +} // token() + +// err returns an Error encapsulating the error e and the current Lexer +// position. +func (l *lexer) err(e error) Error { + // do we have an error? + if e == nil { + return nil + } else { + return NewError(e, l.Position()) + } +} // err() + +// beginning returns true if the Lexer is at the start of a new line. +func (l *lexer) beginning() bool { + return l._column == 1 +} // beginning() + +// ensure the lexer conforms to the lexer interface +var _ Lexer = &lexer{} diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/match.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/match.go new file mode 100644 index 0000000..5871ede --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/match.go @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +// Match represents the interface of successful matches against a .gitignore +// pattern set. A Match can be queried to determine whether the matched path +// should be ignored or included (i.e. was the path matched by a negated +// pattern), and to extract the position of the pattern within the .gitignore, +// and a string representation of the pattern. +type Match interface { + // Ignore returns true if the match pattern describes files or paths that + // should be ignored. + Ignore() bool + + // Include returns true if the match pattern describes files or paths that + // should be included. + Include() bool + + // String returns a string representation of the matched pattern. + String() string + + // Position returns the position in the .gitignore file at which the + // matching pattern was defined. + Position() Position +} diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/parser.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/parser.go new file mode 100644 index 0000000..70618f2 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/parser.go @@ -0,0 +1,446 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +import ( + "io" +) + +// Parser is the interface for parsing .gitignore files and extracting the set +// of patterns specified in the .gitignore file. +type Parser interface { + // Parse returns all well-formed .gitignore Patterns contained within the + // parser stream. Parsing will terminate at the end of the stream, or if + // the parser error handler returns false. + Parse() []Pattern + + // Next returns the next well-formed .gitignore Pattern from the parser + // stream. If an error is encountered, and the error handler is either + // not defined, or returns true, Next will skip to the end of the current + // line and attempt to parse the next Pattern. If the error handler + // returns false, or the parser reaches the end of the stream, Next + // returns nil. + Next() Pattern + + // Position returns the current position of the parser in the input stream. 
+ Position() Position +} // Parser{} + +// parser is the implementation of the .gitignore parser +type parser struct { + _lexer Lexer + _undo []*Token + _error func(Error) bool +} // parser{} + +// NewParser returns a new Parser instance for the given stream r. +// If err is not nil, it will be called for every error encountered during +// parsing. Parsing will terminate at the end of the stream, or if err +// returns false. +func NewParser(r io.Reader, err func(Error) bool) Parser { + return &parser{_lexer: NewLexer(r), _error: err} +} // NewParser() + +// Parse returns all well-formed .gitignore Patterns contained within the +// parser stream. Parsing will terminate at the end of the stream, or if +// the parser error handler returns false. +func (p *parser) Parse() []Pattern { + // keep parsing until there's no more patterns + _patterns := make([]Pattern, 0) + for { + _pattern := p.Next() + if _pattern == nil { + return _patterns + } + _patterns = append(_patterns, _pattern) + } +} // Parse() + +// Next returns the next well-formed .gitignore Pattern from the parser stream. +// If an error is encountered, and the error handler is either not defined, or +// returns true, Next will skip to the end of the current line and attempt to +// parse the next Pattern. If the error handler returns false, or the parser +// reaches the end of the stream, Next returns nil. +func (p *parser) Next() Pattern { + // keep searching until we find the next pattern, or until we + // reach the end of the file + for { + _token, _err := p.next() + if _err != nil { + if !p.errors(_err) { + return nil + } + + // we got an error from the lexer, so skip the remainder + // of this line and try again from the next line + for _err != nil { + _err = p.skip() + if _err != nil { + if !p.errors(_err) { + return nil + } + } + } + continue + } + + switch _token.Type { + // we're at the end of the file + case EOF: + return nil + + // we have a blank line or comment + case EOL: + continue + case COMMENT: + continue + + // otherwise, attempt to build the next pattern + default: + _pattern, _err := p.build(_token) + if _err != nil { + if !p.errors(_err) { + return nil + } + + // we encountered an error parsing the retrieved tokens + // - skip to the end of the line + for _err != nil { + _err = p.skip() + if _err != nil { + if !p.errors(_err) { + return nil + } + } + } + + // skip to the next token + continue + } else if _pattern != nil { + return _pattern + } + } + } +} // Next() + +// Position returns the current position of the parser in the input stream. +func (p *parser) Position() Position { + // if we have any previously read tokens, then the token at + // the end of the "undo" list (most recently "undone") gives the + // position of the parser + _length := len(p._undo) + if _length != 0 { + return p._undo[_length-1].Position + } + + // otherwise, return the position of the lexer + return p._lexer.Position() +} // Position() + +// +// private methods +// + +// build attempts to build a well-formed .gitignore Pattern starting from the +// given Token t. An Error will be returned if the sequence of tokens returned +// by the Lexer does not represent a valid Pattern. 
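As a sketch of how the error callback and Position fit together, the handler below records every parse problem with its line and column and lets parsing continue; the input (with a stray carriage return) is an illustrative assumption.

package main

import (
	"fmt"
	"strings"

	gitignore "github.com/boyter/gocodewalker/go-gitignore"
)

func main() {
	src := "*.tmp\nbad\rline\n!/vendor/\n"

	var problems []string
	handler := func(e gitignore.Error) bool {
		p := e.Position()
		problems = append(problems, fmt.Sprintf("line %d, column %d: %v", p.Line, p.Column, e.Underlying()))
		return true // returning false would stop the parser instead
	}

	patterns := gitignore.NewParser(strings.NewReader(src), handler).Parse()
	fmt.Println("patterns parsed:", len(patterns))
	for _, msg := range problems {
		fmt.Println("problem:", msg)
	}
}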
+func (p *parser) build(t *Token) (Pattern, Error) { + // attempt to create a valid pattern + switch t.Type { + // we have a negated pattern + case NEGATION: + return p.negation(t) + + // attempt to build a path specification + default: + return p.path(t) + } +} // build() + +// negation attempts to build a well-formed negated .gitignore Pattern starting +// from the negation Token t. As with build, negation returns an Error if the +// sequence of tokens returned by the Lexer does not represent a valid Pattern. +func (p *parser) negation(t *Token) (Pattern, Error) { + // a negation appears before a path specification, so + // skip the negation token + _next, _err := p.next() + if _err != nil { + return nil, _err + } + + // extract the sequence of tokens for this path + _tokens, _err := p.sequence(_next) + if _err != nil { + return nil, _err + } + + // include the "negation" token at the front of the sequence + _tokens = append([]*Token{t}, _tokens...) + + // return the Pattern instance + return NewPattern(_tokens), nil +} // negation() + +// path attempts to build a well-formed .gitignore Pattern representing a path +// specification, starting with the Token t. If the sequence of tokens returned +// by the Lexer does not represent a valid Pattern, path returns an Error. +// Trailing whitespace is dropped from the sequence of pattern tokens. +func (p *parser) path(t *Token) (Pattern, Error) { + // extract the sequence of tokens for this path + _tokens, _err := p.sequence(t) + if _err != nil { + return nil, _err + } + + // remove trailing whitespace tokens + _length := len(_tokens) + for _length > 0 { + // if we have a non-whitespace token, we can stop + _length-- + if _tokens[_length].Type != WHITESPACE { + break + } + + // otherwise, truncate the token list + _tokens = _tokens[:_length] + } + + // return the Pattern instance + return NewPattern(_tokens), nil +} // path() + +// sequence attempts to extract a well-formed Token sequence from the Lexer +// representing a .gitignore Pattern. sequence returns an Error if the +// retrieved sequence of tokens does not represent a valid Pattern. +func (p *parser) sequence(t *Token) ([]*Token, Error) { + // extract the sequence of tokens for a valid path + // - this excludes the negation token, which is handled as + // a special case before sequence() is called + switch t.Type { + // the path starts with a separator + case SEPARATOR: + return p.separator(t) + + // the path starts with the "any" pattern ("**") + case ANY: + return p.any(t) + + // the path starts with whitespace, wildcard or a pattern + case WHITESPACE: + fallthrough + case PATTERN: + return p.pattern(t) + } + + // otherwise, we have an invalid specification + p.undo(t) + return nil, p.err(ErrInvalidPatternError) +} // sequence() + +// separator attempts to retrieve a valid sequence of tokens that may appear +// after the path separator '/' Token t. An Error is returned if the sequence if +// tokens is not valid, or if there is an error extracting tokens from the +// input stream. +func (p *parser) separator(t *Token) ([]*Token, Error) { + // build a list of tokens that may appear after a separator + _tokens := []*Token{t} + _token, _err := p.next() + if _err != nil { + return _tokens, _err + } + + // what tokens are we allowed to have follow a separator? + switch _token.Type { + // a separator can be followed by a pattern or + // an "any" pattern (i.e. 
"**") + case ANY: + _next, _err := p.any(_token) + return append(_tokens, _next...), _err + + case WHITESPACE: + fallthrough + case PATTERN: + _next, _err := p.pattern(_token) + return append(_tokens, _next...), _err + + // if we encounter end of line or file we are done + case EOL: + fallthrough + case EOF: + return _tokens, nil + + // a separator can be followed by another separator + // - it's not ideal, and not very useful, but it's interpreted + // as a single separator + // - we could clean it up here, but instead we pass + // everything down to the matching later on + case SEPARATOR: + _next, _err := p.separator(_token) + return append(_tokens, _next...), _err + } + + // any other token is invalid + p.undo(_token) + return _tokens, p.err(ErrInvalidPatternError) +} // separator() + +// any attempts to retrieve a valid sequence of tokens that may appear +// after the any '**' Token t. An Error is returned if the sequence if +// tokens is not valid, or if there is an error extracting tokens from the +// input stream. +func (p *parser) any(t *Token) ([]*Token, Error) { + // build the list of tokens that may appear after "any" (i.e. "**") + _tokens := []*Token{t} + _token, _err := p.next() + if _err != nil { + return _tokens, _err + } + + // what tokens are we allowed to have follow an "any" symbol? + switch _token.Type { + // an "any" token may only be followed by a separator + case SEPARATOR: + _next, _err := p.separator(_token) + return append(_tokens, _next...), _err + + // whitespace is acceptable if it takes us to the end of the line + case WHITESPACE: + return _tokens, p.eol() + + // if we encounter end of line or file we are done + case EOL: + fallthrough + case EOF: + return _tokens, nil + } + + // any other token is invalid + p.undo(_token) + return _tokens, p.err(ErrInvalidPatternError) +} // any() + +// pattern attempts to retrieve a valid sequence of tokens that may appear +// after the path pattern Token t. An Error is returned if the sequence if +// tokens is not valid, or if there is an error extracting tokens from the +// input stream. +func (p *parser) pattern(t *Token) ([]*Token, Error) { + // build the list of tokens that may appear after a pattern + _tokens := []*Token{t} + _token, _err := p.next() + if _err != nil { + return _tokens, _err + } + + // what tokens are we allowed to have follow a pattern? + var _next []*Token + switch _token.Type { + case SEPARATOR: + _next, _err = p.separator(_token) + return append(_tokens, _next...), _err + + case WHITESPACE: + fallthrough + case PATTERN: + _next, _err = p.pattern(_token) + return append(_tokens, _next...), _err + + // if we encounter end of line or file we are done + case EOL: + fallthrough + case EOF: + return _tokens, nil + } + + // any other token is invalid + p.undo(_token) + return _tokens, p.err(ErrInvalidPatternError) +} // pattern() + +// eol attempts to consume the next Lexer token to read the end of line or end +// of file. If a EOL or EOF is not reached , eol will return an error. +func (p *parser) eol() Error { + // are we at the end of the line? + _token, _err := p.next() + if _err != nil { + return _err + } + + // have we encountered whitespace only? 
+ switch _token.Type { + // if we're at the end of the line or file, we're done + case EOL: + fallthrough + case EOF: + p.undo(_token) + return nil + } + + // otherwise, we have an invalid pattern + p.undo(_token) + return p.err(ErrInvalidPatternError) +} // eol() + +// next returns the next token from the Lexer, or an error if there is a +// problem reading from the input stream. +func (p *parser) next() (*Token, Error) { + // do we have any previously read tokens? + _length := len(p._undo) + if _length > 0 { + _token := p._undo[_length-1] + p._undo = p._undo[:_length-1] + return _token, nil + } + + // otherwise, attempt to retrieve the next token from the lexer + return p._lexer.Next() +} // next() + +// skip reads Tokens from the input until the end of line or end of file is +// reached. If there is a problem reading tokens, an Error is returned. +func (p *parser) skip() Error { + // skip to the next end of line or end of file token + for { + _token, _err := p.next() + if _err != nil { + return _err + } + + // if we have an end of line or file token, then we can stop + switch _token.Type { + case EOL: + fallthrough + case EOF: + return nil + } + } +} // skip() + +// undo returns the given Token t to the parser input stream to be retrieved +// again on a subsequent call to next. +func (p *parser) undo(t *Token) { + // add this token to the list of previously read tokens + // - initialise the undo list if required + if p._undo == nil { + p._undo = make([]*Token, 0, 1) + } + p._undo = append(p._undo, t) +} // undo() + +// err returns an Error for the error e, capturing the current parser Position. +func (p *parser) err(e error) Error { + // convert the error to include the parser position + return NewError(e, p.Position()) +} // err() + +// errors returns the response from the parser error handler to the Error e. If +// no error handler has been configured for this parser, errors returns true. +func (p *parser) errors(e Error) bool { + // do we have an error handler? + if p._error == nil { + return true + } + + // pass the error through to the error handler + // - if this returns false, parsing will stop + return p._error(e) +} // errors() diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/pattern.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/pattern.go new file mode 100644 index 0000000..41dfe9d --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/pattern.go @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +import ( + "path/filepath" + "strings" + + "github.com/danwakefield/fnmatch" +) + +// Pattern represents per-line patterns within a .gitignore file +type Pattern interface { + Match + + // Match returns true if the given path matches the name pattern. If the + // pattern is meant for directories only, and the path is not a directory, + // Match will return false. The matching is performed by fnmatch(). It + // is assumed path is relative to the base path of the owning GitIgnore. + Match(string, bool) bool +} + +// pattern is the base implementation of a .gitignore pattern +type pattern struct { + _negated bool + _anchored bool + _directory bool + _string string + _fnmatch string + _position Position +} // pattern() + +// name represents patterns matching a file or path name (i.e. the last +// component of a path) +type name struct { + pattern +} // name{} + +// path represents a pattern that contains at least one path separator within +// the pattern (i.e. 
not at the start or end of the pattern) +type path struct { + pattern + _depth int +} // path{} + +// any represents a pattern that contains at least one "any" token "**" +// allowing for recursive matching. +type any struct { + pattern + _tokens []*Token +} // any{} + +// NewPattern returns a Pattern from the ordered slice of Tokens. The tokens are +// assumed to represent a well-formed .gitignore pattern. A Pattern may be +// negated, anchored to the start of the path (relative to the base directory +// of tie containing .gitignore), or match directories only. +func NewPattern(tokens []*Token) Pattern { + // if we have no tokens there is no pattern + if len(tokens) == 0 { + return nil + } + + // extract the pattern position from first token + _position := tokens[0].Position + _string := tokenset(tokens).String() + + // is this a negated pattern? + _negated := false + if tokens[0].Type == NEGATION { + _negated = true + tokens = tokens[1:] + } + + // is this pattern anchored to the start of the path? + _anchored := false + if tokens[0].Type == SEPARATOR { + _anchored = true + tokens = tokens[1:] + } + + // is this pattern for directories only? + _directory := false + _last := len(tokens) - 1 + if len(tokens) != 0 { + if tokens[_last].Type == SEPARATOR { + _directory = true + tokens = tokens[:_last] + } + } + + // build the pattern expression + _fnmatch := tokenset(tokens).String() + _pattern := &pattern{ + _negated: _negated, + _anchored: _anchored, + _position: _position, + _directory: _directory, + _string: _string, + _fnmatch: _fnmatch, + } + return _pattern.compile(tokens) +} // NewPattern() + +// compile generates a specific Pattern (i.e. name, path or any) +// represented by the list of tokens. +func (p *pattern) compile(tokens []*Token) Pattern { + // what tokens do we have in this pattern? + // - ANY token means we can match to any depth + // - SEPARATOR means we have path rather than file matching + _separator := false + for _, _token := range tokens { + switch _token.Type { + case ANY: + return p.any(tokens) + case SEPARATOR: + _separator = true + } + } + + // should we perform path or name/file matching? + if _separator { + return p.path(tokens) + } else { + return p.name(tokens) + } +} // compile() + +// Ignore returns true if the pattern describes files or paths that should be +// ignored. +func (p *pattern) Ignore() bool { return !p._negated } + +// Include returns true if the pattern describes files or paths that should be +// included (i.e. not ignored) +func (p *pattern) Include() bool { return p._negated } + +// Position returns the position of the first token of this pattern. +func (p *pattern) Position() Position { return p._position } + +// String returns the string representation of the pattern. +func (p *pattern) String() string { return p._string } + +// +// name patterns +// - designed to match trailing file/directory names only +// + +// name returns a Pattern designed to match file or directory names, with no +// path elements. +func (p *pattern) name(tokens []*Token) Pattern { + return &name{*p} +} // name() + +// Match returns true if the given path matches the name pattern. If the +// pattern is meant for directories only, and the path is not a directory, +// Match will return false. The matching is performed by fnmatch(). It +// is assumed path is relative to the base path of the owning GitIgnore. +func (n *name) Match(path string, isdir bool) bool { + // are we expecting a directory? 
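To illustrate the directory-only and negation handling performed by NewPattern, a small sketch matching relative paths against parsed patterns; the paths and patterns are illustrative.

package main

import (
	"fmt"
	"strings"

	gitignore "github.com/boyter/gocodewalker/go-gitignore"
)

func main() {
	// "dist/" matches directories only; "!dist/keep.txt" re-includes one file
	patterns := gitignore.NewParser(strings.NewReader("dist/\n!dist/keep.txt\n"), nil).Parse()

	check := func(path string, isdir bool) {
		// later patterns win, so scan in reverse as ignore.Relative does
		for i := len(patterns) - 1; i >= 0; i-- {
			if patterns[i].Match(path, isdir) {
				fmt.Printf("%s -> %q (include=%v)\n", path, patterns[i].String(), patterns[i].Include())
				return
			}
		}
		fmt.Printf("%s -> no match\n", path)
	}

	check("dist", true)           // matched: it is a directory
	check("dist", false)          // not matched: "dist/" is directory-only
	check("dist/keep.txt", false) // matched by the negated pattern, so included
}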
+ if n._directory && !isdir { + return false + } + + // should we match the whole path, or just the last component? + if n._anchored { + return fnmatch.Match(n._fnmatch, path, 0) + } else { + _, _base := filepath.Split(path) + return fnmatch.Match(n._fnmatch, _base, 0) + } +} // Match() + +// +// path patterns +// - designed to match complete or partial paths (not just filenames) +// + +// path returns a Pattern designed to match paths that include at least one +// path separator '/' neither at the end nor the start of the pattern. +func (p *pattern) path(tokens []*Token) Pattern { + // how many directory components are we expecting? + _depth := 0 + for _, _token := range tokens { + if _token.Type == SEPARATOR { + _depth++ + } + } + + // return the pattern instance + return &path{pattern: *p, _depth: _depth} +} // path() + +// Match returns true if the given path matches the path pattern. If the +// pattern is meant for directories only, and the path is not a directory, +// Match will return false. The matching is performed by fnmatch() +// with flags set to FNM_PATHNAME. It is assumed path is relative to the +// base path of the owning GitIgnore. +func (p *path) Match(path string, isdir bool) bool { + // are we expecting a directory + if p._directory && !isdir { + return false + } + + if fnmatch.Match(p._fnmatch, path, fnmatch.FNM_PATHNAME) { + return true + } else if p._anchored { + return false + } + + // match against the trailing path elements + return fnmatch.Match(p._fnmatch, path, fnmatch.FNM_PATHNAME) +} // Match() + +// +// "any" patterns +// + +// any returns a Pattern designed to match paths that include at least one +// any pattern '**', specifying recursive matching. +func (p *pattern) any(tokens []*Token) Pattern { + // consider only the non-SEPARATOR tokens, as these will be matched + // against the path components + _tokens := make([]*Token, 0) + for _, _token := range tokens { + if _token.Type != SEPARATOR { + _tokens = append(_tokens, _token) + } + } + + return &any{*p, _tokens} +} // any() + +// Match returns true if the given path matches the any pattern. If the +// pattern is meant for directories only, and the path is not a directory, +// Match will return false. The matching is performed by recursively applying +// fnmatch() with flags set to FNM_PATHNAME. It is assumed path is relative to +// the base path of the owning GitIgnore. +func (a *any) Match(path string, isdir bool) bool { + // are we expecting a directory? + if a._directory && !isdir { + return false + } + + // split the path into components + _parts := strings.Split(path, string(_SEPARATOR)) + + // attempt to match the parts against the pattern tokens + return a.match(_parts, a._tokens) +} // Match() + +// match performs the recursive matching for 'any' patterns. An 'any' +// token '**' may match any path component, or no path component. +func (a *any) match(path []string, tokens []*Token) bool { + // if we have no more tokens, then we have matched this path + // if there are also no more path elements, otherwise there's no match + if len(tokens) == 0 { + return len(path) == 0 + } + + // what token are we trying to match? 
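Since the name, path and any implementations all delegate to fnmatch, a short sketch of the flag behaviour they rely on:

package main

import (
	"fmt"

	"github.com/danwakefield/fnmatch"
)

func main() {
	// without FNM_PATHNAME a '*' is free to cross directory separators
	fmt.Println(fnmatch.Match("*.go", "cmd/main.go", 0)) // true

	// with FNM_PATHNAME (used by path and any patterns) '*' stops at '/'
	fmt.Println(fnmatch.Match("*.go", "cmd/main.go", fnmatch.FNM_PATHNAME))     // false
	fmt.Println(fnmatch.Match("cmd/*.go", "cmd/main.go", fnmatch.FNM_PATHNAME)) // true

	// FNM_CASEFOLD makes the comparison case-insensitive
	fmt.Println(fnmatch.Match("*.MD", "readme.md", fnmatch.FNM_CASEFOLD)) // true
}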
+ _token := tokens[0] + switch _token.Type { + case ANY: + if len(path) == 0 { + return a.match(path, tokens[1:]) + } else { + return a.match(path, tokens[1:]) || a.match(path[1:], tokens) + } + + default: + // if we have a non-ANY token, then we must have a non-empty path + if len(path) != 0 { + // if the current path element matches this token, + // we match if the remainder of the path matches the + // remaining tokens + if fnmatch.Match(_token.Token(), path[0], fnmatch.FNM_PATHNAME) { + return a.match(path[1:], tokens[1:]) + } + } + } + + // if we are here, then we have no match + return false +} // match() + +// ensure the patterns confirm to the Pattern interface +var _ Pattern = &name{} +var _ Pattern = &path{} +var _ Pattern = &any{} diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/position.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/position.go new file mode 100644 index 0000000..236a0c3 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/position.go @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +import ( + "fmt" +) + +// Position represents the position of the .gitignore parser, and the position +// of a .gitignore pattern within the parsed stream. +type Position struct { + File string + Line int + Column int + Offset int +} + +// String returns a string representation of the current position. +func (p Position) String() string { + _prefix := "" + if p.File != "" { + _prefix = p.File + ": " + } + + if p.Line == 0 { + return fmt.Sprintf("%s+%d", _prefix, p.Offset) + } else if p.Column == 0 { + return fmt.Sprintf("%s%d", _prefix, p.Line) + } else { + return fmt.Sprintf("%s%d:%d", _prefix, p.Line, p.Column) + } +} // String() + +// Zero returns true if the Position represents the zero Position +func (p Position) Zero() bool { + return p.Line+p.Column+p.Offset == 0 +} // Zero() diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/repository.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/repository.go new file mode 100644 index 0000000..1a69cdf --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/repository.go @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +import ( + "os" + "path/filepath" + "strings" +) + +const File = ".gitignore" + +// repository is the implementation of the set of .gitignore files within a +// repository hierarchy +type repository struct { + ignore + _errors func(e Error) bool + _cache Cache + _file string + _exclude GitIgnore +} // repository{} + +// NewRepository returns a GitIgnore instance representing a git repository +// with root directory base. If base is not a directory, or base cannot be +// read, NewRepository will return an error. +// +// Internally, NewRepository uses NewRepositoryWithFile. +func NewRepository(base string) (GitIgnore, error) { + return NewRepositoryWithFile(base, File) +} // NewRepository() + +// NewRepositoryWithFile returns a GitIgnore instance representing a git +// repository with root directory base. The repository will use file as +// the name of the files within the repository from which to load the +// .gitignore patterns. If file is the empty string, NewRepositoryWithFile +// uses ".gitignore". If the ignore file name is ".gitignore", the returned +// GitIgnore instance will also consider patterns listed in +// $GIT_DIR/info/exclude when performing repository matching. +// +// Internally, NewRepositoryWithFile uses NewRepositoryWithErrors. 
+func NewRepositoryWithFile(base, file string) (GitIgnore, error) { + // define an error handler to catch any file access errors + // - record the first encountered error + var _error Error + _errors := func(e Error) bool { + if _error == nil { + _error = e + } + return true + } + + // attempt to retrieve the repository represented by this file + _repository := NewRepositoryWithErrors(base, file, _errors) + + // did we encounter an error? + // - if the error has a zero Position then it was encountered + // before parsing was attempted, so we return that error + if _error != nil { + if _error.Position().Zero() { + return nil, _error.Underlying() + } + } + + // otherwise, we ignore the parser errors + return _repository, nil +} // NewRepositoryWithFile() + +// NewRepositoryWithErrors returns a GitIgnore instance representing a git +// repository with a root directory base. As with NewRepositoryWithFile, file +// specifies the name of the files within the repository containing the +// .gitignore patterns, and defaults to ".gitignore" if file is not specified. +// If the ignore file name is ".gitignore", the returned GitIgnore instance +// will also consider patterns listed in $GIT_DIR/info/exclude when performing +// repository matching. +// +// If errors is given, it will be invoked for each error encountered while +// matching a path against the repository GitIgnore (such as file permission +// denied, or errors during .gitignore parsing). See Match below. +// +// Internally, NewRepositoryWithErrors uses NewRepositoryWithCache. +func NewRepositoryWithErrors(base, file string, errors func(e Error) bool) GitIgnore { + return NewRepositoryWithCache(base, file, NewCache(), errors) +} // NewRepositoryWithErrors() + +// NewRepositoryWithCache returns a GitIgnore instance representing a git +// repository with a root directory base. As with NewRepositoryWithErrors, +// file specifies the name of the files within the repository containing the +// .gitignore patterns, and defaults to ".gitignore" if file is not specified. +// If the ignore file name is ".gitignore", the returned GitIgnore instance +// will also consider patterns listed in $GIT_DIR/info/exclude when performing +// repository matching. +// +// NewRepositoryWithCache will attempt to load each .gitignore within the +// repository only once, using NewWithCache to store the corresponding +// GitIgnore instance in cache. If cache is given as nil, +// NewRepositoryWithCache will create a Cache instance for this repository. +// +// If errors is given, it will be invoked for each error encountered while +// matching a path against the repository GitIgnore (such as file permission +// denied, or errors during .gitignore parsing). See Match below. +func NewRepositoryWithCache(base, file string, cache Cache, errors func(e Error) bool) GitIgnore { + // do we have an error handler? + _errors := errors + if _errors == nil { + _errors = func(e Error) bool { return true } + } + + // extract the absolute path of the base directory + _base, _err := filepath.Abs(base) + if _err != nil { + _errors(NewError(_err, Position{})) + return nil + } + + // ensure the given base is a directory + _info, _err := os.Stat(_base) + if _info != nil { + if !_info.IsDir() { + _err = ErrInvalidDirectoryError + } + } + if _err != nil { + _errors(NewError(_err, Position{})) + return nil + } + + // if we haven't been given a base file name, use the default + if file == "" { + file = File + } + + // are we matching .gitignore files? 
+ // - if we are, we also consider $GIT_DIR/info/exclude + var _exclude GitIgnore + if file == File { + _exclude, _err = exclude(_base) + if _err != nil { + _errors(NewError(_err, Position{})) + return nil + } + } + + // create the repository instance + _ignore := ignore{_base: _base} + _repository := &repository{ + ignore: _ignore, + _errors: _errors, + _exclude: _exclude, + _cache: cache, + _file: file, + } + + return _repository +} // NewRepositoryWithCache() + +// Match attempts to match the path against this repository. Matching proceeds +// according to normal gitignore rules, where .gtignore files in the same +// directory as path, take precedence over .gitignore files higher up the +// path hierarchy, and child files and directories are ignored if the parent +// is ignored. If the path is matched by a gitignore pattern in the repository, +// a Match is returned detailing the matched pattern. The returned Match +// can be used to determine if the path should be ignored or included according +// to the repository. +// +// If an error is encountered during matching, the repository error handler +// (if configured via NewRepositoryWithErrors or NewRepositoryWithCache), will +// be called. If the error handler returns false, matching will terminate and +// Match will return nil. If handler returns true, Match will continue +// processing in an attempt to match path. +// +// Match will raise an error and return nil if the absolute path cannot be +// determined, or if its not possible to determine if path represents a file +// or a directory. +// +// If path is not located under the root of this repository, Match returns nil. +func (r *repository) Match(path string) Match { + // ensure we have the absolute path for the given file + _path, _err := filepath.Abs(path) + if _err != nil { + r._errors(NewError(_err, Position{})) + return nil + } + + // is the path a file or a directory? + _info, _err := os.Stat(_path) + if _err != nil { + r._errors(NewError(_err, Position{})) + return nil + } + _isdir := _info.IsDir() + + // attempt to match the absolute path + return r.Absolute(_path, _isdir) +} // Match() + +// Absolute attempts to match an absolute path against this repository. If the +// path is not located under the base directory of this repository, or is not +// matched by this repository, nil is returned. +func (r *repository) Absolute(path string, isdir bool) Match { + // does the file share the same directory as this ignore file? + if !strings.HasPrefix(path, r.Base()) { + return nil + } + + // extract the relative path of this file + _prefix := len(r.Base()) + 1 + _rel := string(path[_prefix:]) + return r.Relative(_rel, isdir) +} // Absolute() + +// Relative attempts to match a path relative to the repository base directory. +// If the path is not matched by the repository, nil is returned. +func (r *repository) Relative(path string, isdir bool) Match { + // if there's no path, then there's nothing to match + _path := filepath.Clean(path) + if _path == "." { + return nil + } + + // repository matching: + // - a child path cannot be considered if its parent is ignored + // - a .gitignore in a lower directory overrides a .gitignore in a + // higher directory + + // first, is the parent directory ignored? 
+ // - extract the parent directory from the current path + _parent, _local := filepath.Split(_path) + _match := r.Relative(_parent, true) + if _match != nil { + if _match.Ignore() { + return _match + } + } + _parent = filepath.Clean(_parent) + + // the parent directory isn't ignored, so we now look at the original path + // - we consider .gitignore files in the current directory first, then + // move up the path hierarchy + var _last string + for { + _file := filepath.Join(r._base, _parent, r._file) + _ignore := NewWithCache(_file, r._cache, r._errors) + if _ignore != nil { + _match := _ignore.Relative(_local, isdir) + if _match != nil { + return _match + } + } + + // if there's no parent, then we're done + // - since we use filepath.Clean() we look for "." + if _parent == "." { + break + } + + // we don't have a match for this file, so we progress up the + // path hierarchy + // - we are manually building _local using the .gitignore + // separator "/", which is how we handle operating system + // file system differences + _parent, _last = filepath.Split(_parent) + _parent = filepath.Clean(_parent) + _local = _last + string(_SEPARATOR) + _local + } + + // do we have a global exclude file? (i.e. GIT_DIR/info/exclude) + if r._exclude != nil { + return r._exclude.Relative(path, isdir) + } + + // we have no match + return nil +} // Relative() + +// ensure repository satisfies the GitIgnore interface +var _ GitIgnore = &repository{} diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/rune.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/rune.go new file mode 100644 index 0000000..72ba469 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/rune.go @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +const ( + // define the sentinel runes of the lexer + _EOF = rune(0) + _CR = rune('\r') + _NEWLINE = rune('\n') + _COMMENT = rune('#') + _SEPARATOR = rune('/') + _ESCAPE = rune('\\') + _SPACE = rune(' ') + _TAB = rune('\t') + _NEGATION = rune('!') + _WILDCARD = rune('*') +) diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/token.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/token.go new file mode 100644 index 0000000..8402a70 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/token.go @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +import ( + "fmt" +) + +// Token represents a parsed token from a .gitignore stream, encapsulating the +// token type, the runes comprising the token, and the position within the +// stream of the first rune of the token. +type Token struct { + Type TokenType + Word []rune + Position +} + +// NewToken returns a Token instance of the given t, represented by the +// word runes, at the stream position pos. If the token type is not know, the +// returned instance will have type BAD. +func NewToken(t TokenType, word []rune, pos Position) *Token { + // ensure the type is valid + if t < ILLEGAL || t > BAD { + t = BAD + } + + // return the token + return &Token{Type: t, Word: word, Position: pos} +} // NewToken() + +// Name returns a string representation of the Token type. +func (t *Token) Name() string { + return t.Type.String() +} // Name() + +// Token returns the string representation of the Token word. +func (t *Token) Token() string { + return string(t.Word) +} // Token() + +// String returns a string representation of the Token, encapsulating its +// position in the input stream, its name (i.e. type), and its runes. 
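Finally, a sketch of repository-level matching: Match stats the path to decide whether it is a file or a directory, so both the base directory and the path are assumed to exist on disk; the locations below are illustrative.

package main

import (
	"fmt"

	gitignore "github.com/boyter/gocodewalker/go-gitignore"
)

func main() {
	// every .gitignore between the repository root and the path is consulted,
	// with the closest one taking precedence
	repo, err := gitignore.NewRepository("/tmp/project")
	if err != nil {
		fmt.Println("not a usable repository root:", err)
		return
	}

	// Match needs the path to exist so it can stat it
	if m := repo.Match("/tmp/project/build/output.bin"); m != nil {
		fmt.Printf("matched %q at %s (ignored=%v)\n", m.String(), m.Position(), m.Ignore())
	}

	// Relative only reads the relevant .gitignore files, not the path itself
	if m := repo.Relative("build/output.bin", false); m != nil {
		fmt.Println("relative match:", m.String(), "ignore:", m.Ignore())
	}
}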
+func (t *Token) String() string { + return fmt.Sprintf("%s: %s %q", t.Position.String(), t.Name(), t.Token()) +} // String() diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/tokenset.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/tokenset.go new file mode 100644 index 0000000..228e702 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/tokenset.go @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +// tokenset represents an ordered list of Tokens +type tokenset []*Token + +// String() returns a concatenated string of all runes represented by the +// list of tokens. +func (t tokenset) String() string { + // concatenate the tokens into a single string + _rtn := "" + for _, _t := range []*Token(t) { + _rtn = _rtn + _t.Token() + } + return _rtn +} // String() diff --git a/vendor/github.com/boyter/gocodewalker/go-gitignore/tokentype.go b/vendor/github.com/boyter/gocodewalker/go-gitignore/tokentype.go new file mode 100644 index 0000000..d45eabe --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/go-gitignore/tokentype.go @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: MIT + +package gitignore + +type TokenType int + +const ( + ILLEGAL TokenType = iota + EOF + EOL + WHITESPACE + COMMENT + SEPARATOR + NEGATION + PATTERN + ANY + BAD +) + +// String returns a string representation of the Token type. +func (t TokenType) String() string { + switch t { + case ILLEGAL: + return "ILLEGAL" + case EOF: + return "EOF" + case EOL: + return "EOL" + case WHITESPACE: + return "WHITESPACE" + case COMMENT: + return "COMMENT" + case SEPARATOR: + return "SEPARATOR" + case NEGATION: + return "NEGATION" + case PATTERN: + return "PATTERN" + case ANY: + return "ANY" + default: + return "BAD TOKEN" + } +} // String() diff --git a/vendor/github.com/boyter/gocodewalker/hidden.go b/vendor/github.com/boyter/gocodewalker/hidden.go new file mode 100644 index 0000000..c36c9e1 --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/hidden.go @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: MIT +//go:build !windows + +package gocodewalker + +import ( + "io/fs" + "os" +) + +// IsHidden Returns true if file is hidden +func IsHidden(file os.FileInfo, directory string) (bool, error) { + return IsHiddenDirEntry(fs.FileInfoToDirEntry(file), directory) +} + +// IsHiddenDirEntry is similar to [IsHidden], excepts it accepts [fs.DirEntry] as its argument +func IsHiddenDirEntry(file fs.DirEntry, directory string) (bool, error) { + return file.Name()[0:1] == ".", nil +} diff --git a/vendor/github.com/boyter/gocodewalker/hidden_windows.go b/vendor/github.com/boyter/gocodewalker/hidden_windows.go new file mode 100644 index 0000000..3157afb --- /dev/null +++ b/vendor/github.com/boyter/gocodewalker/hidden_windows.go @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: MIT +//go:build windows + +package gocodewalker + +import ( + "io/fs" + "os" + "path" + "syscall" +) + +// IsHidden Returns true if file is hidden +func IsHidden(file os.FileInfo, directory string) (bool, error) { + return IsHiddenDirEntry(fs.FileInfoToDirEntry(file), directory) +} + +// IsHiddenDirEntry is similar to [IsHidden], excepts it accepts [fs.DirEntry] as its argument +func IsHiddenDirEntry(file fs.DirEntry, directory string) (bool, error) { + fullpath := path.Join(directory, file.Name()) + pointer, err := syscall.UTF16PtrFromString(fullpath) + if err != nil { + return false, err + } + attributes, err := syscall.GetFileAttributes(pointer) + if err != nil { + return false, err + } + return 
attributes&syscall.FILE_ATTRIBUTE_HIDDEN != 0, nil +} diff --git a/vendor/github.com/danwakefield/fnmatch/.gitignore b/vendor/github.com/danwakefield/fnmatch/.gitignore new file mode 100644 index 0000000..daf913b --- /dev/null +++ b/vendor/github.com/danwakefield/fnmatch/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof diff --git a/vendor/github.com/danwakefield/fnmatch/LICENSE b/vendor/github.com/danwakefield/fnmatch/LICENSE new file mode 100644 index 0000000..0dc9851 --- /dev/null +++ b/vendor/github.com/danwakefield/fnmatch/LICENSE @@ -0,0 +1,23 @@ +Copyright (c) 2016, Daniel Wakefield +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/danwakefield/fnmatch/README.md b/vendor/github.com/danwakefield/fnmatch/README.md new file mode 100644 index 0000000..b8d7156 --- /dev/null +++ b/vendor/github.com/danwakefield/fnmatch/README.md @@ -0,0 +1,4 @@ +# fnmatch +Updated clone of kballards golang fnmatch gist (https://gist.github.com/kballard/272720) + + diff --git a/vendor/github.com/danwakefield/fnmatch/fnmatch.go b/vendor/github.com/danwakefield/fnmatch/fnmatch.go new file mode 100644 index 0000000..07ac7b3 --- /dev/null +++ b/vendor/github.com/danwakefield/fnmatch/fnmatch.go @@ -0,0 +1,219 @@ +// Provide string-matching based on fnmatch.3 +package fnmatch + +// There are a few issues that I believe to be bugs, but this implementation is +// based as closely as possible on BSD fnmatch. These bugs are present in the +// source of BSD fnmatch, and so are replicated here. The issues are as follows: +// +// * FNM_PERIOD is no longer observed after the first * in a pattern +// This only applies to matches done with FNM_PATHNAME as well +// * FNM_PERIOD doesn't apply to ranges. 
According to the documentation, +// a period must be matched explicitly, but a range will match it too + +import ( + "unicode" + "unicode/utf8" +) + +const ( + FNM_NOESCAPE = (1 << iota) + FNM_PATHNAME + FNM_PERIOD + + FNM_LEADING_DIR + FNM_CASEFOLD + + FNM_IGNORECASE = FNM_CASEFOLD + FNM_FILE_NAME = FNM_PATHNAME +) + +func unpackRune(str *string) rune { + rune, size := utf8.DecodeRuneInString(*str) + *str = (*str)[size:] + return rune +} + +// Matches the pattern against the string, with the given flags, +// and returns true if the match is successful. +// This function should match fnmatch.3 as closely as possible. +func Match(pattern, s string, flags int) bool { + // The implementation for this function was patterned after the BSD fnmatch.c + // source found at http://src.gnu-darwin.org/src/contrib/csup/fnmatch.c.html + noescape := (flags&FNM_NOESCAPE != 0) + pathname := (flags&FNM_PATHNAME != 0) + period := (flags&FNM_PERIOD != 0) + leadingdir := (flags&FNM_LEADING_DIR != 0) + casefold := (flags&FNM_CASEFOLD != 0) + // the following is some bookkeeping that the original fnmatch.c implementation did not do + // We are forced to do this because we're not keeping indexes into C strings but rather + // processing utf8-encoded strings. Use a custom unpacker to maintain our state for us + sAtStart := true + sLastAtStart := true + sLastSlash := false + sLastUnpacked := rune(0) + unpackS := func() rune { + sLastSlash = (sLastUnpacked == '/') + sLastUnpacked = unpackRune(&s) + sLastAtStart = sAtStart + sAtStart = false + return sLastUnpacked + } + for len(pattern) > 0 { + c := unpackRune(&pattern) + switch c { + case '?': + if len(s) == 0 { + return false + } + sc := unpackS() + if pathname && sc == '/' { + return false + } + if period && sc == '.' && (sLastAtStart || (pathname && sLastSlash)) { + return false + } + case '*': + // collapse multiple *'s + // don't use unpackRune here, the only char we care to detect is ASCII + for len(pattern) > 0 && pattern[0] == '*' { + pattern = pattern[1:] + } + if period && s[0] == '.' 
&& (sAtStart || (pathname && sLastUnpacked == '/')) { + return false + } + // optimize for patterns with * at end or before / + if len(pattern) == 0 { + if pathname { + return leadingdir || (strchr(s, '/') == -1) + } else { + return true + } + return !(pathname && strchr(s, '/') >= 0) + } else if pathname && pattern[0] == '/' { + offset := strchr(s, '/') + if offset == -1 { + return false + } else { + // we already know our pattern and string have a /, skip past it + s = s[offset:] // use unpackS here to maintain our bookkeeping state + unpackS() + pattern = pattern[1:] // we know / is one byte long + break + } + } + // general case, recurse + for test := s; len(test) > 0; unpackRune(&test) { + // I believe the (flags &^ FNM_PERIOD) is a bug when FNM_PATHNAME is specified + // but this follows exactly from how fnmatch.c implements it + if Match(pattern, test, (flags &^ FNM_PERIOD)) { + return true + } else if pathname && test[0] == '/' { + break + } + } + return false + case '[': + if len(s) == 0 { + return false + } + if pathname && s[0] == '/' { + return false + } + sc := unpackS() + if !rangematch(&pattern, sc, flags) { + return false + } + case '\\': + if !noescape { + if len(pattern) > 0 { + c = unpackRune(&pattern) + } + } + fallthrough + default: + if len(s) == 0 { + return false + } + sc := unpackS() + switch { + case sc == c: + case casefold && unicode.ToLower(sc) == unicode.ToLower(c): + default: + return false + } + } + } + return len(s) == 0 || (leadingdir && s[0] == '/') +} + +func rangematch(pattern *string, test rune, flags int) bool { + if len(*pattern) == 0 { + return false + } + casefold := (flags&FNM_CASEFOLD != 0) + noescape := (flags&FNM_NOESCAPE != 0) + if casefold { + test = unicode.ToLower(test) + } + var negate, matched bool + if (*pattern)[0] == '^' || (*pattern)[0] == '!' { + negate = true + (*pattern) = (*pattern)[1:] + } + for !matched && len(*pattern) > 1 && (*pattern)[0] != ']' { + c := unpackRune(pattern) + if !noescape && c == '\\' { + if len(*pattern) > 1 { + c = unpackRune(pattern) + } else { + return false + } + } + if casefold { + c = unicode.ToLower(c) + } + if (*pattern)[0] == '-' && len(*pattern) > 1 && (*pattern)[1] != ']' { + unpackRune(pattern) // skip the - + c2 := unpackRune(pattern) + if !noescape && c2 == '\\' { + if len(*pattern) > 0 { + c2 = unpackRune(pattern) + } else { + return false + } + } + if casefold { + c2 = unicode.ToLower(c2) + } + // this really should be more intelligent, but it looks like + // fnmatch.c does simple int comparisons, therefore we will as well + if c <= test && test <= c2 { + matched = true + } + } else if c == test { + matched = true + } + } + // skip past the rest of the pattern + ok := false + for !ok && len(*pattern) > 0 { + c := unpackRune(pattern) + if c == '\\' && len(*pattern) > 0 { + unpackRune(pattern) + } else if c == ']' { + ok = true + } + } + return ok && matched != negate +} + +// define strchr because strings.Index() seems a bit overkill +// returns the index of c in s, or -1 if there is no match +func strchr(s string, c rune) int { + for i, sc := range s { + if sc == c { + return i + } + } + return -1 +} diff --git a/vendor/golang.org/x/sync/LICENSE b/vendor/golang.org/x/sync/LICENSE new file mode 100644 index 0000000..2a7cf70 --- /dev/null +++ b/vendor/golang.org/x/sync/LICENSE @@ -0,0 +1,27 @@ +Copyright 2009 The Go Authors. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google LLC nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/sync/PATENTS b/vendor/golang.org/x/sync/PATENTS new file mode 100644 index 0000000..7330990 --- /dev/null +++ b/vendor/golang.org/x/sync/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. + +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/sync/errgroup/errgroup.go b/vendor/golang.org/x/sync/errgroup/errgroup.go new file mode 100644 index 0000000..1d8cffa --- /dev/null +++ b/vendor/golang.org/x/sync/errgroup/errgroup.go @@ -0,0 +1,151 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package errgroup provides synchronization, error propagation, and Context +// cancelation for groups of goroutines working on subtasks of a common task. 
+// +// [errgroup.Group] is related to [sync.WaitGroup] but adds handling of tasks +// returning errors. +package errgroup + +import ( + "context" + "fmt" + "sync" +) + +type token struct{} + +// A Group is a collection of goroutines working on subtasks that are part of +// the same overall task. A Group should not be reused for different tasks. +// +// A zero Group is valid, has no limit on the number of active goroutines, +// and does not cancel on error. +type Group struct { + cancel func(error) + + wg sync.WaitGroup + + sem chan token + + errOnce sync.Once + err error +} + +func (g *Group) done() { + if g.sem != nil { + <-g.sem + } + g.wg.Done() +} + +// WithContext returns a new Group and an associated Context derived from ctx. +// +// The derived Context is canceled the first time a function passed to Go +// returns a non-nil error or the first time Wait returns, whichever occurs +// first. +func WithContext(ctx context.Context) (*Group, context.Context) { + ctx, cancel := context.WithCancelCause(ctx) + return &Group{cancel: cancel}, ctx +} + +// Wait blocks until all function calls from the Go method have returned, then +// returns the first non-nil error (if any) from them. +func (g *Group) Wait() error { + g.wg.Wait() + if g.cancel != nil { + g.cancel(g.err) + } + return g.err +} + +// Go calls the given function in a new goroutine. +// +// The first call to Go must happen before a Wait. +// It blocks until the new goroutine can be added without the number of +// goroutines in the group exceeding the configured limit. +// +// The first goroutine in the group that returns a non-nil error will +// cancel the associated Context, if any. The error will be returned +// by Wait. +func (g *Group) Go(f func() error) { + if g.sem != nil { + g.sem <- token{} + } + + g.wg.Add(1) + go func() { + defer g.done() + + // It is tempting to propagate panics from f() + // up to the goroutine that calls Wait, but + // it creates more problems than it solves: + // - it delays panics arbitrarily, + // making bugs harder to detect; + // - it turns f's panic stack into a mere value, + // hiding it from crash-monitoring tools; + // - it risks deadlocks that hide the panic entirely, + // if f's panic leaves the program in a state + // that prevents the Wait call from being reached. + // See #53757, #74275, #74304, #74306. + + if err := f(); err != nil { + g.errOnce.Do(func() { + g.err = err + if g.cancel != nil { + g.cancel(g.err) + } + }) + } + }() +} + +// TryGo calls the given function in a new goroutine only if the number of +// active goroutines in the group is currently below the configured limit. +// +// The return value reports whether the goroutine was started. +func (g *Group) TryGo(f func() error) bool { + if g.sem != nil { + select { + case g.sem <- token{}: + // Note: this allows barging iff channels in general allow barging. + default: + return false + } + } + + g.wg.Add(1) + go func() { + defer g.done() + + if err := f(); err != nil { + g.errOnce.Do(func() { + g.err = err + if g.cancel != nil { + g.cancel(g.err) + } + }) + } + }() + return true +} + +// SetLimit limits the number of active goroutines in this group to at most n. +// A negative value indicates no limit. +// A limit of zero will prevent any new goroutines from being added. +// +// Any subsequent call to the Go method will block until it can add an active +// goroutine without exceeding the configured limit. +// +// The limit must not be modified while any goroutines in the group are active. 
+func (g *Group) SetLimit(n int) { + if n < 0 { + g.sem = nil + return + } + if len(g.sem) != 0 { + panic(fmt.Errorf("errgroup: modify limit while %v goroutines in the group are still active", len(g.sem))) + } + g.sem = make(chan token, n) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index b3de659..4e8de92 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,6 +1,13 @@ +# github.com/boyter/gocodewalker v1.5.1 +## explicit; go 1.23.0 +github.com/boyter/gocodewalker +github.com/boyter/gocodewalker/go-gitignore # github.com/cespare/xxhash/v2 v2.3.0 ## explicit; go 1.11 github.com/cespare/xxhash/v2 +# github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 +## explicit +github.com/danwakefield/fnmatch # github.com/djherbis/times v1.6.0 ## explicit; go 1.16 github.com/djherbis/times @@ -63,6 +70,9 @@ golang.org/x/crypto/sha3 # golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b ## explicit; go 1.23.0 golang.org/x/exp/constraints +# golang.org/x/sync v0.16.0 +## explicit; go 1.23.0 +golang.org/x/sync/errgroup # golang.org/x/sys v0.36.0 ## explicit; go 1.24.0 golang.org/x/sys/cpu From 9b5ffef172312ee682fe92de924027cd785dfae1 Mon Sep 17 00:00:00 2001 From: Ben Boyter Date: Fri, 12 Dec 2025 11:26:28 +1100 Subject: [PATCH 2/4] add in new file walker --- main.go | 2 -- processor/file.go | 5 +---- processor/processor.go | 5 ++--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/main.go b/main.go index 7141fe0..7484036 100644 --- a/main.go +++ b/main.go @@ -3,7 +3,6 @@ package main import ( - "fmt" "os" "path/filepath" "runtime" @@ -34,7 +33,6 @@ func main() { } } - fmt.Println(processor.GitIgnore, processor.GitModuleIgnore) processor.DirFilePaths = filePaths processor.Process() }, diff --git a/processor/file.go b/processor/file.go index 71198b3..091feea 100644 --- a/processor/file.go +++ b/processor/file.go @@ -36,9 +36,6 @@ func walkDirectoryWithIgnore(toWalk string, output chan string) { fileWalker := gocodewalker.NewFileWalker(toWalk, fileListQueue) // we only want to have a custom ignore file - fileWalker.IgnoreGitIgnore = true - fileWalker.IgnoreIgnoreFile = true - fileWalker.IgnoreGitIgnore = GitIgnore fileWalker.IgnoreIgnoreFile = Ignore fileWalker.IgnoreGitModules = GitModuleIgnore @@ -74,6 +71,6 @@ func walkDirectoryWithIgnore(toWalk string, output chan string) { }() for f := range fileListQueue { - fmt.Println(f.Location) + output <- f.Location } } diff --git a/processor/processor.go b/processor/processor.go index cc3ca1f..56967c8 100644 --- a/processor/processor.go +++ b/processor/processor.go @@ -81,7 +81,7 @@ var GitModuleIgnore = false var Ignore = false // HashIgnore set true to enable hashignore file checks -var HashIgnore = true +var HashIgnore = false // PathDenyList sets the paths that should be skipped var PathDenyList = []string{} @@ -174,13 +174,12 @@ func Process() { } else { if fi.IsDir() { if Recursive { - walkDirectory(fp, fileListQueue) + walkDirectoryWithIgnore(fp, fileListQueue) } } else { fileListQueue <- fp } } - } close(fileListQueue) }() From e45804d0a5759880fa9097d854559f608996c592 Mon Sep 17 00:00:00 2001 From: Ben Boyter Date: Mon, 15 Dec 2025 09:47:50 +1100 Subject: [PATCH 3/4] fix ignore parsing issue --- processor/file.go | 10 ++++++---- processor/processor.go | 12 ++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/processor/file.go b/processor/file.go index 091feea..d8123ca 100644 --- a/processor/file.go +++ b/processor/file.go @@ -33,12 +33,14 @@ func walkDirectory(toWalk string, output chan 
string) {
 func walkDirectoryWithIgnore(toWalk string, output chan string) {
 	fileListQueue := make(chan *gocodewalker.File, 1000)
 
+	//fileWalker := gocodewalker.NewParallelFileWalker([]string{toWalk}, fileListQueue)
 	fileWalker := gocodewalker.NewFileWalker(toWalk, fileListQueue)
 
-	// we only want to have a custom ignore file
-	fileWalker.IgnoreGitIgnore = GitIgnore
-	fileWalker.IgnoreIgnoreFile = Ignore
-	fileWalker.IgnoreGitModules = GitModuleIgnore
+	// The user flags enable processing, while the gocodewalker flags disable
+	// it, so we need to invert the values.
+	fileWalker.IgnoreGitIgnore = !GitIgnore
+	fileWalker.IgnoreIgnoreFile = !Ignore
+	fileWalker.IgnoreGitModules = !GitModuleIgnore
 	fileWalker.IncludeHidden = true
 	fileWalker.ExcludeDirectory = PathDenyList
diff --git a/processor/processor.go b/processor/processor.go
index 56967c8..b14fcfc 100644
--- a/processor/processor.go
+++ b/processor/processor.go
@@ -71,14 +71,14 @@ var StreamSize int64 = 1_000_000
 // FileInput indicates we have a file passed in which consists of a
 var FileInput = ""
 
-// GitIgnore set true to enable .gitignore checks
-var GitIgnore = false
+// GitIgnore set false to disable .gitignore checks
+var GitIgnore = true
 
-// GitModuleIgnore set true to enable .gitmodules checks
-var GitModuleIgnore = false
+// GitModuleIgnore set false to disable .gitmodules checks
+var GitModuleIgnore = true
 
-// Ignore set true to enable ignore file checks
-var Ignore = false
+// Ignore set false to disable ignore file checks
+var Ignore = true
 
 // HashIgnore set true to enable hashignore file checks
 var HashIgnore = false

From 220796aaa983018bb306df0e285538d825d8d8de Mon Sep 17 00:00:00 2001
From: Ben Boyter
Date: Mon, 15 Dec 2025 09:49:35 +1100
Subject: [PATCH 4/4] allow disable of parallel file walking

---
 processor/file.go | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/processor/file.go b/processor/file.go
index d8123ca..a0ed190 100644
--- a/processor/file.go
+++ b/processor/file.go
@@ -33,8 +33,12 @@ func walkDirectory(toWalk string, output chan string) {
 func walkDirectoryWithIgnore(toWalk string, output chan string) {
 	fileListQueue := make(chan *gocodewalker.File, 1000)
 
-	//fileWalker := gocodewalker.NewParallelFileWalker([]string{toWalk}, fileListQueue)
-	fileWalker := gocodewalker.NewFileWalker(toWalk, fileListQueue)
+	var fileWalker *gocodewalker.FileWalker
+	if NoThreads != 1 {
+		fileWalker = gocodewalker.NewParallelFileWalker([]string{toWalk}, fileListQueue)
+	} else {
+		fileWalker = gocodewalker.NewFileWalker(toWalk, fileListQueue)
+	}
 
 	// The user flags enable processing, while the gocodewalker flags disable
 	// it, so we need to invert the values.
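
Usage note (a minimal standalone sketch, not part of the patch series): the walker wiring that patches 3 and 4 converge on can be exercised in isolation using only the gocodewalker API already referenced in processor/file.go (NewFileWalker, NewParallelFileWalker, the Ignore* fields, IncludeHidden and Start). The "." root, the useParallel toggle and the enableIgnores flag below are stand-ins for the real DirFilePaths, NoThreads and GitIgnore/Ignore/GitModuleIgnore settings.

package main

import (
	"fmt"

	"github.com/boyter/gocodewalker"
)

func main() {
	fileListQueue := make(chan *gocodewalker.File, 1000)

	// Stand-in for NoThreads != 1: choose the parallel walker unless a
	// single worker thread was requested.
	useParallel := true
	var fileWalker *gocodewalker.FileWalker
	if useParallel {
		fileWalker = gocodewalker.NewParallelFileWalker([]string{"."}, fileListQueue)
	} else {
		fileWalker = gocodewalker.NewFileWalker(".", fileListQueue)
	}

	// Stand-in for the user-facing GitIgnore/Ignore/GitModuleIgnore flags.
	// The user flags enable processing while gocodewalker's flags disable it,
	// hence the inversion mirrored from processor/file.go.
	enableIgnores := true
	fileWalker.IgnoreGitIgnore = !enableIgnores
	fileWalker.IgnoreIgnoreFile = !enableIgnores
	fileWalker.IgnoreGitModules = !enableIgnores
	fileWalker.IncludeHidden = true

	// The walker closes fileListQueue when it finishes, so the range below
	// terminates; this mirrors the consumer loop in processor/file.go.
	go func() {
		_ = fileWalker.Start()
	}()

	for f := range fileListQueue {
		fmt.Println(f.Location)
	}
}

With enableIgnores left true, paths excluded by .gitignore, .ignore and .gitmodules are skipped, matching the new defaults introduced in patch 3.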