From 2baf766f418d53df94ceab4b2b9bf1b8f3720842 Mon Sep 17 00:00:00 2001 From: Max Altgelt Date: Tue, 27 May 2025 12:46:14 +0200 Subject: [PATCH] feat: initial commit --- .github/workflows/go.yml | 35 ++++++++++++ README.md | 32 +++++++++++ go.mod | 17 ++++++ go.sum | 18 +++++++ store.go | 114 +++++++++++++++++++++++++++++++++++++++ store_test.go | 108 +++++++++++++++++++++++++++++++++++++ 6 files changed, 324 insertions(+) create mode 100644 .github/workflows/go.yml create mode 100644 README.md create mode 100644 go.mod create mode 100644 go.sum create mode 100644 store.go create mode 100644 store_test.go diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml new file mode 100644 index 0000000..54a8a9b --- /dev/null +++ b/.github/workflows/go.yml @@ -0,0 +1,35 @@ +name: go +on: + push: + branches: + - master + pull_request: + +env: + GOPRIVATE: github.com/NextronSystems/ + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: stable + - name: Use token + run: git config --global url."https://${{ secrets.JSONLOG_ACCESS_TOKEN }}@github.com/".insteadOf "https://github.com/" + - name: Test + run: go test -v ./... + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: stable + - name: Use token + run: git config --global url."https://${{ secrets.JSONLOG_ACCESS_TOKEN }}@github.com/".insteadOf "https://github.com/" + - name: Lint + uses: golangci/golangci-lint-action@v8 + with: + version: v2.1.6 diff --git a/README.md b/README.md new file mode 100644 index 0000000..6098b64 --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +# THOR Finding Store + +This package provides a simple on-disk store for findings generated by THOR scans. + +Both findings and the content that was matched on (typically the file content) are stored, with both referencing each other. 
+A finding will have zero or one content item, while a content item may be referenced by multiple findings. + +## Layout + +The store is a directory with the following structure: + +``` +├── findings +│ └── ab +│ ├── abcdef1234567890 +│ └── abcdef1234567890.hash +└── samples + └── 1e + ├── 1edc8bf0596dcdc0ca93b6dd89e14b57d0b4faf5da534d5487f9ed7ad0eb7e06 + └── 1edc8bf0596dcdc0ca93b6dd89e14b57d0b4faf5da534d5487f9ed7ad0eb7e06.metadata +``` + +Where: + +- `findings/` contains the findings, organized by the first two characters of the finding ID. + + Each finding is stored in a file named after the finding ID. If content was stored with the finding, an additional `.hash` file contains the SHA256 hash of that content. +- `samples/` contains the content matched on, organized by the first two characters of the content's SHA256 hash. + + Each content item is stored in a file named after its SHA256 hash, with an additional `.metadata` file containing the findings that reference this content item in newline-delimited JSON format. + +Optionally, by setting `Store.Flat` to `true`, the store can be configured to not create subdirectories under `findings/` and `samples/`, but instead store all findings and samples in a single directory each. 
diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..bf63662 --- /dev/null +++ b/go.mod @@ -0,0 +1,17 @@ +module github.com/NextronSystems/finding-store + +go 1.24 + +require ( + github.com/NextronSystems/jsonlog v0.0.0-20250523073520-69e056dcf33d + github.com/stretchr/testify v1.10.0 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + golang.org/x/exp v0.0.0-20240213143201-ec583247a57a // indirect + golang.org/x/mod v0.15.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..9bd3985 --- /dev/null +++ b/go.sum @@ -0,0 +1,18 @@ +github.com/NextronSystems/jsonlog v0.0.0-20250523073520-69e056dcf33d h1:Lo5904HF61rzum1Md+xaQ0LsLMXb6EpzxaSPL/JRr50= +github.com/NextronSystems/jsonlog v0.0.0-20250523073520-69e056dcf33d/go.mod h1:Hk47VW018TX8o/0sxK+EJt16iRE7gB91zGZGiaAjcww= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +golang.org/x/exp v0.0.0-20240213143201-ec583247a57a h1:HinSgX1tJRX3KsL//Gxynpw5CTOAIPhgL4W8PNiIpVE= +golang.org/x/exp v0.0.0-20240213143201-ec583247a57a/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= +golang.org/x/mod v0.15.0 h1:SernR4v+D55NyBH2QiEQrlBAnj1ECL6AGrA5+dPaMY8= +golang.org/x/mod v0.15.0/go.mod 
h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/store.go b/store.go
new file mode 100644
index 0000000..1ab28ba
--- /dev/null
+++ b/store.go
@@ -0,0 +1,192 @@
+// Package store implements a simple on-disk store for THOR findings and the
+// content they matched on.
+package store
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/NextronSystems/jsonlog/thorlog/v3"
+)
+
+// New returns a Store rooted at root that uses the nested (non-flat) layout.
+func New(root string) *Store {
+	return &Store{
+		RootDir: root,
+		Flat:    false,
+	}
+}
+
+// Store is an on-disk store for findings and the content they matched on.
+type Store struct {
+	// RootDir is the directory below which the findings and samples
+	// directories are created.
+	RootDir string
+	// Flat disables the two-character prefix subdirectories, so that every
+	// item is stored directly in its findings or samples directory.
+	Flat bool
+}
+
+const (
+	subdirFindings = "findings"
+	subdirContent  = "samples"
+	suffixMetadata = ".metadata"
+	suffixHash     = ".hash"
+)
+
+// Store writes finding, and optionally the content it matched on, to disk.
+//
+// The finding is stored as JSON in a file named after its Meta.GenID. If
+// content is non-nil, it is stored in a file named after its SHA-256 hash,
+// together with cross-references in both directions: "<finding ID>.hash"
+// holds the content hash, and the finding JSON is appended as one line to
+// "<content hash>.metadata".
+//
+// Content that is already present is not written again, so several findings
+// may reference the same content. Storing a finding whose ID is already
+// present fails.
+func (s *Store) Store(finding *thorlog.Finding, content io.ReadSeeker) error {
+	if finding == nil {
+		return errors.New("finding is nil, cannot store finding")
+	}
+	findingID := finding.Meta.GenID
+	switch {
+	case findingID == "":
+		return errors.New("finding ID is empty, cannot store finding")
+	case len(findingID) < 2:
+		return fmt.Errorf("finding ID is too short, must be at least 2 characters: %s", findingID)
+	case !isSafeID(findingID):
+		// The ID becomes part of a file path, so it must not be able to
+		// point outside of the findings directory.
+		return fmt.Errorf("finding ID must not contain path separators or start with \"..\": %q", findingID)
+	}
+	var contentHash string
+	if content != nil {
+		var err error
+		if contentHash, err = contentSHA256(finding, content); err != nil {
+			return err
+		}
+	}
+	findingJSON, err := json.Marshal(finding)
+	if err != nil {
+		return fmt.Errorf("cannot marshal finding: %w", err)
+	}
+	if err := s.storeData(subdirFindings, findingID, bytes.NewReader(findingJSON), false); err != nil {
+		return fmt.Errorf("cannot store finding data: %w", err)
+	}
+	if content == nil {
+		return nil
+	}
+	// Content that already exists can be reused. storeData wraps the error
+	// from the os package, so errors.Is is required here: os.IsExist does not
+	// look through wrapped errors.
+	if err := s.storeData(subdirContent, contentHash, content, false); err != nil && !errors.Is(err, os.ErrExist) {
+		return fmt.Errorf("cannot store content data: %w", err)
+	}
+	// Store cross-references: Finding ID -> content hash, and content hash -> finding metadata.
+	// A finding can have only one content hash, but a content hash can be referenced by multiple findings.
+	if err := s.storeData(subdirFindings, findingID+suffixHash, strings.NewReader(contentHash), false); err != nil {
+		return fmt.Errorf("cannot store content hash for finding: %w", err)
+	}
+	if err := s.storeData(subdirContent, contentHash+suffixMetadata, bytes.NewReader(append(findingJSON, '\n')), true); err != nil {
+		return fmt.Errorf("cannot store content metadata: %w", err)
+	}
+	return nil
+}
+
+// contentSHA256 returns the hex-encoded SHA-256 hash of content.
+//
+// If the subject of finding is a file that already carries a well-formed
+// SHA-256 hash, that hash is used and content is not read. Otherwise content
+// is hashed and then rewound to its start so that it can be read again.
+func contentSHA256(finding *thorlog.Finding, content io.ReadSeeker) (string, error) {
+	if file, isFile := finding.Subject.(*thorlog.File); isFile && file != nil && file.Hashes != nil && isSHA256Hex(file.Hashes.Sha256) {
+		return file.Hashes.Sha256, nil
+	}
+	hash := sha256.New()
+	if _, err := io.Copy(hash, content); err != nil {
+		return "", fmt.Errorf("could not hash content: %w", err)
+	}
+	// Reset the content reader to the beginning for later use.
+	if _, err := content.Seek(0, io.SeekStart); err != nil {
+		return "", fmt.Errorf("cannot reset content reader: %w", err)
+	}
+	return hex.EncodeToString(hash.Sum(nil)), nil
+}
+
+// isSHA256Hex reports whether s has the form of a hex-encoded SHA-256 hash.
+func isSHA256Hex(s string) bool {
+	if len(s) != hex.EncodedLen(sha256.Size) {
+		return false
+	}
+	_, err := hex.DecodeString(s)
+	return err == nil
+}
+
+// isSafeID reports whether id can be used as a file name inside the store
+// without pointing outside of its directory.
+func isSafeID(id string) bool {
+	return !strings.ContainsAny(id, `/\`) && !strings.HasPrefix(id, "..")
+}
+
+// storeData writes data to the file for id below subdir, creating missing
+// directories.
+//
+// If appendData is true, data is appended to the file, which is created if
+// needed. Otherwise the file must not exist yet; if it does, the returned
+// error matches os.ErrExist with errors.Is.
+func (s *Store) storeData(subdir string, id string, data io.Reader, appendData bool) error {
+	path := s.path(subdir, id)
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		return fmt.Errorf("cannot create directory: %w", err)
+	}
+	openFlags := os.O_WRONLY | os.O_CREATE
+	if appendData {
+		openFlags |= os.O_APPEND
+	} else {
+		openFlags |= os.O_EXCL
+	}
+	file, err := os.OpenFile(path, openFlags, 0644)
+	if err != nil {
+		return fmt.Errorf("cannot create file: %w", err)
+	}
+	_, writeErr := io.Copy(file, data)
+	// A failed Close can mean that buffered data never reached the disk, so
+	// it is reported like a failed write.
+	closeErr := file.Close()
+	switch {
+	case writeErr != nil:
+		err = fmt.Errorf("cannot write to file: %w", writeErr)
+	case closeErr != nil:
+		err = fmt.Errorf("cannot close file: %w", closeErr)
+	default:
+		return nil
+	}
+	if !appendData {
+		// Best effort: the file was created by this call, so remove the
+		// incomplete file. Otherwise a later attempt would find it and treat
+		// it as already stored.
+		_ = os.Remove(path)
+	}
+	return err
+}
+
+// path returns the location of the file for id below subdir.
+//
+// In the nested layout the first two characters of id name an additional
+// directory level. IDs shorter than two characters have no such prefix and
+// are placed directly in subdir instead of causing a panic.
+func (s *Store) path(subdir string, id string) string {
+	if s.Flat || len(id) < 2 {
+		return filepath.Join(s.RootDir, subdir, id)
+	}
+	return filepath.Join(s.RootDir, subdir, id[:2], id)
+}
diff --git a/store_test.go b/store_test.go
new file mode 100644
index 0000000..928370a
--- /dev/null
+++ b/store_test.go
@@ -0,0 +1,168 @@
+package store
+
+import (
+	"bufio"
+	"bytes"
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/NextronSystems/jsonlog/thorlog/parser"
+	"github.com/NextronSystems/jsonlog/thorlog/v3"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// maxMetadataLineSize is the longest metadata line LoadContent accepts. It is
+// larger than the 64 KiB default of bufio.Scanner so that large findings can
+// still be read.
+const maxMetadataLineSize = 16 << 20
+
+// TestLayout stores one finding with content and checks that both can be
+// loaded again through their cross-references.
+func TestLayout(t *testing.T) {
+	rootDir := t.TempDir()
+	layout := New(rootDir)
+
+	finding := thorlog.NewFinding(thorlog.NewFile("test.txt"), "Test finding")
+	finding.Meta = thorlog.LogEventMetadata{
+		GenID: "abcdef1234567890",
+		Time:  time.Now(),
+		Lvl:   thorlog.Alert,
+	}
+	content := []byte("This is a test content for the finding.")
+	sha256Hash := sha256.Sum256(content)
+	if err := layout.Store(finding, bytes.NewReader(content)); err != nil {
+		t.Fatalf("Failed to store finding: %v", err)
+	}
+	hashString := hex.EncodeToString(sha256Hash[:])
+
+	readFinding, contentHash, err := layout.LoadFinding("abcdef1234567890")
+	require.NoError(t, err)
+	// testify expects the expected value first, then the actual value.
+	assert.Equal(t, hashString, contentHash)
+	// require stops the test here, so the dereference below cannot panic.
+	require.NotNil(t, readFinding)
+	assert.Equal(t, finding.Meta.GenID, readFinding.Meta.GenID)
+	// NOTE(review): this checks the subject of the original finding, not of
+	// readFinding - confirm whether the loaded subject should be checked.
+	assert.Equal(t, "test.txt", finding.Subject.(*thorlog.File).Path)
+
+	loadedContent, findings, err := layout.LoadContent(hashString)
+	require.NoError(t, err)
+	assert.Equal(t, content, loadedContent)
+	require.Len(t, findings, 1)
+	assert.Equal(t, readFinding.Meta.GenID, findings[0].Meta.GenID)
+}
+
+// TestSharedContent checks that several findings can reference the same
+// content.
+func TestSharedContent(t *testing.T) {
+	layout := New(t.TempDir())
+	content := []byte("Content shared by two findings.")
+	ids := []string{"aaaa000000000001", "bbbb000000000002"}
+	for _, id := range ids {
+		finding := thorlog.NewFinding(thorlog.NewFile("shared.txt"), "Test finding")
+		finding.Meta = thorlog.LogEventMetadata{
+			GenID: id,
+			Time:  time.Now(),
+			Lvl:   thorlog.Alert,
+		}
+		require.NoError(t, layout.Store(finding, bytes.NewReader(content)))
+	}
+	sha256Hash := sha256.Sum256(content)
+	hashString := hex.EncodeToString(sha256Hash[:])
+
+	for _, id := range ids {
+		_, contentHash, err := layout.LoadFinding(id)
+		require.NoError(t, err)
+		assert.Equal(t, hashString, contentHash)
+	}
+	loadedContent, findings, err := layout.LoadContent(hashString)
+	require.NoError(t, err)
+	assert.Equal(t, content, loadedContent)
+	assert.Len(t, findings, len(ids))
+}
+
+// LoadFinding reads the finding stored under id.
+//
+// The second result is the content hash recorded for the finding. It is empty
+// if no content hash file exists for the finding.
+func (s *Store) LoadFinding(id string) (*thorlog.Finding, string, error) {
+	if len(id) < 2 {
+		return nil, "", fmt.Errorf("finding ID is too short, must be at least 2 characters: %s", id)
+	}
+	path := s.path(subdirFindings, id)
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, "", fmt.Errorf("cannot read finding data: %w", err)
+	}
+	event, err := parser.ParseEvent(data)
+	if err != nil {
+		return nil, "", fmt.Errorf("cannot unmarshal finding data: %w", err)
+	}
+	finding, ok := event.(*thorlog.Finding)
+	if !ok {
+		return nil, "", fmt.Errorf("data is not a valid finding: %s", id)
+	}
+	hash, err := os.ReadFile(path + suffixHash)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return finding, "", nil // No content hash found, return finding without content.
+		}
+		return nil, "", fmt.Errorf("cannot read content hash: %w", err)
+	}
+	return finding, string(hash), nil
+}
+
+// LoadContent reads the content stored under hash and all findings that
+// reference it.
+//
+// If the content can be read but its metadata file cannot be opened, the
+// content is returned together with the error.
+func (s *Store) LoadContent(hash string) ([]byte, []*thorlog.Finding, error) {
+	if len(hash) < 2 {
+		return nil, nil, fmt.Errorf("content hash is too short, must be at least 2 characters: %s", hash)
+	}
+	path := s.path(subdirContent, hash)
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, nil, fmt.Errorf("cannot read content data: %w", err)
+	}
+	// Read the metadata file to get the list of findings that reference this content.
+	metadataFile, err := os.Open(path + suffixMetadata)
+	if err != nil {
+		return data, nil, fmt.Errorf("cannot read content metadata: %w", err)
+	}
+	defer func() {
+		// The file is only read, so an error from Close cannot lose data.
+		_ = metadataFile.Close()
+	}()
+	var findings []*thorlog.Finding
+	scanner := bufio.NewScanner(metadataFile)
+	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxMetadataLineSize)
+	for scanner.Scan() {
+		line := scanner.Bytes()
+		if len(line) == 0 {
+			continue // Tolerate blank lines between entries.
+		}
+		event, err := parser.ParseEvent(line)
+		if err != nil {
+			return nil, nil, fmt.Errorf("cannot parse finding metadata: %w", err)
+		}
+		finding, ok := event.(*thorlog.Finding)
+		if !ok {
+			return nil, nil, fmt.Errorf("metadata is not a valid finding: %s", string(line))
+		}
+		findings = append(findings, finding)
+	}
+	// Scan also returns false on a read error or an over-long line. Without
+	// this check such a failure would silently return an incomplete list.
+	if err := scanner.Err(); err != nil {
+		return nil, nil, fmt.Errorf("cannot read content metadata: %w", err)
+	}
+	return data, findings, nil
+}