Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# CI workflow: runs the Go test suite and golangci-lint.
name: go
# Triggered by pushes to master and by every pull request.
on:
push:
branches:
- master
pull_request:

env:
# Treat NextronSystems modules as private; presumably so the Go toolchain
# fetches them directly from GitHub rather than via the public module proxy.
GOPRIVATE: github.com/NextronSystems/

jobs:
# Job "build": check out the code, install Go, and run all tests.
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: stable
# Rewrite https://github.com/ URLs so git authenticates with the access token
# from the repository secrets (go.mod requires the NextronSystems/jsonlog module).
- name: Use token
run: git config --global url."https://${{ secrets.JSONLOG_ACCESS_TOKEN }}@github.com/".insteadOf "https://github.com/"
- name: Test
run: go test -v ./...
# Job "lint": same setup as "build", then run golangci-lint.
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: stable
# Same token-based URL rewrite as in the build job.
- name: Use token
run: git config --global url."https://${{ secrets.JSONLOG_ACCESS_TOKEN }}@github.com/".insteadOf "https://github.com/"
- name: Lint
uses: golangci/golangci-lint-action@v8
with:
# golangci-lint release is pinned to an exact version.
version: v2.1.6
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# THOR Finding Store

This package provides a simple on-disk store for findings generated by THOR scans.

Both findings and the content that was matched on (typically the file content) are stored, with both referencing each other.
A finding has either zero or one content item, while a content item may be referenced by multiple findings.

## Layout

The store is a directory with the following structure:

```
├── findings
│ └── ab
│ ├── abcdef1234567890
│ └── abcdef1234567890.hash
└── samples
└── 1e
├── 1edc8bf0596dcdc0ca93b6dd89e14b57d0b4faf5da534d5487f9ed7ad0eb7e06
└── 1edc8bf0596dcdc0ca93b6dd89e14b57d0b4faf5da534d5487f9ed7ad0eb7e06.metadata
```

Where:

- `findings/` contains the findings, organized by the first two characters of the finding ID.

Each finding is stored in a file named after the finding ID. If the finding has a content item, an additional `.hash` file contains the SHA256 hash of that content item.
- `samples/` contains the content matched on, organized by the first two characters of the content's SHA256 hash.

Each content item is stored in a file named after its SHA256 hash, with an additional `.metadata` file containing the findings that reference this content item in newline-delimited JSON format.

Optionally, by setting `Store.Flat` to `true`, the store can be configured to not create subdirectories under `findings/` and `samples/`, but instead store all findings and samples in a single directory each.
17 changes: 17 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
module github.com/NextronSystems/finding-store

go 1.24

require (
github.com/NextronSystems/jsonlog v0.0.0-20250523073520-69e056dcf33d
github.com/stretchr/testify v1.10.0
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/exp v0.0.0-20240213143201-ec583247a57a // indirect
golang.org/x/mod v0.15.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
18 changes: 18 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
github.com/NextronSystems/jsonlog v0.0.0-20250523073520-69e056dcf33d h1:Lo5904HF61rzum1Md+xaQ0LsLMXb6EpzxaSPL/JRr50=
github.com/NextronSystems/jsonlog v0.0.0-20250523073520-69e056dcf33d/go.mod h1:Hk47VW018TX8o/0sxK+EJt16iRE7gB91zGZGiaAjcww=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/exp v0.0.0-20240213143201-ec583247a57a h1:HinSgX1tJRX3KsL//Gxynpw5CTOAIPhgL4W8PNiIpVE=
golang.org/x/exp v0.0.0-20240213143201-ec583247a57a/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc=
golang.org/x/mod v0.15.0 h1:SernR4v+D55NyBH2QiEQrlBAnj1ECL6AGrA5+dPaMY8=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
114 changes: 114 additions & 0 deletions store.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package store

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"

	"github.com/NextronSystems/jsonlog/thorlog/v3"
)

// New returns a Store whose data lives under the directory root.
// The returned store uses the nested layout, i.e. Flat is false.
func New(root string) *Store {
	st := new(Store)
	st.RootDir = root
	st.Flat = false
	return st
}

// Store is an on-disk store for findings and the content they matched on.
type Store struct {
// RootDir is the directory under which the findings and samples
// subdirectories are created.
RootDir string
// Flat selects the directory layout. When false, each file is placed in a
// subdirectory named after the first two characters of its ID or hash;
// when true, files are placed directly in the findings/samples directory.
Flat bool
}

// Names of the store's subdirectories and of the suffixes used for the
// cross-reference files.
const (
// subdirFindings is the subdirectory of the store root holding findings.
subdirFindings = "findings"
// subdirContent is the subdirectory of the store root holding content items.
subdirContent = "samples"
// suffixMetadata is appended to a content hash to name the file listing the
// findings that reference that content.
suffixMetadata = ".metadata"
// suffixHash is appended to a finding ID to name the file holding the hash
// of the content the finding references.
suffixHash = ".hash"
)

// Store persists finding and, if content is non-nil, the content it matched on.
//
// The finding is written as JSON under the findings directory, keyed by
// finding.Meta.GenID. The ID must be at least two characters long, and a
// finding with the same ID must not already be stored (the file is created
// exclusively).
//
// When content is non-nil it is stored under the samples directory, keyed by
// its SHA-256 hash. If the finding's subject is a *thorlog.File carrying a
// well-formed SHA-256 hex digest, that digest is used as is; otherwise the
// hash is computed by reading content, which is then rewound to its start.
// Content that is already present is not rewritten. In addition, two
// cross-reference files are written: "<finding ID>.hash" containing the
// content hash, and "<content hash>.metadata", to which the finding JSON is
// appended as one line.
func (s *Store) Store(finding *thorlog.Finding, content io.ReadSeeker) error {
	findingID := finding.Meta.GenID
	switch {
	case findingID == "":
		return fmt.Errorf("finding ID is empty, cannot store finding")
	case len(findingID) < 2:
		return fmt.Errorf("finding ID is too short, must be at least 2 characters: %s", findingID)
	}

	var contentHash string
	if content != nil {
		// Shortcut: if the content is already hashed, we can use it directly.
		// Only trust a well-formed digest: an empty or malformed value would
		// otherwise end up as a file name (and panic in the nested layout).
		if file, isFile := finding.Subject.(*thorlog.File); isFile && file.Hashes != nil && isSHA256Hex(file.Hashes.Sha256) {
			contentHash = file.Hashes.Sha256
		} else {
			hash := sha256.New()
			if _, err := io.Copy(hash, content); err != nil {
				return fmt.Errorf("could not hash content: %w", err)
			}
			contentHash = hex.EncodeToString(hash.Sum(nil))
			// Reset the content reader to the beginning for later use.
			if _, err := content.Seek(0, io.SeekStart); err != nil {
				return fmt.Errorf("cannot reset content reader: %w", err)
			}
		}
	}

	findingJSON, err := json.Marshal(finding)
	if err != nil {
		return fmt.Errorf("cannot marshal finding: %w", err)
	}
	if err := s.storeData(subdirFindings, findingID, bytes.NewReader(findingJSON), false); err != nil {
		return fmt.Errorf("cannot store finding data: %w", err)
	}
	if content == nil {
		return nil
	}

	// storeData wraps the underlying error, so errors.Is is required here;
	// os.IsExist does not look through wrapped errors.
	if err := s.storeData(subdirContent, contentHash, content, false); err != nil && !errors.Is(err, os.ErrExist) {
		return fmt.Errorf("cannot store content data: %w", err)
	}

	// Store cross-references: Finding ID -> content hash, and content hash -> finding metadata.
	// A finding can have only one content hash, but a content hash can be referenced by multiple findings.
	if err := s.storeData(subdirFindings, findingID+suffixHash, strings.NewReader(contentHash), false); err != nil {
		return fmt.Errorf("cannot store content hash for finding: %w", err)
	}
	// Build the metadata line in its own slice instead of appending to the
	// marshalled buffer.
	metadataLine := make([]byte, 0, len(findingJSON)+1)
	metadataLine = append(metadataLine, findingJSON...)
	metadataLine = append(metadataLine, '\n')
	if err := s.storeData(subdirContent, contentHash+suffixMetadata, bytes.NewReader(metadataLine), true); err != nil {
		return fmt.Errorf("cannot store content metadata: %w", err)
	}
	return nil
}

// isSHA256Hex reports whether digest is a hex-encoded SHA-256 hash.
func isSHA256Hex(digest string) bool {
	if len(digest) != hex.EncodedLen(sha256.Size) {
		return false
	}
	_, err := hex.DecodeString(digest)
	return err == nil
}

// storeData writes data to the file for id inside subdir, creating parent
// directories as needed.
//
// With appendMode false the file is created exclusively, so the call fails
// with an error wrapping os.ErrExist if the file already exists. With
// appendMode true the data is appended to the file, which is created if
// missing.
//
// If writing or closing an exclusively created file fails, the partial file
// is removed so that a later attempt does not mistake it for complete data.
func (s *Store) storeData(subdir string, id string, data io.Reader, appendMode bool) (err error) {
	path := s.path(subdir, id)
	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
		return fmt.Errorf("cannot create directory: %w", err)
	}
	openFlags := os.O_WRONLY | os.O_CREATE
	if appendMode {
		openFlags |= os.O_APPEND
	} else {
		openFlags |= os.O_EXCL
	}
	file, err := os.OpenFile(path, openFlags, 0644)
	if err != nil {
		return fmt.Errorf("cannot create file: %w", err)
	}
	defer func() {
		// A failed Close on a written file can mean lost data, so report it
		// unless an earlier error is already being returned.
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("cannot close file: %w", closeErr)
		}
		if err != nil && !appendMode {
			// Best effort: O_EXCL guarantees this call created the file, so
			// removing it cannot destroy pre-existing data.
			_ = os.Remove(path)
		}
	}()
	if _, copyErr := io.Copy(file, data); copyErr != nil {
		return fmt.Errorf("cannot write to file: %w", copyErr)
	}
	return nil
}

// path returns the file path for id inside subdir of the store root.
// In the nested layout (Flat is false) the file is placed in a directory
// named after the first two characters of id, so id must be at least two
// characters long in that case.
func (s *Store) path(subdir string, id string) string {
	elems := []string{s.RootDir, subdir}
	if !s.Flat {
		elems = append(elems, id[:2])
	}
	elems = append(elems, id)
	return filepath.Join(elems...)
}
108 changes: 108 additions & 0 deletions store_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package store

import (
"bufio"
"bytes"
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"testing"
"time"

"github.com/NextronSystems/jsonlog/thorlog/parser"
"github.com/NextronSystems/jsonlog/thorlog/v3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestLayout stores a finding with content in a fresh store and checks that
// both can be loaded again and reference each other.
func TestLayout(t *testing.T) {
	st := New(t.TempDir())

	finding := thorlog.NewFinding(thorlog.NewFile("test.txt"), "Test finding")
	finding.Meta = thorlog.LogEventMetadata{
		GenID: "abcdef1234567890",
		Time:  time.Now(),
		Lvl:   thorlog.Alert,
	}
	content := []byte("This is a test content for the finding.")
	digest := sha256.Sum256(content)
	wantHash := hex.EncodeToString(digest[:])

	require.NoError(t, st.Store(finding, bytes.NewReader(content)), "Failed to store finding")

	readFinding, contentHash, err := st.LoadFinding("abcdef1234567890")
	require.NoError(t, err)
	// Fatal on nil: the assertions below dereference readFinding.
	require.NotNil(t, readFinding)
	assert.Equal(t, wantHash, contentHash)
	assert.Equal(t, finding.Meta.GenID, readFinding.Meta.GenID)
	// Check the subject of the finding that was loaded, not of the input.
	readFile, isFile := readFinding.Subject.(*thorlog.File)
	require.True(t, isFile, "loaded subject has type %T, want *thorlog.File", readFinding.Subject)
	assert.Equal(t, "test.txt", readFile.Path)

	loadedContent, findings, err := st.LoadContent(wantHash)
	require.NoError(t, err)
	assert.Equal(t, content, loadedContent)
	require.Len(t, findings, 1)
	assert.Equal(t, readFinding.Meta.GenID, findings[0].Meta.GenID)
}

// LoadFinding reads the finding stored under id and returns it together with
// the hash of the content it references. The hash is empty if no ".hash"
// file exists for the finding. id must be at least two characters long.
func (s *Store) LoadFinding(id string) (*thorlog.Finding, string, error) {
	if len(id) < 2 {
		return nil, "", fmt.Errorf("finding ID is too short, must be at least 2 characters: %s", id)
	}

	findingPath := s.path(subdirFindings, id)
	raw, err := os.ReadFile(findingPath)
	if err != nil {
		return nil, "", fmt.Errorf("cannot read finding data: %w", err)
	}

	parsed, err := parser.ParseEvent(raw)
	if err != nil {
		return nil, "", fmt.Errorf("cannot unmarshal finding data: %w", err)
	}
	finding, isFinding := parsed.(*thorlog.Finding)
	if !isFinding {
		return nil, "", fmt.Errorf("data is not a valid finding: %s", id)
	}

	hashBytes, err := os.ReadFile(findingPath + suffixHash)
	switch {
	case err == nil:
		return finding, string(hashBytes), nil
	case os.IsNotExist(err):
		// No content hash found, return finding without content.
		return finding, "", nil
	default:
		return nil, "", fmt.Errorf("cannot read content hash: %w", err)
	}
}

// LoadContent reads the content item stored under hash and the findings
// listed in its ".metadata" file (one JSON finding per line).
//
// hash must be at least two characters long. If the content can be read but
// the metadata file cannot be opened, the content is returned together with
// the error. Any failure while reading or parsing the metadata yields a nil
// content and nil findings.
func (s *Store) LoadContent(hash string) ([]byte, []*thorlog.Finding, error) {
	if len(hash) < 2 {
		return nil, nil, fmt.Errorf("content hash is too short, must be at least 2 characters: %s", hash)
	}
	path := s.path(subdirContent, hash)
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, nil, fmt.Errorf("cannot read content data: %w", err)
	}
	// Read the metadata file to get the list of findings that reference this content.
	metadataFile, err := os.Open(path + suffixMetadata)
	if err != nil {
		return data, nil, fmt.Errorf("cannot read content metadata: %w", err)
	}
	defer func() {
		// The file is only read, so a close error cannot lose data.
		_ = metadataFile.Close()
	}()

	// A serialized finding can exceed bufio.Scanner's default 64 KiB token
	// limit, so allow the line buffer to grow well beyond it.
	const maxMetadataLineSize = 64 << 20
	scanner := bufio.NewScanner(metadataFile)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxMetadataLineSize)

	var findings []*thorlog.Finding
	for scanner.Scan() {
		event, err := parser.ParseEvent(scanner.Bytes())
		if err != nil {
			return nil, nil, fmt.Errorf("cannot parse finding metadata: %w", err)
		}
		finding, ok := event.(*thorlog.Finding)
		if !ok {
			return nil, nil, fmt.Errorf("metadata is not a valid finding: %s", scanner.Text())
		}
		findings = append(findings, finding)
	}
	// Scan returns false on both EOF and failure; without this check a read
	// error or an over-long line would silently truncate the result.
	if err := scanner.Err(); err != nil {
		return nil, nil, fmt.Errorf("cannot read content metadata: %w", err)
	}
	return data, findings, nil
}