Skip to content

Commit 0cc14b8

Browse files
committed
feat: initial commit
1 parent 425dd35 commit 0cc14b8

6 files changed

Lines changed: 324 additions & 0 deletions

File tree

.github/workflows/go.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# CI workflow: runs the Go tests and the linter.
name: go

# Triggered by pushes to master and by every pull request.
on:
  push:
    branches:
      - master
  pull_request:

env:
  # Modules below this prefix are treated as private by the go tool.
  GOPRIVATE: github.com/NextronSystems/

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: stable
      # Rewrite https://github.com/ URLs so git authenticates with the access token.
      - name: Use token
        run: git config --global url."https://${{ secrets.JSONLOG_ACCESS_TOKEN }}@github.com/".insteadOf "https://github.com/"
      - name: Test
        run: go test -v ./...
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: stable
      # Rewrite https://github.com/ URLs so git authenticates with the access token.
      - name: Use token
        run: git config --global url."https://${{ secrets.JSONLOG_ACCESS_TOKEN }}@github.com/".insteadOf "https://github.com/"
      - name: Lint
        uses: golangci/golangci-lint-action@v8
        with:
          version: v2.1.6

README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# THOR Finding Store
2+
3+
This package provides a simple on-disk store for findings generated by THOR scans.
4+
5+
Both findings and the content that was matched on (typically the file content) are stored, with both referencing each other.
6+
A finding has at most one content item, while a content item may be referenced by multiple findings.
7+
8+
## Layout
9+
10+
The store is a directory with the following structure:
11+
12+
```
13+
├── findings
14+
│ └── ab
15+
│ ├── abcdef1234567890
16+
│ └── abcdef1234567890.hash
17+
└── samples
18+
└── 1e
19+
├── 1edc8bf0596dcdc0ca93b6dd89e14b57d0b4faf5da534d5487f9ed7ad0eb7e06
20+
└── 1edc8bf0596dcdc0ca93b6dd89e14b57d0b4faf5da534d5487f9ed7ad0eb7e06.metadata
21+
```
22+
23+
Where:
24+
25+
- `findings/` contains the findings, organized by the first two characters of the finding ID.
26+
27+
Each finding is stored in a file named after the finding ID. If the finding has a content item, an additional `.hash` file contains the SHA256 hash of that content item.
28+
- `samples/` contains the content matched on, organized by the first two characters of the content's SHA256 hash.
29+
30+
Each content item is stored in a file named after its SHA256 hash, with an additional `.metadata` file containing the findings that reference this content item in newline-separated JSON format.
31+
32+
Optionally, by setting `Layout.SingleDir` to `true`, the store can be configured to not create subdirectories under `findings/` and `samples/`, but instead store all findings and samples in a single directory each.

go.mod

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
module github.com/NextronSystems/finding-store
2+
3+
go 1.24
4+
5+
require (
6+
github.com/NextronSystems/jsonlog v0.0.0-20250523073520-69e056dcf33d
7+
github.com/stretchr/testify v1.10.0
8+
)
9+
10+
require (
11+
github.com/davecgh/go-spew v1.1.1 // indirect
12+
github.com/google/uuid v1.6.0 // indirect
13+
github.com/pmezard/go-difflib v1.0.0 // indirect
14+
golang.org/x/exp v0.0.0-20240213143201-ec583247a57a // indirect
15+
golang.org/x/mod v0.15.0 // indirect
16+
gopkg.in/yaml.v3 v3.0.1 // indirect
17+
)

go.sum

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
github.com/NextronSystems/jsonlog v0.0.0-20250523073520-69e056dcf33d h1:Lo5904HF61rzum1Md+xaQ0LsLMXb6EpzxaSPL/JRr50=
2+
github.com/NextronSystems/jsonlog v0.0.0-20250523073520-69e056dcf33d/go.mod h1:Hk47VW018TX8o/0sxK+EJt16iRE7gB91zGZGiaAjcww=
3+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
4+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
5+
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
6+
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
7+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
8+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
9+
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
10+
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
11+
golang.org/x/exp v0.0.0-20240213143201-ec583247a57a h1:HinSgX1tJRX3KsL//Gxynpw5CTOAIPhgL4W8PNiIpVE=
12+
golang.org/x/exp v0.0.0-20240213143201-ec583247a57a/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc=
13+
golang.org/x/mod v0.15.0 h1:SernR4v+D55NyBH2QiEQrlBAnj1ECL6AGrA5+dPaMY8=
14+
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
15+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
16+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
17+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
18+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

layout.go

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package store
2+
3+
import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"

	"github.com/NextronSystems/jsonlog/thorlog/v3"
)
16+
17+
// Layout describes where a finding store lives on disk and how its files are
// arranged.
type Layout struct {
	// RootDir is the directory below which all store files are placed.
	RootDir string
	// SingleDir, when true, places files directly in their subdirectory
	// instead of in a directory named after the first two characters of
	// their ID.
	SingleDir bool
}

// New returns a Layout rooted at root with SingleDir left disabled.
func New(root string) *Layout {
	layout := new(Layout)
	layout.RootDir = root
	return layout
}
28+
29+
// subdirFindings is the subdirectory of the store root that holds findings.
const subdirFindings = "findings"

// subdirContent is the subdirectory of the store root that holds content.
const subdirContent = "samples"

// suffixMetadata is appended to a content hash to name the file listing the
// findings that reference that content.
const suffixMetadata = ".metadata"

// suffixHash is appended to a finding ID to name the file holding the hash of
// the finding's content.
const suffixHash = ".hash"
35+
36+
func (l *Layout) Store(finding *thorlog.Finding, content io.ReadSeeker) error {
37+
findingId := finding.Meta.GenID
38+
if findingId == "" {
39+
return fmt.Errorf("finding ID is empty, cannot store finding")
40+
} else if len(findingId) < 2 {
41+
return fmt.Errorf("finding ID is too short, must be at least 2 characters: %s", findingId)
42+
}
43+
var contentHash string
44+
if content != nil {
45+
// Shortcut: if the content is already hashed, we can use it directly.
46+
if file, isFile := finding.Subject.(*thorlog.File); isFile && file.Hashes != nil {
47+
contentHash = file.Hashes.Sha256
48+
} else {
49+
hash := sha256.New()
50+
if _, err := io.Copy(hash, content); err != nil {
51+
return fmt.Errorf("could not hash content: %w", err)
52+
}
53+
contentHash = hex.EncodeToString(hash.Sum(nil))
54+
// Reset the content reader to the beginning for later use.
55+
if _, err := content.Seek(0, io.SeekStart); err != nil {
56+
return fmt.Errorf("cannot reset content reader: %w", err)
57+
}
58+
}
59+
}
60+
findingJson, err := json.Marshal(finding)
61+
if err != nil {
62+
return fmt.Errorf("cannot marshal finding: %w", err)
63+
}
64+
if err := l.storeData(subdirFindings, findingId, bytes.NewReader(findingJson), false); err != nil {
65+
return fmt.Errorf("cannot store finding data: %w", err)
66+
}
67+
if content != nil {
68+
if err := l.storeData(subdirContent, contentHash, content, false); err != nil {
69+
if !os.IsExist(err) { // If the content already exists, we can ignore the error.
70+
return fmt.Errorf("cannot store content data: %w", err)
71+
}
72+
}
73+
// Store cross-references: Finding ID -> content hash, and content hash -> finding metadata.
74+
// A finding can have only one content hash, but a content hash can be referenced by multiple findings.
75+
if err := l.storeData(subdirFindings, findingId+suffixHash, strings.NewReader(contentHash), false); err != nil {
76+
return fmt.Errorf("cannot store content hash for finding: %w", err)
77+
}
78+
if err := l.storeData(subdirContent, contentHash+suffixMetadata, bytes.NewReader(append(findingJson, '\n')), true); err != nil {
79+
return fmt.Errorf("cannot store content metadata: %w", err)
80+
}
81+
}
82+
return nil
83+
}
84+
85+
func (l *Layout) storeData(subdir string, id string, data io.Reader, append bool) error {
86+
path := l.path(subdir, id)
87+
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
88+
return fmt.Errorf("cannot create directory: %w", err)
89+
}
90+
var openFlags = os.O_WRONLY | os.O_CREATE
91+
if append {
92+
openFlags |= os.O_APPEND
93+
} else {
94+
openFlags |= os.O_EXCL
95+
}
96+
file, err := os.OpenFile(path, openFlags, 0644)
97+
if err != nil {
98+
return fmt.Errorf("cannot create file: %w", err)
99+
}
100+
defer func() {
101+
_ = file.Close()
102+
}()
103+
if _, err := io.Copy(file, data); err != nil {
104+
return fmt.Errorf("cannot write to file: %w", err)
105+
}
106+
return nil
107+
}
108+
109+
func (l *Layout) path(subdir string, id string) string {
110+
if l.SingleDir {
111+
return filepath.Join(l.RootDir, subdir, id)
112+
}
113+
return filepath.Join(l.RootDir, subdir, id[:2], id)
114+
}

layout_test.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package store
2+
3+
import (
4+
"bufio"
5+
"bytes"
6+
"crypto/sha256"
7+
"encoding/hex"
8+
"fmt"
9+
"os"
10+
"testing"
11+
"time"
12+
13+
"github.com/NextronSystems/jsonlog/thorlog/parser"
14+
"github.com/NextronSystems/jsonlog/thorlog/v3"
15+
"github.com/stretchr/testify/assert"
16+
"github.com/stretchr/testify/require"
17+
)
18+
19+
func TestLayout(t *testing.T) {
20+
rootDir := t.TempDir()
21+
layout := New(rootDir)
22+
23+
finding := thorlog.NewFinding(thorlog.NewFile("test.txt"), "Test finding")
24+
finding.Meta = thorlog.LogEventMetadata{
25+
GenID: "abcdef1234567890",
26+
Time: time.Now(),
27+
Lvl: thorlog.Alert,
28+
}
29+
var content = []byte("This is a test content for the finding.")
30+
sha256Hash := sha256.Sum256(content)
31+
if err := layout.Store(finding, bytes.NewReader(content)); err != nil {
32+
t.Fatalf("Failed to store finding: %v", err)
33+
}
34+
hashString := hex.EncodeToString(sha256Hash[:])
35+
36+
readFinding, contentHash, err := layout.LoadFinding("abcdef1234567890")
37+
require.NoError(t, err)
38+
assert.Equal(t, contentHash, hashString)
39+
assert.NotNil(t, readFinding)
40+
assert.Equal(t, finding.Meta.GenID, readFinding.Meta.GenID)
41+
assert.Equal(t, finding.Subject.(*thorlog.File).Path, "test.txt")
42+
43+
loadedContent, findings, err := layout.LoadContent(hashString)
44+
require.NoError(t, err)
45+
assert.Equal(t, content, loadedContent)
46+
assert.Len(t, findings, 1)
47+
assert.Equal(t, findings[0].Meta.GenID, readFinding.Meta.GenID)
48+
}
49+
50+
func (l *Layout) LoadFinding(id string) (*thorlog.Finding, string, error) {
51+
if len(id) < 2 {
52+
return nil, "", fmt.Errorf("finding ID is too short, must be at least 2 characters: %s", id)
53+
}
54+
path := l.path(subdirFindings, id)
55+
data, err := os.ReadFile(path)
56+
if err != nil {
57+
return nil, "", fmt.Errorf("cannot read finding data: %w", err)
58+
}
59+
event, err := parser.ParseEvent(data)
60+
if err != nil {
61+
return nil, "", fmt.Errorf("cannot unmarshal finding data: %w", err)
62+
}
63+
finding, ok := event.(*thorlog.Finding)
64+
if !ok {
65+
return nil, "", fmt.Errorf("data is not a valid finding: %s", id)
66+
}
67+
hash, err := os.ReadFile(path + suffixHash)
68+
if err != nil {
69+
if os.IsNotExist(err) {
70+
return finding, "", nil // No content hash found, return finding without content.
71+
}
72+
return nil, "", fmt.Errorf("cannot read content hash: %w", err)
73+
}
74+
return finding, string(hash), nil
75+
}
76+
77+
func (l *Layout) LoadContent(hash string) ([]byte, []*thorlog.Finding, error) {
78+
if len(hash) < 2 {
79+
return nil, nil, fmt.Errorf("content hash is too short, must be at least 2 characters: %s", hash)
80+
}
81+
path := l.path(subdirContent, hash)
82+
data, err := os.ReadFile(path)
83+
if err != nil {
84+
return nil, nil, fmt.Errorf("cannot read content data: %w", err)
85+
}
86+
// Read the metadata file to get the list of findings that reference this content.
87+
metadataFile, err := os.Open(path + suffixMetadata)
88+
if err != nil {
89+
return data, nil, fmt.Errorf("cannot read content metadata: %w", err)
90+
}
91+
defer func() {
92+
_ = metadataFile.Close()
93+
}()
94+
var findings []*thorlog.Finding
95+
reader := bufio.NewScanner(metadataFile)
96+
for reader.Scan() {
97+
event, err := parser.ParseEvent(reader.Bytes())
98+
if err != nil {
99+
return nil, nil, fmt.Errorf("cannot parse finding metadata: %w", err)
100+
}
101+
finding, ok := event.(*thorlog.Finding)
102+
if !ok {
103+
return nil, nil, fmt.Errorf("metadata is not a valid finding: %s", string(reader.Bytes()))
104+
}
105+
findings = append(findings, finding)
106+
}
107+
return data, findings, nil
108+
}

0 commit comments

Comments
 (0)