Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions internal/filter/strings.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ func DefaultFilterOptions() FilterOptions {
}

type FilteredString struct {
Value string
Score float64
IsWide bool
IsHighScoring bool
GoodwareCount int
EncodingInfo *extractor.EncodingDetection
MatchedRules []scoring.MatchedRule
Value string
Score float64
IsWide bool
IsHighScoring bool
GoodwareCount int
EncodingInfo *extractor.EncodingDetection
MatchedRules []scoring.MatchedRule
}

func FilterStrings(
Expand All @@ -52,7 +52,11 @@ func FilterStrings(

goodwareCount := 0
if goodwareDB != nil {
goodwareCount = goodwareDB[displayStr]
if count, exists := goodwareDB[s]; exists {
goodwareCount = count
} else {
goodwareCount = goodwareDB[displayStr]
}
}

scoreResult := scoringEngine.ScoreWithGoodware(displayStr, goodwareCount, opts.ExcludeGoodware)
Expand Down
46 changes: 46 additions & 0 deletions internal/filter/strings_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package filter

import (
"path/filepath"
"testing"

"github.com/Neo23x0/yarGen-go/internal/database"
"github.com/Neo23x0/yarGen-go/internal/scoring"
)

// newTestEngine builds a scoring engine on top of a throwaway scoring store
// whose database file lives in the test's temporary directory. The store is
// closed via t.Cleanup, and any setup failure aborts the calling test.
func newTestEngine(t *testing.T) *scoring.Engine {
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), "scoring.db")
	st, storeErr := scoring.NewStore(dbPath)
	if storeErr != nil {
		t.Fatalf("failed to create scoring store: %v", storeErr)
	}
	// The close error is deliberately dropped: the store only backs this test.
	t.Cleanup(func() { _ = st.Close() })

	eng, engErr := scoring.NewEngine(st)
	if engErr != nil {
		t.Fatalf("failed to create scoring engine: %v", engErr)
	}
	return eng
}

// TestFilterStrings_ExcludesWideGoodwareWithPrefixedKey checks that, with
// ExcludeGoodware enabled, a "UTF16LE:"-prefixed input string whose prefixed
// form is a key in the goodware counter produces no filtered results.
func TestFilterStrings_ExcludesWideGoodwareWithPrefixedKey(t *testing.T) {
	// The same prefixed value is used as the input string and the goodware key.
	const wideKey = "UTF16LE:cmd.exe"

	eng := newTestEngine(t)

	filterOpts := DefaultFilterOptions()
	filterOpts.ExcludeGoodware = true

	inputs := []string{wideKey}
	goodware := database.Counter{wideKey: 1}

	got := FilterStrings(inputs, goodware, eng, filterOpts)
	if n := len(got); n != 0 {
		t.Fatalf("expected wide goodware string to be excluded, got %d results", n)
	}
}
53 changes: 39 additions & 14 deletions internal/service/yargen.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"path/filepath"
"sort"
"strings"
"sync"

"github.com/Neo23x0/yarGen-go/internal/config"
"github.com/Neo23x0/yarGen-go/internal/database"
Expand All @@ -20,14 +21,16 @@ import (
// YarGen bundles the configuration, goodware databases, scoring components
// and LLM client that the service methods in this file operate on.
type YarGen struct {
config *config.Config
// goodwareDB stays nil until the databases have been loaded; it is read
// and replaced only while holding dbMu.
goodwareDB *database.LoadedDatabases
// dbMu guards goodwareDB: readers take RLock, the loading path takes Lock.
dbMu sync.RWMutex
scoringEngine *scoring.Engine
scoringStore *scoring.Store
llmClient llm.Client
}

type Result struct {
Rules string
DebugLog string
Rules string
DebugLog string
FileStrings map[string][]filter.FilteredString
}

type Options struct {
Expand Down Expand Up @@ -113,6 +116,25 @@ func (y *YarGen) Close() error {
}

// LoadDatabases is the exported entry point for loading the goodware
// databases. It forwards includeOpcodes to ensureDatabases and returns that
// call's error unchanged.
func (y *YarGen) LoadDatabases(includeOpcodes bool) error {
return y.ensureDatabases(includeOpcodes)
}

func (y *YarGen) ensureDatabases(includeOpcodes bool) error {
y.dbMu.RLock()
needsLoad := y.goodwareDB == nil || (includeOpcodes && len(y.goodwareDB.Opcodes) == 0)
y.dbMu.RUnlock()
if !needsLoad {
return nil
}

y.dbMu.Lock()
defer y.dbMu.Unlock()

needsLoad = y.goodwareDB == nil || (includeOpcodes && len(y.goodwareDB.Opcodes) == 0)
if !needsLoad {
return nil
}

dbs, err := database.LoadAllDatabases(y.config.Database.DbsDir, includeOpcodes)
if err != nil {
return err
Expand All @@ -124,12 +146,14 @@ func (y *YarGen) LoadDatabases(includeOpcodes bool) error {
func (y *YarGen) Generate(ctx context.Context, opts Options) (*Result, error) {
var debugLog strings.Builder

if y.goodwareDB == nil {
if err := y.LoadDatabases(opts.IncludeOpcodes); err != nil {
return nil, err
}
if err := y.ensureDatabases(opts.IncludeOpcodes); err != nil {
return nil, err
}

y.dbMu.RLock()
goodwareDB := y.goodwareDB
y.dbMu.RUnlock()

fmt.Printf("[+] Scanning malware directory: %s\n", opts.MalwareDir)

scanOpts := scanner.ScanOptions{
Expand Down Expand Up @@ -170,7 +194,7 @@ func (y *YarGen) Generate(ctx context.Context, opts Options) (*Result, error) {

filtered := filter.FilterStrings(
file.Strings,
y.goodwareDB.Strings,
goodwareDB.Strings,
y.scoringEngine,
filterOpts,
)
Expand Down Expand Up @@ -229,7 +253,7 @@ func (y *YarGen) Generate(ctx context.Context, opts Options) (*Result, error) {
fileStrings[file.Path] = filtered

if opts.IncludeOpcodes {
filteredOpcodes := filter.FilterOpcodesWithLimit(file.Opcodes, y.goodwareDB.Opcodes, opts.NumOpcodes)
filteredOpcodes := filter.FilterOpcodesWithLimit(file.Opcodes, goodwareDB.Opcodes, opts.NumOpcodes)
fileOpcodes[file.Path] = filteredOpcodes
}
}
Expand Down Expand Up @@ -274,8 +298,9 @@ func (y *YarGen) Generate(ctx context.Context, opts Options) (*Result, error) {
fmt.Printf("[=] Generated %d simple rules, %d super rules\n", len(generated.SimpleRules), len(generated.SuperRules))

return &Result{
Rules: output,
DebugLog: debugLog.String(),
Rules: output,
DebugLog: debugLog.String(),
FileStrings: fileStrings,
}, nil
}

Expand Down Expand Up @@ -306,11 +331,11 @@ func (y *YarGen) refineWithLLM(ctx context.Context, fileName string, strs []filt
}

type scoredString struct {
Index int
String filter.FilteredString
Index int
String filter.FilteredString
HeuristicScore float64
LLMScore int
CombinedScore float64
LLMScore int
CombinedScore float64
}

scored := make([]scoredString, len(strs))
Expand Down
Loading
Loading