From c1cfba3ec69c930d853596bc35bb0b76d70d168e Mon Sep 17 00:00:00 2001
From: ljstella
Date: Tue, 7 Oct 2025 15:31:07 -0400
Subject: [PATCH 1/2] New filter

---
 .gitattributes | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitattributes b/.gitattributes
index a6e9819c5..70a90aaef 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,3 @@
-*.json filter=lfs diff=lfs merge=lfs -text
 *.log filter=lfs diff=lfs merge=lfs -text
 *.log text encoding=utf-8
+datasets/**/*.json filter=lfs diff=lfs merge=lfs -text

From 8ffb35f2d108fb180e8c4b711f0174c5dae886a2 Mon Sep 17 00:00:00 2001
From: ljstella
Date: Tue, 7 Oct 2025 15:31:37 -0400
Subject: [PATCH 2/2] Touch the schema

---
Notes:
  bin/dataset_schema.json becomes a plain-text draft-07 JSON Schema in
  place of its Git LFS pointer. The mitre_technique item pattern uses an
  optional sub-technique suffix, "(\.\d{3})?", so it accepts T1059 and
  T1059.001 but rejects chained suffixes such as T1059.001.002.

 bin/dataset_schema.json | 94 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 3 deletions(-)

diff --git a/bin/dataset_schema.json b/bin/dataset_schema.json
index 6acc39cf5..8dc422af7 100644
--- a/bin/dataset_schema.json
+++ b/bin/dataset_schema.json
@@ -1,3 +1,91 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:97085370d23378475c243e900bfeb0b462b849ff3e2b4f38fec5547177c91a3b
-size 2274
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Attack Data Dataset Schema",
+  "description": "JSON Schema for validating YAML dataset files in the attack_data project",
+  "type": "object",
+  "required": [
+    "author",
+    "id",
+    "date",
+    "description",
+    "environment",
+    "datasets"
+  ],
+  "properties": {
+    "author": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Author(s) of the dataset"
+    },
+    "id": {
+      "type": "string",
+      "format": "uuid",
+      "description": "UUID identifier for the dataset"
+    },
+    "date": {
+      "type": "string",
+      "description": "Date of the dataset"
+    },
+    "description": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Description of the dataset"
+    },
+    "environment": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Environment where the dataset was created"
+    },
+    "directory": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Directory name for the dataset"
+    },
+    "mitre_technique": {
+      "type": "array",
+      "items": {
+        "type": "string",
+        "pattern": "^T\\d{4}(\\.\\d{3})?$"
+      },
+      "description": "List of MITRE ATT&CK technique IDs (can be empty)"
+    },
+    "datasets": {
+      "type": "array",
+      "minItems": 1,
+      "items": {
+        "type": "object",
+        "required": [
+          "name",
+          "path",
+          "source",
+          "sourcetype"
+        ],
+        "properties": {
+          "name": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Name of the dataset"
+          },
+          "path": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Path to the dataset file"
+          },
+          "source": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Source of the data"
+          },
+          "sourcetype": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Type of the data source"
+          }
+        },
+        "additionalProperties": false
+      },
+      "description": "List of datasets (must contain at least one dataset)"
+    }
+  },
+  "additionalProperties": false
+}