Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions dataset.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://github.com/Machine-Learning-for-Medical-Language/cnlp_transformers/blob/main/dataset.schema.json",
"title": "Dataset",
"description": "JSON file containing a CNLPT Dataset",
"type": "object",
"additionalProperties": false,
"properties": {
"data": {
"description": "Array of data instances",
"type": "array",
"items": {
"description": "One data instance",
"type": "object",
"properties": {
"text": {
"description": "The raw text of the instance",
"type": "string"
},
"id": {
"description": "The identifier of the instance",
"type": "string"
}
},
"additionalProperties": {
"description": "The label for the subtask given by the name of this property",
"type": "string"
},
"required": ["text", "id"],
"minProperties": 3
}
},
"metadata": {
"description": "In-file metadata specification; may be omitted only when a separate metadata.json file is present",
"$ref": "metadata.schema.json"
}
}
}
43 changes: 43 additions & 0 deletions metadata.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://github.com/Machine-Learning-for-Medical-Language/cnlp_transformers/blob/main/metadata.schema.json",
"title": "Metadata",
"description": "Metadata for a CNLPT Dataset",
"type": "object",
"additionalProperties": false,
"properties": {
"version": {
"description": "The version of this specific dataset",
"type": "string"
},
"task": {
"description": "Name of the task this dataset contains",
"type": "string"
},
"subtasks": {
"description": "List of individual subtasks this dataset has labels for",
"type": "array",
"items": {
"description": "A subtask with a specific output mode",
"type": "object",
"additionalProperties": false,
"properties": {
"task_name": {
"description": "Name of this subtask",
"type": "string"
},
"output_mode": {
"description": "The output mode of this subtask, i.e. the kind of labels it provides",
"type": "string",
"enum": ["tagging", "relations", "classification", "conceptnorm"]
}
},
"required": ["task_name", "output_mode"]
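},
"minItems": 1,
"uniqueItems": true,
"$comment": "FIXME: decide what uniqueness should mean here; uniqueItems compares whole subtask objects, so two entries with the same task_name but different output_mode still count as unique"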
}
},
"required": ["task", "subtasks"]
}