Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# See https://pre-commit.com for more information
# Install: pip install pre-commit && pre-commit install
#
# Single hook: validate maintainer JSON files against the maintainer schema
# before each commit.
repos:
- repo: https://github.com/python-jsonschema/check-jsonschema
# Pinned hook release; bump deliberately when upgrading.
rev: 0.29.4
hooks:
- id: check-jsonschema
name: Validate maintainer JSON files
# Only .json files under content/maintainers/ are passed to the hook.
files: ^content/maintainers/.*\.json$
# NOTE(review): the schema path is relative — presumably resolved from the
# repository root where pre-commit runs; confirm.
args: ["--schemafile", "maintainer.schema.json"]
130 changes: 130 additions & 0 deletions maintainer.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://forklore.org/maintainer.schema.json",
"title": "Maintainer",
"description": "Schema for validating maintainer JSON files",
"type": "object",
"required": ["username", "full_name", "photo", "socials", "projects", "form"],
"additionalProperties": false,
"properties": {
"username": {
"type": "string",
"description": "Unique identifier for the maintainer",
"minLength": 1
},
"full_name": {
"type": "string",
"description": "Full name of the maintainer",
"minLength": 1
},
"photo": {
"type": "string",
"description": "URL to the maintainer's photo",
"format": "uri"
},
"designation": {
"type": "string",
"description": "Job title or role description"
},
"socials": {
"type": "array",
"description": "List of social media links",
"items": {
"type": "object",
"required": ["label", "link"],
"additionalProperties": false,
"properties": {
"label": {
"type": "string",
"description": "Social media platform name",
"enum": [
"GitHub",
"GitLab",
"Gitlab",
"Codeberg",
"BitBucket",
"LinkedIn",
"X",
"Twitter",
"Mastodon",
"Bluesky",
"Substack",
"Discourse",
"Email",
"RSS",
"Web"
]
},
"link": {
"type": "string",
"description": "URL to the social media profile",
"format": "uri"
}
}
}
},
"projects": {
"type": "array",
"description": "List of projects maintained",
"items": {
"type": "object",
"required": ["name", "project_link", "description", "short_description"],
"additionalProperties": false,
"properties": {
"name": {
"type": "string",
"description": "Name of the project",
"minLength": 1
},
"project_link": {
"type": "string",
"description": "URL to the project repository",
"format": "uri"
},
"website_link": {
"type": "string",
"description": "URL to the project website",
"format": "uri"
},
"logo": {
"type": "string",
"description": "URL to the project logo",
"format": "uri"
},
"description": {
"type": "string",
"description": "Full description of the project"
},
"short_description": {
"type": "string",
"description": "Short description used in cards and meta"
}
}
}
},
"form": {
"type": "array",
"description": "Q&A form responses",
"items": {
"type": "object",
"required": ["question", "response"],
"additionalProperties": false,
"properties": {
"question": {
"type": "string",
"description": "The question text"
},
"response": {
"type": "string",
"description": "The maintainer's response"
}
}
}
},
"created_on": {
"type": "string",
"description": "ISO 8601 timestamp of when the entry was created",
"format": "date-time"
}
}
}
125 changes: 122 additions & 3 deletions parse-maintainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,93 @@
import json
import re
from datetime import datetime
from pathlib import Path

try:
import jsonschema
HAS_JSONSCHEMA = True
except ImportError:
HAS_JSONSCHEMA = False


# Mapping of user input labels to schema-valid labels.
# Keys are matched exactly first; a case-insensitive index derived from this
# table (see _CASEFOLDED_LABELS) catches any other capitalisation.
LABEL_NORMALIZATION = {
    # Lowercase variants
    "github": "GitHub",
    "gitlab": "GitLab",
    "codeberg": "Codeberg",
    "bitbucket": "BitBucket",
    "linkedin": "LinkedIn",
    "mastodon": "Mastodon",
    "bluesky": "Bluesky",
    "substack": "Substack",
    "discourse": "Discourse",
    "twitter": "Twitter",
    "email": "Email",
    "rss": "RSS",
    "web": "Web",
    "x": "X",
    # Mixed case variants
    "Github": "GitHub",
    # NOTE: "Gitlab" is also listed in VALID_LABELS, which normalize_label
    # checks first, so this entry is never reached by an exact match.
    "Gitlab": "GitLab",
    "Linkedin": "LinkedIn",
    "Bluesky": "Bluesky",
    "Bitbucket": "BitBucket",
    # Common aliases
    "Website": "Web",
    "website": "Web",
    "Blog": "Web",
    "blog": "Web",
    "Mail": "Email",
    "mail": "Email",
    "X/Twitter": "X",
    "Twitter/X": "X",
}

# Valid labels as per schema (mirrors the "label" enum in maintainer.schema.json)
VALID_LABELS = {
    "GitHub", "GitLab", "Gitlab", "Codeberg", "BitBucket", "LinkedIn",
    "X", "Twitter", "Mastodon", "Bluesky", "Substack",
    "Discourse", "Email", "RSS", "Web",
}

# Case-insensitive lookup table, built once at import time.
# Valid labels are added first (sorted, so the result is deterministic even
# though VALID_LABELS is a set); the normalization map is applied afterwards so
# its canonical spelling wins when both define the same case-folded key
# (e.g. "gitlab" -> "GitLab").
_CASEFOLDED_LABELS = {
    **{valid.casefold(): valid for valid in sorted(VALID_LABELS)},
    **{alias.casefold(): target for alias, target in LABEL_NORMALIZATION.items()},
}


def normalize_label(label: str) -> str:
    """Normalize a social media label to match schema requirements.

    Lookup order:
      1. the stripped label is already a schema-valid label -> returned as-is;
      2. exact match in ``LABEL_NORMALIZATION``;
      3. case-insensitive match against valid labels and known aliases
         (so ``"GITHUB"``, ``"Rss"`` or ``"x/twitter"`` are recognised);
      4. otherwise the stripped label is returned unchanged, so that schema
         validation reports it to the user.

    Args:
        label: Raw label text as typed by the user.

    Returns:
        The canonical label when one is known, else the stripped input.
    """
    label = label.strip()

    # Check if already valid
    if label in VALID_LABELS:
        return label

    # Try the exact normalization map
    if label in LABEL_NORMALIZATION:
        return LABEL_NORMALIZATION[label]

    # Fall back to a case-insensitive match; unknown labels pass through
    # unchanged (they will fail validation, but the user will see the error).
    return _CASEFOLDED_LABELS.get(label.casefold(), label)


def load_schema():
    """Read and parse ``maintainer.schema.json`` located beside this script.

    Returns:
        The parsed schema, or ``None`` when the schema file does not exist.
    """
    location = Path(__file__).parent / "maintainer.schema.json"
    if location.exists():
        return json.loads(location.read_text(encoding="utf-8"))
    return None


def validate_data(data: dict, schema: dict) -> list[str]:
    """Check ``data`` against ``schema`` and collect readable error lines.

    Validation is skipped (an empty list is returned) when the optional
    ``jsonschema`` package could not be imported.

    Args:
        data: Parsed maintainer record to check.
        schema: JSON schema to validate against (Draft 7 validator is used).

    Returns:
        One line per violation, formatted as ``" <dotted.path>: <message>"``;
        an error with an empty path is reported under ``(root)``.
    """
    if not HAS_JSONSCHEMA:
        return []

    def _format(problem) -> str:
        # Build a dotted location from the path elements (keys and indexes).
        if problem.absolute_path:
            location = ".".join(map(str, problem.absolute_path))
        else:
            location = "(root)"
        return f" {location}: {problem.message}"

    checker = jsonschema.Draft7Validator(
        schema, format_checker=jsonschema.FormatChecker()
    )
    return [_format(problem) for problem in checker.iter_errors(data)]

def parse_issue(md):
# Remove HTML comments
Expand Down Expand Up @@ -34,8 +121,12 @@ def parse_issue(md):
if ':' in line:
line = line.lstrip('- ').strip()
label, link = line.split(':', 1)
# Normalize the label to match schema requirements
normalized_label = normalize_label(label.strip())
if normalized_label not in VALID_LABELS:
print(f"Warning: Unknown or invalid social label '{label.strip()}' (normalized: '{normalized_label}')", file=sys.stderr)
data['socials'].append({
"label": label.strip(),
"label": normalized_label,
"link": link.strip()
})

Expand Down Expand Up @@ -89,10 +180,19 @@ def parse_issue(md):

if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: python parse_maintainer.py <input_file.md>")
print("Usage: python parse_maintainer.py <input_file.md> [--validate]")
print("Options:")
print(" --validate Validate output against JSON schema")
sys.exit(1)

with open(sys.argv[1], 'r', encoding='utf-8') as f:
args = sys.argv[1:]
validate_mode = "--validate" in args
if validate_mode:
args.remove("--validate")

input_file = args[0]

with open(input_file, 'r', encoding='utf-8') as f:
result = parse_issue(f.read())

# Output JSON
Expand All @@ -104,5 +204,24 @@ def parse_issue(md):
output_file = f"{username}.json"
with open(output_file, 'w', encoding='utf-8') as f:
f.write(json_output)
f.write("\n")

print(f"\nSaved to {output_file}", file=sys.stderr)

# Validate against schema if requested
if validate_mode:
schema = load_schema()
if schema is None:
print("Schema file not found, skipping validation", file=sys.stderr)
Comment on lines +214 to +215
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the schema file is available next to this file, will there be a case where it won't be found? I think this check would be redundant.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that I think about it, you're right. I don't think there will be a case where that occurs.

elif not HAS_JSONSCHEMA:
print("jsonschema not installed, skipping validation", file=sys.stderr)
print("Install with: pip install jsonschema", file=sys.stderr)
else:
errors = validate_data(result, schema)
if errors:
print(f"\nValidation failed:", file=sys.stderr)
for error in errors:
print(error, file=sys.stderr)
sys.exit(1)
else:
print("Validation passed", file=sys.stderr)
Loading