fossunited · Praneel7015 · Dec 8, 2025 · Dec 16, 2025 · Dec 16, 2025 · Dec 17, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,10 @@
+# See https://pre-commit.com for more information
+# Install: pip install pre-commit && pre-commit install
+repos:
+  - repo: https://github.com/python-jsonschema/check-jsonschema
+    rev: 0.29.4  # pinned revision of the hook repository
+    hooks:
+      - id: check-jsonschema
+        name: Validate maintainer JSON files
+        files: ^content/maintainers/.*\.json$  # only JSON files under content/maintainers/ are checked
+        args: ["--schemafile", "maintainer.schema.json"]  # schema that each matched file is validated against
diff --git a/maintainer.schema.json b/maintainer.schema.json
@@ -0,0 +1,130 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://forklore.org/maintainer.schema.json",
+  "title": "Maintainer",
+  "description": "Schema for validating maintainer JSON files",
+  "type": "object",
+  "required": ["username", "full_name", "photo", "socials", "projects", "form"],
+  "additionalProperties": false,
+  "properties": {
+    "username": {
+      "type": "string",
+      "description": "Unique identifier for the maintainer",
+      "minLength": 1
+    },
+    "full_name": {
+      "type": "string",
+      "description": "Full name of the maintainer",
+      "minLength": 1
+    },
+    "photo": {
+      "type": "string",
+      "description": "URL to the maintainer's photo",
+      "format": "uri"
+    },
+    "designation": {
+      "type": "string",
+      "description": "Job title or role description"
+    },
+    "socials": {
+      "type": "array",
+      "description": "List of social media links",
+      "items": {
+        "type": "object",
+        "required": ["label", "link"],
+        "additionalProperties": false,
+        "properties": {
+          "label": {
+            "type": "string",
+            "description": "Social media platform name",
+            "enum": [
+              "GitHub",
+              "GitLab",
+              "Gitlab",
+              "Codeberg",
+              "BitBucket",
+              "LinkedIn",
+              "X",
+              "Twitter",
+              "Mastodon",
+              "Bluesky",
+              "Substack",
+              "Discourse",
+              "Email",
+              "RSS",
+              "Web"
+            ]
+          },
+          "link": {
+            "type": "string",
+            "description": "URL to the social media profile",
+            "format": "uri"
+          }
+        }
+      }
+    },
+    "projects": {
+      "type": "array",
+      "description": "List of projects maintained",
+      "items": {
+        "type": "object",
+        "required": ["name", "project_link", "description", "short_description"],
+        "additionalProperties": false,
+        "properties": {
+          "name": {
+            "type": "string",
+            "description": "Name of the project",
+            "minLength": 1
+          },
+          "project_link": {
+            "type": "string",
+            "description": "URL to the project repository",
+            "format": "uri"
+          },
+          "website_link": {
+            "type": "string",
+            "description": "URL to the project website",
+            "format": "uri"
+          },
+          "logo": {
+            "type": "string",
+            "description": "URL to the project logo",
+            "format": "uri"
+          },
+          "description": {
+            "type": "string",
+            "description": "Full description of the project"
+          },
+          "short_description": {
+            "type": "string",
+            "description": "Short description used in cards and meta"
+          }
+        }
+      }
+    },
+    "form": {
+      "type": "array",
+      "description": "Q&A form responses",
+      "items": {
+        "type": "object",
+        "required": ["question", "response"],
+        "additionalProperties": false,
+        "properties": {
+          "question": {
+            "type": "string",
+            "description": "The question text"
+          },
+          "response": {
+            "type": "string",
+            "description": "The maintainer's response"
+          }
+        }
+      }
+    },
+    "created_on": {
+      "type": "string",
+      "description": "ISO 8601 timestamp of when the entry was created",
+      "format": "date-time"
+    }
+  }
+}
diff --git a/parse-maintainer.py b/parse-maintainer.py
@@ -4,6 +4,93 @@
 import json
 import re
 from datetime import datetime
+from pathlib import Path
+
+try:
+    import jsonschema
+    HAS_JSONSCHEMA = True
+except ImportError:
+    HAS_JSONSCHEMA = False
+
+
+# Maps label spellings found in user input to the schema-valid label they stand for
+LABEL_NORMALIZATION = {
+    # Lowercase variants
+    "github": "GitHub",
+    "gitlab": "GitLab",
+    "codeberg": "Codeberg",
+    "bitbucket": "BitBucket",
+    "linkedin": "LinkedIn",
+    "mastodon": "Mastodon",
+    "bluesky": "Bluesky",
+    "substack": "Substack",
+    "discourse": "Discourse",
+    "twitter": "Twitter",
+    "email": "Email",
+    "rss": "RSS",
+    "web": "Web",
+    "x": "X",
+    # Mixed case variants
+    "Github": "GitHub",
+    "Gitlab": "GitLab",  # note: "Gitlab" is itself in VALID_LABELS, so normalize_label returns it unchanged
+    "Linkedin": "LinkedIn",
+    "Bluesky": "Bluesky",  # identity entry: "Bluesky" is already a valid label
+    "Bitbucket": "BitBucket",
+    # Common aliases
+    "Website": "Web",
+    "website": "Web",
+    "Blog": "Web",
+    "blog": "Web",
+    "Mail": "Email",
+    "mail": "Email",
+    "X/Twitter": "X",
+    "Twitter/X": "X",
+}
+
+# Labels accepted by the socials "label" enum in maintainer.schema.json (keep both lists in sync)
+VALID_LABELS = {
+    "GitHub", "GitLab", "Gitlab", "Codeberg", "BitBucket", "LinkedIn",
+    "X", "Twitter", "Mastodon", "Bluesky", "Substack",
+    "Discourse", "Email", "RSS", "Web"
+}
+
+
+def normalize_label(label: str) -> str:
+    """Normalize a social media label to match schema requirements.
+
+    Surrounding whitespace is stripped. Schema-valid labels and exact alias
+    matches win first; otherwise aliases are matched ignoring case, so input
+    such as "GITHUB" resolves. Unknown labels are returned stripped, unchanged.
+    """
+    label = label.strip()
+    if label in VALID_LABELS:
+        return label
+    if label in LABEL_NORMALIZATION:
+        return LABEL_NORMALIZATION[label]
+    folded = {alias.casefold(): target for alias, target in LABEL_NORMALIZATION.items()}
+    return folded.get(label.casefold(), label)
+
+
+def load_schema():
+    """Return the parsed maintainer.schema.json found beside this script, or None if it is missing."""
+    schema_file = Path(__file__).parent / "maintainer.schema.json"
+    if schema_file.exists():
+        with open(schema_file, encoding="utf-8") as handle:
+            return json.load(handle)
+    return None
+
+
+def validate_data(data: dict, schema: dict) -> list[str]:
+    """Return one "  path: message" line per Draft 7 schema violation found in data."""
+    if not HAS_JSONSCHEMA:
+        return []  # jsonschema could not be imported, so nothing can be checked
+    checker = jsonschema.Draft7Validator(schema, format_checker=jsonschema.FormatChecker())
+    problems = []
+    for issue in checker.iter_errors(data):
+        trail = issue.absolute_path
+        location = ".".join(map(str, trail)) if trail else "(root)"
+        problems.append(f"  {location}: {issue.message}")
+    return problems
 
 def parse_issue(md):
     # Remove HTML comments
@@ -34,8 +121,12 @@ def parse_issue(md):
             if ':' in line:
                 line = line.lstrip('- ').strip()
                 label, link = line.split(':', 1)
+                # Normalize the label to match schema requirements
+                normalized_label = normalize_label(label.strip())
+                if normalized_label not in VALID_LABELS:
+                    print(f"Warning: Unknown or invalid social label '{label.strip()}' (normalized: '{normalized_label}')", file=sys.stderr)
                 data['socials'].append({
-                    "label": label.strip(),
+                    "label": normalized_label,
                     "link": link.strip()
                 })
 
@@ -89,10 +180,19 @@ def parse_issue(md):
 
 if __name__ == "__main__":
     if len(sys.argv) < 2:
-        print("Usage: python parse_maintainer.py <input_file.md>")
+        print("Usage: python parse_maintainer.py <input_file.md> [--validate]")
+        print("Options:")
+        print("  --validate    Validate output against JSON schema")
         sys.exit(1)
 
-    with open(sys.argv[1], 'r', encoding='utf-8') as f:
+    args = sys.argv[1:]
+    validate_mode = "--validate" in args
+    if validate_mode:
+        args.remove("--validate")
+
+    input_file = args[0]
+
+    with open(input_file, 'r', encoding='utf-8') as f:
         result = parse_issue(f.read())
 
     # Output JSON
@@ -104,5 +204,24 @@ def parse_issue(md):
     output_file = f"{username}.json"
     with open(output_file, 'w', encoding='utf-8') as f:
         f.write(json_output)
+        f.write("\n")
 
     print(f"\nSaved to {output_file}", file=sys.stderr)
+
+    # Validate against schema if requested
+    if validate_mode:
+        schema = load_schema()
+        if schema is None:
+            print("Schema file not found, skipping validation", file=sys.stderr)
+        elif not HAS_JSONSCHEMA:
+            print("jsonschema not installed, skipping validation", file=sys.stderr)
+            print("Install with: pip install jsonschema", file=sys.stderr)
+        else:
+            errors = validate_data(result, schema)
+            if errors:
+                print(f"\nValidation failed:", file=sys.stderr)
+                for error in errors:
+                    print(error, file=sys.stderr)
+                sys.exit(1)
+            else:
+                print("Validation passed", file=sys.stderr)