From b9485a9336c7334dfb7448b602de397a77d26a85 Mon Sep 17 00:00:00 2001 From: JacobPEvans <20714140+JacobPEvans@users.noreply.github.com> Date: Sun, 15 Mar 2026 12:18:03 -0400 Subject: [PATCH 1/5] feat: add shared YAML-config file size checker with tiered limits Replace the blanket 500KB limit with a proper tiered system: - 5KB warning / 10KB error (default) - 10KB warning / 20KB error (extended, for CHANGELOG/README) - Per-repo overrides via .file-size.yml The reusable workflow now supports 3 priority levels: 1. Repo's own script (existing behavior preserved) 2. Shared YAML-config checker (new) 3. Inline fallback (legacy) DRY: same script runs in pre-commit hook and GH Actions. (claude) --- .cspell.json | 5 +- .file-size.yml | 32 +++ .github/workflows/_file-size.yml | 18 +- .pre-commit-config.yaml | 12 ++ scripts/workflows/check-file-sizes.sh | 292 ++++++++++++++++++++++++++ 5 files changed, 354 insertions(+), 5 deletions(-) create mode 100644 .file-size.yml create mode 100755 scripts/workflows/check-file-sizes.sh diff --git a/.cspell.json b/.cspell.json index cf80ea0..572cd22 100644 --- a/.cspell.json +++ b/.cspell.json @@ -38,7 +38,10 @@ "nixpkgs", "Anson", "pipefail", - "mikefarah" + "mikefarah", + "kislyuk", + "gsub", + "RLENGTH" ], "ignorePaths": [ ".git", diff --git a/.file-size.yml b/.file-size.yml new file mode 100644 index 0000000..290f7c8 --- /dev/null +++ b/.file-size.yml @@ -0,0 +1,32 @@ +# ============================================================================= +# Org-wide File Size Limits +# ============================================================================= +# Per-repo overrides: create .file-size.yml in the repo root. +# Sizes are in bytes. +# ============================================================================= + +defaults: + warn: 5120 # 5 KB + error: 10240 # 10 KB + +extended: + warn: 10240 # 10 KB + error: 20480 # 20 KB + files: + - CHANGELOG.md + - README.md + +exempt: + - "*.lock" + - "package-lock.json" + - "pnpm-lock.yaml" + +scan: + - .nix + - .md + - .sh + - .yml + - .yaml + - .tf + - .py + - .j2 diff --git a/.github/workflows/_file-size.yml b/.github/workflows/_file-size.yml index 55b76c8..87d5eaa 100644 --- a/.github/workflows/_file-size.yml +++ b/.github/workflows/_file-size.yml @@ -1,12 +1,12 @@ # Reusable: File Size Check -# Delegates to repo's own script if available, otherwise runs inline check. +# Delegates to repo's own script, shared YAML-config checker, or inline fallback. name: _file-size on: workflow_call: inputs: max-file-size-kb: - description: "Maximum file size in KB" + description: "Maximum file size in KB (legacy fallback)" type: number default: 500 exclude-patterns: @@ -32,11 +32,21 @@ jobs: env: MAX_KB: ${{ inputs.max-file-size-kb }} EXCLUDES: ${{ inputs.exclude-patterns }} + REPO_OWNER: ${{ github.repository_owner }} run: | - # Delegate to repo script if available + # Priority 1: Delegate to repo's own script [ -x "./scripts/workflows/check-file-sizes.sh" ] && exec ./scripts/workflows/check-file-sizes.sh - # Build find exclusions from JSON input + # Priority 2: Use shared YAML-config checker if .file-size.yml exists + if [ -f ".file-size.yml" ]; then + curl -sSfL \ + "https://raw.githubusercontent.com/${REPO_OWNER}/.github/main/scripts/workflows/check-file-sizes.sh" \ + -o /tmp/check-file-sizes.sh + chmod +x /tmp/check-file-sizes.sh + exec /tmp/check-file-sizes.sh + fi + + # Priority 3: Inline fallback (legacy) args=() for p in $(echo "$EXCLUDES" | jq -r '.[]'); do [[ "$p" == */* ]] && args+=(-not -path "$p") || args+=(-not -name "$p") diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index be529ce..a4fda20 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -74,3 +74,15 @@ repos: - id: check-branch name: Check branch naming conventions stages: [pre-push] + + # --------------------------------------------------------------------------- + # File size enforcement + # --------------------------------------------------------------------------- + - repo: local + hooks: + - id: file-size-check + name: Check file sizes + entry: scripts/workflows/check-file-sizes.sh + language: script + always_run: true + stages: [pre-commit] diff --git a/scripts/workflows/check-file-sizes.sh b/scripts/workflows/check-file-sizes.sh new file mode 100755 index 0000000..2e98da8 --- /dev/null +++ b/scripts/workflows/check-file-sizes.sh @@ -0,0 +1,292 @@ +#!/usr/bin/env bash +# ============================================================================= +# File Size Checker +# ============================================================================= +# Checks file sizes against YAML config (.file-size.yml). +# Reads per-file, extended, and default tier limits. +# Outputs GitHub Actions annotations in CI, plain text otherwise. +# Exit code = number of error-level violations (capped at 125). +# ============================================================================= +set -euo pipefail + +# --------------------------------------------------------------------------- +# Defaults (used when no config file is present) +# --------------------------------------------------------------------------- +DEFAULT_WARN=5120 +DEFAULT_ERROR=10240 +DEFAULT_SCAN=(.nix .md .sh .yml .yaml .tf .py .j2) + +EXTENDED_WARN="" +EXTENDED_ERROR="" +declare -a EXTENDED_FILES=() +declare -a EXEMPT_PATTERNS=() +declare -a SCAN_EXTENSIONS=() + +# --------------------------------------------------------------------------- +# Detect CI environment +# --------------------------------------------------------------------------- +is_ci() { [[ -n "${GITHUB_ACTIONS:-}" ]]; } + +# --------------------------------------------------------------------------- +# Annotation helpers +# --------------------------------------------------------------------------- +emit_warning() { + local file="$1" msg="$2" + if is_ci; then + echo "::warning file=${file}::${msg}" + else + echo "WARNING: ${file}: ${msg}" + fi +} + +emit_error() { + local file="$1" msg="$2" + if is_ci; then + echo "::error file=${file}::${msg}" + else + echo "ERROR: ${file}: ${msg}" + fi +} + +# --------------------------------------------------------------------------- +# YAML parsing helpers +# --------------------------------------------------------------------------- +# Detect any working yq (Go mikefarah/yq v4+, or Python kislyuk/yq). +# Both support jq-style filter syntax: yq '' +has_yq() { + command -v yq &>/dev/null && yq '.defaults' .file-size.yml &>/dev/null 2>&1 +} + +# Read a scalar value from YAML using yq (jq-style syntax works in both variants) +yq_scalar() { + local file="$1" path="$2" + yq "${path} // empty" "$file" 2>/dev/null +} + +# Read a list from YAML using yq (one item per line) +yq_list() { + local file="$1" path="$2" + yq "${path} // [] | .[]" "$file" 2>/dev/null +} + +# Fallback: parse nested scalar (e.g., defaults.warn) using awk +fallback_nested_scalar() { + local file="$1" section="$2" key="$3" + awk -v section="$section" -v key="$key" ' + $0 ~ "^" section ":" { s=1; next } + s && /^[^ ]/ { exit } + s && $0 ~ key ":" { + sub(/.*:[ \t]*/, "") + sub(/#.*/, "") + gsub(/[ \t]/, "") + print + exit + } + ' "$file" +} + +# Fallback: read a list from a YAML section using awk +fallback_list() { + local file="$1" section="$2" + awk -v section="$section" ' + BEGIN { in_section = 0; indent = -1 } + { + if ($0 ~ "^" section ":") { + in_section = 1 + match($0, /^[[:space:]]*/); indent = RLENGTH + next + } + if (in_section) { + if (/^[[:space:]]*[^[:space:]#-]/) { + match($0, /^[[:space:]]*/); + if (RLENGTH <= indent) exit + } + if (/^[[:space:]]*- /) { + val = $0 + sub(/^[[:space:]]*- ["'"'"']?/, "", val) + sub(/["'"'"']?[[:space:]]*(#.*)?$/, "", val) + if (val != "") print val + } + } + } + ' "$file" +} + +# Fallback: read nested list (e.g., extended.files) using awk +fallback_nested_list() { + local file="$1" section="$2" key="$3" + awk -v section="$section" -v key="$key" ' + $0 ~ "^" section ":" { s=1; next } + s && /^[^ ]/ { exit } + s && $0 ~ key ":" { f=1; next } + s && f && /^[[:space:]]*- / { + val = $0 + sub(/^[[:space:]]*- ["'"'"']?/, "", val) + sub(/["'"'"']?[[:space:]]*(#.*)?$/, "", val) + print val + } + s && f && /^[[:space:]]*[^[:space:]#-]/ && !($0 ~ key ":") { exit } + ' "$file" +} + +# --------------------------------------------------------------------------- +# Parse config +# --------------------------------------------------------------------------- +parse_config() { + local config="$1" + + if has_yq; then + DEFAULT_WARN=$(yq_scalar "$config" ".defaults.warn") + DEFAULT_ERROR=$(yq_scalar "$config" ".defaults.error") + EXTENDED_WARN=$(yq_scalar "$config" ".extended.warn") + EXTENDED_ERROR=$(yq_scalar "$config" ".extended.error") + + while IFS= read -r line; do + [[ -n "$line" ]] && EXTENDED_FILES+=("$line") + done < <(yq_list "$config" ".extended.files") + + while IFS= read -r line; do + [[ -n "$line" ]] && EXEMPT_PATTERNS+=("$line") + done < <(yq_list "$config" ".exempt") + + while IFS= read -r line; do + [[ -n "$line" ]] && SCAN_EXTENSIONS+=("$line") + done < <(yq_list "$config" ".scan") + else + DEFAULT_WARN=$(fallback_nested_scalar "$config" "defaults" "warn") + DEFAULT_ERROR=$(fallback_nested_scalar "$config" "defaults" "error") + EXTENDED_WARN=$(fallback_nested_scalar "$config" "extended" "warn") + EXTENDED_ERROR=$(fallback_nested_scalar "$config" "extended" "error") + + while IFS= read -r line; do + [[ -n "$line" ]] && EXTENDED_FILES+=("$line") + done < <(fallback_nested_list "$config" "extended" "files") + + while IFS= read -r line; do + [[ -n "$line" ]] && EXEMPT_PATTERNS+=("$line") + done < <(fallback_list "$config" "exempt") + + while IFS= read -r line; do + [[ -n "$line" ]] && SCAN_EXTENSIONS+=("$line") + done < <(fallback_list "$config" "scan") + fi + + # Apply defaults for empty values + : "${DEFAULT_WARN:=5120}" + : "${DEFAULT_ERROR:=10240}" + + if [[ ${#SCAN_EXTENSIONS[@]} -eq 0 ]]; then + SCAN_EXTENSIONS=("${DEFAULT_SCAN[@]}") + fi +} + +# --------------------------------------------------------------------------- +# Check if a filename matches an exempt pattern +# --------------------------------------------------------------------------- +is_exempt() { + local file="$1" basename + basename=$(basename "$file") + for pattern in "${EXEMPT_PATTERNS[@]}"; do + # shellcheck disable=SC2254 + case "$basename" in + $pattern) return 0 ;; + esac + if [[ "$file" == "$pattern" || "$file" == "./$pattern" ]]; then + return 0 + fi + done + return 1 +} + +# --------------------------------------------------------------------------- +# Check if a filename is in the extended list +# --------------------------------------------------------------------------- +is_extended() { + local file="$1" basename + basename=$(basename "$file") + for ext_file in "${EXTENDED_FILES[@]}"; do + if [[ "$basename" == "$ext_file" ]]; then + return 0 + fi + done + return 1 +} + +# --------------------------------------------------------------------------- +# Get file size in bytes (cross-platform) +# --------------------------------------------------------------------------- +file_size_bytes() { + if stat --version &>/dev/null 2>&1; then + stat -c%s "$1" + else + stat -f%z "$1" + fi +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- +main() { + local config_file=".file-size.yml" + local error_count=0 + local warning_count=0 + + if [[ -f "$config_file" ]]; then + parse_config "$config_file" + else + SCAN_EXTENSIONS=("${DEFAULT_SCAN[@]}") + fi + + # Build find name predicates + local name_args=() + local first=true + for ext in "${SCAN_EXTENSIONS[@]}"; do + if $first; then first=false; else name_args+=(-o); fi + name_args+=(-name "*${ext}") + done + + while IFS= read -r -d '' file; do + if [[ ${#EXEMPT_PATTERNS[@]} -gt 0 ]] && is_exempt "$file"; then + continue + fi + + local size_bytes + size_bytes=$(file_size_bytes "$file") + + local warn_limit="$DEFAULT_WARN" + local error_limit="$DEFAULT_ERROR" + + if [[ -n "${EXTENDED_WARN:-}" && -n "${EXTENDED_ERROR:-}" ]] \ + && [[ ${#EXTENDED_FILES[@]} -gt 0 ]] \ + && is_extended "$file"; then + warn_limit="$EXTENDED_WARN" + error_limit="$EXTENDED_ERROR" + fi + + if [[ "$size_bytes" -ge "$error_limit" ]]; then + local size_kb=$(( size_bytes / 1024 )) + local limit_kb=$(( error_limit / 1024 )) + emit_error "$file" "${size_kb}KB exceeds ${limit_kb}KB error limit" + (( error_count++ )) + elif [[ "$size_bytes" -ge "$warn_limit" ]]; then + local size_kb=$(( size_bytes / 1024 )) + local limit_kb=$(( warn_limit / 1024 )) + emit_warning "$file" "${size_kb}KB exceeds ${limit_kb}KB warning limit" + (( warning_count++ )) + fi + done < <(find . -type f \( "${name_args[@]}" \) \ + -not -path "./.git/*" -not -path "./result/*" -not -path "./node_modules/*" \ + -print0 2>/dev/null | sort -z) + + if [[ $error_count -gt 0 || $warning_count -gt 0 ]]; then + echo "" + echo "File size check: ${error_count} error(s), ${warning_count} warning(s)" + else + echo "File size check: all files within limits" + fi + + if [[ $error_count -gt 125 ]]; then exit 125; fi + exit "$error_count" +} + +main "$@" From b5e3c315c3cf156a0346a154c32755e3b5a5a4db Mon Sep 17 00:00:00 2001 From: JacobPEvans <20714140+JacobPEvans@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:20:25 -0400 Subject: [PATCH 2/5] fix(pre-commit): add pass_filenames: false to file-size-check hook The script scans the whole repo via find and ignores positional args, so passing staged filenames is unnecessary and wasteful. (claude) --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a4fda20..6190976 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -85,4 +85,5 @@ repos: entry: scripts/workflows/check-file-sizes.sh language: script always_run: true + pass_filenames: false stages: [pre-commit] From 5632d50bf290206a6f3df28aaee3d82ea9a31439 Mon Sep 17 00:00:00 2001 From: JacobPEvans <20714140+JacobPEvans@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:20:44 -0400 Subject: [PATCH 3/5] fix(file-size): add curl retries and graceful fallback on download failure Adds --retry 3 --retry-connrefused to match _markdown-lint.yml pattern. When the shared checker download fails, emit a warning and fall through to the inline fallback instead of failing the workflow. (claude) --- .github/workflows/_file-size.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_file-size.yml b/.github/workflows/_file-size.yml index 87d5eaa..86da7c4 100644 --- a/.github/workflows/_file-size.yml +++ b/.github/workflows/_file-size.yml @@ -39,11 +39,13 @@ jobs: # Priority 2: Use shared YAML-config checker if .file-size.yml exists if [ -f ".file-size.yml" ]; then - curl -sSfL \ + if curl -sSfL --retry 3 --retry-connrefused \ "https://raw.githubusercontent.com/${REPO_OWNER}/.github/main/scripts/workflows/check-file-sizes.sh" \ - -o /tmp/check-file-sizes.sh - chmod +x /tmp/check-file-sizes.sh - exec /tmp/check-file-sizes.sh + -o /tmp/check-file-sizes.sh; then + chmod +x /tmp/check-file-sizes.sh + exec /tmp/check-file-sizes.sh + fi + echo "::warning::Failed to download shared checker; falling back to inline" fi # Priority 3: Inline fallback (legacy) From f3c63393fa2e64f437890f4ae67f7c191d67ae86 Mon Sep 17 00:00:00 2001 From: JacobPEvans <20714140+JacobPEvans@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:21:58 -0400 Subject: [PATCH 4/5] fix(file-size): decouple yq detection from config parsing, use raw output - has_yq now only checks yq availability, not config validity - parse_config validates YAML upfront and fails fast on malformed configs - Add -r flag to yq calls for raw scalar output, preventing quoted patterns (e.g., "*.lock") from breaking glob/case matching - Fix arithmetic increment under set -e (0++ is falsy, causes early exit) - Add large repo docs to extended files list in .file-size.yml (claude) --- .file-size.yml | 4 ++++ scripts/workflows/check-file-sizes.sh | 20 ++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/.file-size.yml b/.file-size.yml index 290f7c8..023bfa7 100644 --- a/.file-size.yml +++ b/.file-size.yml @@ -14,7 +14,11 @@ extended: error: 20480 # 20 KB files: - CHANGELOG.md + - CONTRIBUTING.md + - LABELS.md - README.md + - repo-health-audit-prompt.md + - check-file-sizes.sh exempt: - "*.lock" diff --git a/scripts/workflows/check-file-sizes.sh b/scripts/workflows/check-file-sizes.sh index 2e98da8..c77dbc2 100755 --- a/scripts/workflows/check-file-sizes.sh +++ b/scripts/workflows/check-file-sizes.sh @@ -52,21 +52,20 @@ emit_error() { # YAML parsing helpers # --------------------------------------------------------------------------- # Detect any working yq (Go mikefarah/yq v4+, or Python kislyuk/yq). -# Both support jq-style filter syntax: yq '' has_yq() { - command -v yq &>/dev/null && yq '.defaults' .file-size.yml &>/dev/null 2>&1 + command -v yq &>/dev/null } -# Read a scalar value from YAML using yq (jq-style syntax works in both variants) +# Read a scalar value from YAML using yq (raw output to avoid quoted strings) yq_scalar() { local file="$1" path="$2" - yq "${path} // empty" "$file" 2>/dev/null + yq -r "${path} // empty" "$file" } -# Read a list from YAML using yq (one item per line) +# Read a list from YAML using yq (one item per line, raw output) yq_list() { local file="$1" path="$2" - yq "${path} // [] | .[]" "$file" 2>/dev/null + yq -r "${path} // [] | .[]" "$file" } # Fallback: parse nested scalar (e.g., defaults.warn) using awk @@ -136,6 +135,11 @@ parse_config() { local config="$1" if has_yq; then + # Validate that yq can parse the config; fail fast on malformed YAML + if ! yq '.' "$config" &>/dev/null; then + echo "ERROR: yq failed to parse ${config}" >&2 + exit 1 + fi DEFAULT_WARN=$(yq_scalar "$config" ".defaults.warn") DEFAULT_ERROR=$(yq_scalar "$config" ".defaults.error") EXTENDED_WARN=$(yq_scalar "$config" ".extended.warn") @@ -267,12 +271,12 @@ main() { local size_kb=$(( size_bytes / 1024 )) local limit_kb=$(( error_limit / 1024 )) emit_error "$file" "${size_kb}KB exceeds ${limit_kb}KB error limit" - (( error_count++ )) + error_count=$(( error_count + 1 )) elif [[ "$size_bytes" -ge "$warn_limit" ]]; then local size_kb=$(( size_bytes / 1024 )) local limit_kb=$(( warn_limit / 1024 )) emit_warning "$file" "${size_kb}KB exceeds ${limit_kb}KB warning limit" - (( warning_count++ )) + warning_count=$(( warning_count + 1 )) fi done < <(find . -type f \( "${name_args[@]}" \) \ -not -path "./.git/*" -not -path "./result/*" -not -path "./node_modules/*" \ From 27dc11391ff2b220d997fcf708eeb87ece4fdf82 Mon Sep 17 00:00:00 2001 From: JacobPEvans <20714140+JacobPEvans@users.noreply.github.com> Date: Sun, 15 Mar 2026 15:03:21 -0400 Subject: [PATCH 5/5] refactor: simplify check-file-sizes.sh from 296 to 155 lines - Drop awk fallback, require yq (available on GH Actions + Nix devShells) - Use wc -c instead of stat platform dance - Merge emit_warning/emit_error into single emit() function - Extract read_into() helper to DRY 6 identical while-read loops - Merge is_exempt/is_extended into matches_list() with nameref - Resolve CI mode once at startup, declare locals outside loop - Remove self-exemption from .file-size.yml (script now 5KB, under 10KB limit) (claude) --- .file-size.yml | 1 - scripts/workflows/check-file-sizes.sh | 267 ++++++-------------------- 2 files changed, 63 insertions(+), 205 deletions(-) diff --git a/.file-size.yml b/.file-size.yml index 023bfa7..d1fae20 100644 --- a/.file-size.yml +++ b/.file-size.yml @@ -18,7 +18,6 @@ extended: - LABELS.md - README.md - repo-health-audit-prompt.md - - check-file-sizes.sh exempt: - "*.lock" diff --git a/scripts/workflows/check-file-sizes.sh b/scripts/workflows/check-file-sizes.sh index c77dbc2..93f9810 100755 --- a/scripts/workflows/check-file-sizes.sh +++ b/scripts/workflows/check-file-sizes.sh @@ -3,7 +3,7 @@ # File Size Checker # ============================================================================= # Checks file sizes against YAML config (.file-size.yml). -# Reads per-file, extended, and default tier limits. +# Requires yq (Go mikefarah/yq or Python kislyuk/yq). # Outputs GitHub Actions annotations in CI, plain text otherwise. # Exit code = number of error-level violations (capped at 125). # ============================================================================= @@ -15,216 +15,77 @@ set -euo pipefail DEFAULT_WARN=5120 DEFAULT_ERROR=10240 DEFAULT_SCAN=(.nix .md .sh .yml .yaml .tf .py .j2) - EXTENDED_WARN="" EXTENDED_ERROR="" declare -a EXTENDED_FILES=() declare -a EXEMPT_PATTERNS=() declare -a SCAN_EXTENSIONS=() -# --------------------------------------------------------------------------- -# Detect CI environment -# --------------------------------------------------------------------------- -is_ci() { [[ -n "${GITHUB_ACTIONS:-}" ]]; } +# Resolve CI mode once at startup +CI_MODE=false +[[ -n "${GITHUB_ACTIONS:-}" ]] && CI_MODE=true # --------------------------------------------------------------------------- -# Annotation helpers +# Helpers # --------------------------------------------------------------------------- -emit_warning() { - local file="$1" msg="$2" - if is_ci; then - echo "::warning file=${file}::${msg}" - else - echo "WARNING: ${file}: ${msg}" - fi -} - -emit_error() { - local file="$1" msg="$2" - if is_ci; then - echo "::error file=${file}::${msg}" +emit() { + local level="$1" file="$2" msg="$3" + if $CI_MODE; then + echo "::${level} file=${file}::${msg}" else - echo "ERROR: ${file}: ${msg}" + echo "${level^^}: ${file}: ${msg}" fi } -# --------------------------------------------------------------------------- -# YAML parsing helpers -# --------------------------------------------------------------------------- -# Detect any working yq (Go mikefarah/yq v4+, or Python kislyuk/yq). -has_yq() { - command -v yq &>/dev/null -} - -# Read a scalar value from YAML using yq (raw output to avoid quoted strings) -yq_scalar() { - local file="$1" path="$2" - yq -r "${path} // empty" "$file" -} - -# Read a list from YAML using yq (one item per line, raw output) -yq_list() { - local file="$1" path="$2" - yq -r "${path} // [] | .[]" "$file" -} - -# Fallback: parse nested scalar (e.g., defaults.warn) using awk -fallback_nested_scalar() { - local file="$1" section="$2" key="$3" - awk -v section="$section" -v key="$key" ' - $0 ~ "^" section ":" { s=1; next } - s && /^[^ ]/ { exit } - s && $0 ~ key ":" { - sub(/.*:[ \t]*/, "") - sub(/#.*/, "") - gsub(/[ \t]/, "") - print - exit - } - ' "$file" +# Populate a global array from a command's line-by-line output +# Usage: read_into ARRAY_NAME command args... +read_into() { + local -n _arr=$1; shift + while IFS= read -r line; do + [[ -n "$line" ]] && _arr+=("$line") + done < <("$@") } -# Fallback: read a list from a YAML section using awk -fallback_list() { - local file="$1" section="$2" - awk -v section="$section" ' - BEGIN { in_section = 0; indent = -1 } - { - if ($0 ~ "^" section ":") { - in_section = 1 - match($0, /^[[:space:]]*/); indent = RLENGTH - next - } - if (in_section) { - if (/^[[:space:]]*[^[:space:]#-]/) { - match($0, /^[[:space:]]*/); - if (RLENGTH <= indent) exit - } - if (/^[[:space:]]*- /) { - val = $0 - sub(/^[[:space:]]*- ["'"'"']?/, "", val) - sub(/["'"'"']?[[:space:]]*(#.*)?$/, "", val) - if (val != "") print val - } - } - } - ' "$file" -} - -# Fallback: read nested list (e.g., extended.files) using awk -fallback_nested_list() { - local file="$1" section="$2" key="$3" - awk -v section="$section" -v key="$key" ' - $0 ~ "^" section ":" { s=1; next } - s && /^[^ ]/ { exit } - s && $0 ~ key ":" { f=1; next } - s && f && /^[[:space:]]*- / { - val = $0 - sub(/^[[:space:]]*- ["'"'"']?/, "", val) - sub(/["'"'"']?[[:space:]]*(#.*)?$/, "", val) - print val - } - s && f && /^[[:space:]]*[^[:space:]#-]/ && !($0 ~ key ":") { exit } - ' "$file" +# Check if a basename matches any entry in a named array (glob matching) +# Usage: matches_list ARRAY_NAME "$file" +matches_list() { + local -n _list=$1 + local bn + bn=$(basename "$2") + for pattern in "${_list[@]}"; do + # shellcheck disable=SC2254 + case "$bn" in $pattern) return 0 ;; esac + done + return 1 } # --------------------------------------------------------------------------- -# Parse config +# Parse config (requires yq) # --------------------------------------------------------------------------- parse_config() { local config="$1" - if has_yq; then - # Validate that yq can parse the config; fail fast on malformed YAML - if ! yq '.' "$config" &>/dev/null; then - echo "ERROR: yq failed to parse ${config}" >&2 - exit 1 - fi - DEFAULT_WARN=$(yq_scalar "$config" ".defaults.warn") - DEFAULT_ERROR=$(yq_scalar "$config" ".defaults.error") - EXTENDED_WARN=$(yq_scalar "$config" ".extended.warn") - EXTENDED_ERROR=$(yq_scalar "$config" ".extended.error") - - while IFS= read -r line; do - [[ -n "$line" ]] && EXTENDED_FILES+=("$line") - done < <(yq_list "$config" ".extended.files") - - while IFS= read -r line; do - [[ -n "$line" ]] && EXEMPT_PATTERNS+=("$line") - done < <(yq_list "$config" ".exempt") - - while IFS= read -r line; do - [[ -n "$line" ]] && SCAN_EXTENSIONS+=("$line") - done < <(yq_list "$config" ".scan") - else - DEFAULT_WARN=$(fallback_nested_scalar "$config" "defaults" "warn") - DEFAULT_ERROR=$(fallback_nested_scalar "$config" "defaults" "error") - EXTENDED_WARN=$(fallback_nested_scalar "$config" "extended" "warn") - EXTENDED_ERROR=$(fallback_nested_scalar "$config" "extended" "error") - - while IFS= read -r line; do - [[ -n "$line" ]] && EXTENDED_FILES+=("$line") - done < <(fallback_nested_list "$config" "extended" "files") - - while IFS= read -r line; do - [[ -n "$line" ]] && EXEMPT_PATTERNS+=("$line") - done < <(fallback_list "$config" "exempt") - - while IFS= read -r line; do - [[ -n "$line" ]] && SCAN_EXTENSIONS+=("$line") - done < <(fallback_list "$config" "scan") + if ! command -v yq &>/dev/null; then + echo "ERROR: yq is required to parse ${config}" >&2 + return 1 fi - # Apply defaults for empty values - : "${DEFAULT_WARN:=5120}" - : "${DEFAULT_ERROR:=10240}" - - if [[ ${#SCAN_EXTENSIONS[@]} -eq 0 ]]; then - SCAN_EXTENSIONS=("${DEFAULT_SCAN[@]}") + if ! yq '.' "$config" &>/dev/null; then + echo "ERROR: yq failed to parse ${config}" >&2 + return 1 fi -} -# --------------------------------------------------------------------------- -# Check if a filename matches an exempt pattern -# --------------------------------------------------------------------------- -is_exempt() { - local file="$1" basename - basename=$(basename "$file") - for pattern in "${EXEMPT_PATTERNS[@]}"; do - # shellcheck disable=SC2254 - case "$basename" in - $pattern) return 0 ;; - esac - if [[ "$file" == "$pattern" || "$file" == "./$pattern" ]]; then - return 0 - fi - done - return 1 -} + DEFAULT_WARN=$(yq -r '.defaults.warn // empty' "$config") + DEFAULT_ERROR=$(yq -r '.defaults.error // empty' "$config") + EXTENDED_WARN=$(yq -r '.extended.warn // empty' "$config") + EXTENDED_ERROR=$(yq -r '.extended.error // empty' "$config") -# --------------------------------------------------------------------------- -# Check if a filename is in the extended list -# --------------------------------------------------------------------------- -is_extended() { - local file="$1" basename - basename=$(basename "$file") - for ext_file in "${EXTENDED_FILES[@]}"; do - if [[ "$basename" == "$ext_file" ]]; then - return 0 - fi - done - return 1 -} + read_into EXTENDED_FILES yq -r '.extended.files // [] | .[]' "$config" + read_into EXEMPT_PATTERNS yq -r '.exempt // [] | .[]' "$config" + read_into SCAN_EXTENSIONS yq -r '.scan // [] | .[]' "$config" -# --------------------------------------------------------------------------- -# Get file size in bytes (cross-platform) -# --------------------------------------------------------------------------- -file_size_bytes() { - if stat --version &>/dev/null 2>&1; then - stat -c%s "$1" - else - stat -f%z "$1" - fi + : "${DEFAULT_WARN:=5120}" + : "${DEFAULT_ERROR:=10240}" } # --------------------------------------------------------------------------- @@ -232,50 +93,48 @@ file_size_bytes() { # --------------------------------------------------------------------------- main() { local config_file=".file-size.yml" - local error_count=0 - local warning_count=0 if [[ -f "$config_file" ]]; then - parse_config "$config_file" - else + parse_config "$config_file" || exit 1 + fi + + if [[ ${#SCAN_EXTENSIONS[@]} -eq 0 ]]; then SCAN_EXTENSIONS=("${DEFAULT_SCAN[@]}") fi # Build find name predicates - local name_args=() - local first=true + local name_args=() first=true for ext in "${SCAN_EXTENSIONS[@]}"; do - if $first; then first=false; else name_args+=(-o); fi + $first && first=false || name_args+=(-o) name_args+=(-name "*${ext}") done + local error_count=0 warning_count=0 + local size_bytes warn_limit error_limit size_kb limit_kb + while IFS= read -r -d '' file; do - if [[ ${#EXEMPT_PATTERNS[@]} -gt 0 ]] && is_exempt "$file"; then + if [[ ${#EXEMPT_PATTERNS[@]} -gt 0 ]] && matches_list EXEMPT_PATTERNS "$file"; then continue fi - local size_bytes - size_bytes=$(file_size_bytes "$file") - - local warn_limit="$DEFAULT_WARN" - local error_limit="$DEFAULT_ERROR" + size_bytes=$(wc -c < "$file" | tr -d ' ') + warn_limit="$DEFAULT_WARN" + error_limit="$DEFAULT_ERROR" if [[ -n "${EXTENDED_WARN:-}" && -n "${EXTENDED_ERROR:-}" ]] \ && [[ ${#EXTENDED_FILES[@]} -gt 0 ]] \ - && is_extended "$file"; then + && matches_list EXTENDED_FILES "$file"; then warn_limit="$EXTENDED_WARN" error_limit="$EXTENDED_ERROR" fi if [[ "$size_bytes" -ge "$error_limit" ]]; then - local size_kb=$(( size_bytes / 1024 )) - local limit_kb=$(( error_limit / 1024 )) - emit_error "$file" "${size_kb}KB exceeds ${limit_kb}KB error limit" + size_kb=$(( size_bytes / 1024 )); limit_kb=$(( error_limit / 1024 )) + emit error "$file" "${size_kb}KB exceeds ${limit_kb}KB error limit" error_count=$(( error_count + 1 )) elif [[ "$size_bytes" -ge "$warn_limit" ]]; then - local size_kb=$(( size_bytes / 1024 )) - local limit_kb=$(( warn_limit / 1024 )) - emit_warning "$file" "${size_kb}KB exceeds ${limit_kb}KB warning limit" + size_kb=$(( size_bytes / 1024 )); limit_kb=$(( warn_limit / 1024 )) + emit warning "$file" "${size_kb}KB exceeds ${limit_kb}KB warning limit" warning_count=$(( warning_count + 1 )) fi done < <(find . -type f \( "${name_args[@]}" \) \ @@ -289,7 +148,7 @@ main() { echo "File size check: all files within limits" fi - if [[ $error_count -gt 125 ]]; then exit 125; fi + [[ $error_count -gt 125 ]] && exit 125 exit "$error_count" }