diff --git a/.github/actions/timed-check/action.yml b/.github/actions/timed-check/action.yml
new file mode 100644
index 0000000..8294b6a
--- /dev/null
+++ b/.github/actions/timed-check/action.yml
@@ -0,0 +1,18 @@
+name: Timed Python Check
+inputs:
+  script:
+    required: true
+  display:
+    required: true
+runs:
+  using: composite
+  steps:
+    - shell: bash
+      run: |
+        mkdir -p "$RUNNER_TEMP/check_results"
+        s=$(date +%s%N)
+        python "${{ inputs.script }}" && outcome=success || outcome=failure
+        time=$(awk "BEGIN{printf \"%.3f\",($(date +%s%N)-$s)/1e9}")
+        printf '{"outcome":"%s","time":"%s"}' "$outcome" "$time" \
+          > "$RUNNER_TEMP/check_results/${{ inputs.display }}.json"
+        [ "$outcome" = "success" ]
diff --git a/.github/checks/check_eps_savefig.py b/.github/checks/check_eps_savefig.py
new file mode 100644
index 0000000..7d89454
--- /dev/null
+++ b/.github/checks/check_eps_savefig.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+import os
+import re
+import sys
+
+def IsExcludedPath(path, excluded):
+    norm = os.path.normpath(path)
+    return any(norm == e or norm.startswith(e + os.sep) for e in excluded)
+
+def IsHidden(name):
+    return name.startswith(".")
+
+def IsIgnoredDir(name):
+    return IsHidden(name) or name == "__pycache__"
+
+def ReadFile(path):
+    try:
+        with open(path, "r", encoding="utf-8") as fh:
+            return fh.read()
+    except Exception:
+        return None
+
+def WalkFiles(root, excluded, extension):
+    for dir_path, dir_names, file_names in os.walk(root):
+        if IsExcludedPath(dir_path, excluded):
+            dir_names[:] = []
+            continue
+
+        dir_names[:] = [d for d in dir_names if not IsIgnoredDir(d)]
+
+        for file_name in file_names:
+            if file_name.endswith(extension) and not IsHidden(file_name):
+                yield os.path.join(dir_path, file_name)
+
+def CheckEpsSavefig(content):
+    eps_savefig_patterns = [
+        r'\.savefig\([^)]*[\'"].*eps.*[\'"][^)]*\)',
+        r'\.savefig\([^)]*format\s*=\s*[\'"]eps[\'"][^)]*\)'
+    ]
+
+    eps_lines = []
+    for line_num, line in enumerate(content.split('\n'), 1):
+        for pattern in eps_savefig_patterns:
+            if re.search(pattern, line, re.IGNORECASE):
+                eps_lines.append(f"Line {line_num}: {line.strip()}")
+                break
+
+    remove_count = content.count('remove_eps_info(')
+    return eps_lines if len(eps_lines) != remove_count else []
+
+def CollectEpsProblems(root, excluded):
+    problems = []
+    for file_path in WalkFiles(root, excluded, '.py'):
+        content = ReadFile(file_path)
+        if content is None:
+            continue
+        file_problems = CheckEpsSavefig(content)
+        if file_problems:
+            problems.append({'file': file_path, 'issues': file_problems})
+    return problems
+
+def Main():
+    source_root = "source"
+    excluded = ["source/lib", "source/raw", "source/scrape"]
+
+    problems = CollectEpsProblems(source_root, excluded)
+
+    if problems:
+        print("EPS savefig check failed!")
+        print("\nPython files using .savefig(*eps*) without remove_eps_info():")
+        for problem in problems:
+            print(f"\nFile: {problem['file']}")
+            for issue in problem['issues']:
+                print(f" {issue}")
+        print("\nTo fix: Add 'from source.lib.JMSLab.remove_eps_info import remove_eps_info'")
+        print("and call 'remove_eps_info(filename)' after each EPS savefig.")
+        return 1
+    else:
+        print("EPS savefig check: all checks passed.")
+        return 0
+
+if __name__ == "__main__":
+    sys.exit(Main())
diff --git a/.github/checks/check_newlines.py b/.github/checks/check_newlines.py
new file mode 100644
index 0000000..931cda6
--- /dev/null
+++ b/.github/checks/check_newlines.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+import subprocess
+import sys
+from pathlib import Path
+
+EXCLUDED_EXT = {".log", ".txt"}
+
+INCLUDED_DIRS = {"source", "output"}
+
+
+def GetTrackedFiles():
+    return [Path(p) for p in subprocess.run(["git", "ls-files"], stdout=subprocess.PIPE, check=True).stdout.decode().splitlines() if p.strip() and Path(p).parts[0] in INCLUDED_DIRS]
+
+
+def IsBinary(p):
+    try:
+        return b"\0" in p.read_bytes()[:4096]
+    except Exception:
+        return True
+
+
+def NeedsNewline(p):
+    try:
+        d = p.read_bytes()
+        return len(d) == 0 or not d.endswith(b"\n")
+    except Exception:
+        return False
+
+
+def ProcessFiles():
+    missing = [p for p in GetTrackedFiles() if p.exists(
+    ) and p.suffix not in EXCLUDED_EXT and not IsBinary(p) and NeedsNewline(p)]
+    if not missing:
+        print("No files missing trailing newlines.")
+        return 0
+    print("Files missing trailing newline:")
+    for p in missing:
+        print(" -", p)
+    return 1
+
+
+def Main():
+    return ProcessFiles()
+
+
+if __name__ == "__main__":
+    sys.exit(Main())
diff --git a/.github/checks/check_sconscript_log.py b/.github/checks/check_sconscript_log.py
new file mode 100644
index 0000000..f3e12c9
--- /dev/null
+++ b/.github/checks/check_sconscript_log.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+import sys
+from pathlib import Path
+
+TARGET = "terminated because of errors."
+
+def Main():
+    bad = []
+    for p in Path(".").rglob("*.log"):
+        try:
+            if TARGET in p.read_text(errors="replace"):
+                bad.append(p)
+        except Exception:
+            pass
+
+    if not bad:
+        print("No log files contain the error string.")
+        return 0
+
+    print("Problematic log files:")
+    for p in bad:
+        print(" -", p)
+
+    return 1
+
+if __name__ == "__main__":
+    sys.exit(Main())
diff --git a/.github/checks/check_sconscripts.py b/.github/checks/check_sconscripts.py
new file mode 100644
index 0000000..be10771
--- /dev/null
+++ b/.github/checks/check_sconscripts.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+import os
+import re
+import sys
+from pathlib import Path
+
+ROOT = Path("source")
+EXCLUDED = {Path("source/lib"), Path("source/raw"), Path("source/scrape")}
+PAPER_DIR = Path("source/paper")
+PAPER_EXTS = {".bib", ".tex", ".lyx"}
+
+
+def Main():
+    missing_dirs, missing_mentions = CollectProblems()
+    missing_in_sconstruct = SourceFoldersMissingInSConstruct()
+    any_problems = bool(missing_dirs or missing_mentions or missing_in_sconstruct)
+    if any_problems:
+        print("SConscript/SConstruct summary of missing items:")
+        if missing_dirs:
+            print("\nFolders missing SConscript:")
+            for p in missing_dirs:
+                print(p)
+        if missing_mentions:
+            print("\nFiles not mentioned in their SConscript:")
+            for p in missing_mentions:
+                print(p)
+        if missing_in_sconstruct:
+            print("\nTop-level source folders missing from root SConstruct:")
+            for p in missing_in_sconstruct:
+                print(p)
+    else:
+        print("SConscript/SConstruct summary: all checks passed.")
+    return 1 if any_problems else 0
+
+
+def CollectProblems():
+    missing_dirs = []
+    missing_mentions = []
+    for dir_path, dir_names, file_names in os.walk(ROOT):
+        dir_path = Path(dir_path)
+        if IsExcluded(dir_path):
+            dir_names[:] = []
+            continue
+        dir_names[:] = sorted(d for d in dir_names if not IsIgnored(d))
+        file_names = [f for f in file_names if not IsIgnored(f)]
+        if dir_path == ROOT or (not dir_names and not file_names):
+            continue
+        content = Read(dir_path / "SConscript")
+        if content is None:
+            parent_content = Read(dir_path.parent / "SConscript")
+            if parent_content is None:
+                missing_dirs.append(dir_path)
+            continue
+        for f in sorted(f for f in file_names if f != "SConscript"):
+            if ShouldCheck(dir_path, f) and not IsMentioned(content, f, dir_path):
+                missing_mentions.append(f"{dir_path} -> {f}")
+        for subdir in dir_names:
+            if re.search(rf"\b{re.escape(subdir)}\b", content):
+                continue
+            subdir_path = dir_path / subdir
+            try:
+                subfiles = sorted(
+                    e for e in os.listdir(subdir_path)
+                    if not IsIgnored(e) and (subdir_path / e).is_file() and ShouldCheck(subdir_path, e)
+                )
+            except Exception:
+                missing_dirs.append(subdir_path)
+                continue
+            for f in subfiles:
+                if not IsMentioned(content, f, subdir_path):
+                    missing_mentions.append(f"{dir_path} -> {subdir}/{f}")
+    return missing_dirs, missing_mentions
+
+
+def IsExcluded(dir_path):
+    return any(dir_path == e or dir_path.is_relative_to(e) for e in EXCLUDED)
+
+
+def IsIgnored(name):
+    return name.startswith(".") or name == "__pycache__"
+
+
+def ShouldCheck(dir_path, name):
+    if dir_path.is_relative_to(PAPER_DIR):
+        return Path(name).suffix in PAPER_EXTS
+    return True
+
+
+def Read(path):
+    try:
+        return Path(path).read_text(encoding="utf-8")
+    except Exception:
+        return None
+
+
+def IsMentioned(content, name, dir_path):
+    rel = dir_path.relative_to(ROOT).as_posix()
+    return f"#source/{rel}/{name}" in content
+
+
+def SourceFoldersMissingInSConstruct():
+    content = Read("SConstruct")
+    folders = sorted(
+        e for e in os.listdir(ROOT)
+        if (ROOT / e).is_dir() and not IsIgnored(e) and not IsExcluded(ROOT / e)
+    )
+    if content is None:
+        return folders
+    return [f for f in folders if f"source/{f}/SConscript" not in content]
+
+
+if __name__ == "__main__":
+    sys.exit(Main())
diff --git a/.github/checks/checks.json b/.github/checks/checks.json
new file mode 100644
index 0000000..a0f40dc
--- /dev/null
+++ b/.github/checks/checks.json
@@ -0,0 +1,6 @@
+[
+    {"name": "SCons DAG", "command": "/run-actions-dag"},
+    {"name": "Newlines", "command": "/run-actions-newlines"},
+    {"name": "EPS data", "command": "/run-actions-eps"},
+    {"name": "Build log", "command": "/run-actions-log"}
+]
diff --git a/.github/helper_scripts/parse_commands.py b/.github/helper_scripts/parse_commands.py
new file mode 100644
index 0000000..650eb97
--- /dev/null
+++ b/.github/helper_scripts/parse_commands.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+import json
+import os
+import sys
+from pathlib import Path
+
+def LoadChecks():
+    checks_path = Path(__file__).parent.parent / 'checks' / 'checks.json'
+    return json.loads(checks_path.read_text())
+
+def ParseCommands():
+    checks = LoadChecks()
+    comment = os.environ.get('COMMENT_BODY', '')
+    return [c['name'] for c in checks
+            if os.environ.get('GITHUB_EVENT_NAME') == 'push'
+            or '/run-actions-all' in comment
+            or c.get('command', '') in comment]
+
+def Main():
+    print(json.dumps(ParseCommands()))
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(Main())
diff --git a/.github/helper_scripts/post_check_results.py b/.github/helper_scripts/post_check_results.py
new file mode 100644
index 0000000..cbf7b32
--- /dev/null
+++ b/.github/helper_scripts/post_check_results.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+CHECKS_JSON = Path(__file__).parent.parent / 'checks' / 'checks.json'
+
+def Main():
+    repo = os.environ["GITHUB_REPOSITORY"]
+    run_id = os.environ["GITHUB_RUN_ID"]
+
+    print("Runtime:")
+    rows, failed = CollectResults()
+
+    if "--post" in sys.argv:
+        PostResults(repo, run_id, rows, failed)
+
+    if failed:
+        print(f"Failed checks: {', '.join(failed)}")
+        return 1
+    return 0
+
+def CollectResults():
+    checks = json.loads(CHECKS_JSON.read_text())
+    results_dir = Path(os.environ["RUNNER_TEMP"]) / "check_results"
+    rows, failed = [], []
+    STATUS = {"success": "✅", "failure": "❌"}
+    for check in checks:
+        check_name = check["name"]
+        result_file = results_dir / f"{check_name}.json"
+        if result_file.exists():
+            result = json.loads(result_file.read_text())
+            outcome = result["outcome"]
+            elapsed = result["time"]
+            print(f" {check_name}: {elapsed}s")
+            status_icon = STATUS.get(outcome, "❌")
+            if outcome != "success":
+                failed.append(check_name)
+            rows.append(f"| {check_name} | {status_icon} | {elapsed}s |")
+        else:
+            print(f" {check_name}: skipped")
+            rows.append(f"| {check_name} | SKIP | |")
+    return rows, failed
+
+def PostResults(repo, run_id, rows, failed):
+    run_url = f"https://github.com/{repo}/actions/runs/{run_id}"
+    table = "\n".join(["| Check | Result | Time |", "|-------|--------|------|", *rows])
+    body = f"**Check Results** ([run details]({run_url}))\n\n{table}"
+    pr_num = os.environ["PR_NUMBER"]
+    pr_sha = os.environ["PR_SHA"]
+    subprocess.run([
+        "gh", "api", f"repos/{repo}/issues/{pr_num}/comments",
+        "--method", "POST", "-f", f"body={body}",
+    ], check=True)
+    state = "failure" if failed else "success"
+    desc = f"Failed: {', '.join(failed)}" if failed else "All checks passed"
+    subprocess.run([
+        "gh", "api", f"repos/{repo}/statuses/{pr_sha}",
+        "-f", f"state={state}", "-f", "context=Checks",
+        "-f", f"description={desc}", "-f", f"target_url={run_url}",
+    ], check=True)
+
+if __name__ == "__main__":
+    sys.exit(Main())
diff --git a/.github/helper_scripts/pr_close_comment.py b/.github/helper_scripts/pr_close_comment.py
new file mode 100644
index 0000000..7d93594
--- /dev/null
+++ b/.github/helper_scripts/pr_close_comment.py
@@ -0,0 +1,49 @@
+import os
+import re
+import sys
+from pathlib import Path
+from github import Github
+
+ISSUE_TEMPLATE = Path(".github/post_template_issue_thread_pr_close.md")
+PR_TEMPLATE = Path(".github/post_template_pr_thread_pr_close.md")
+
+
+def Main():
+    github_token = os.environ["GITHUB_TOKEN"]
+    repo_name = os.environ["REPO"]
+    pr_number = int(os.environ["PR_NUMBER"])
+    pr_author = os.environ["PR_AUTHOR"]
+    branch_name = os.environ["BRANCH_NAME"]
+    last_commit_sha = os.environ["LAST_COMMIT_SHA"]
+
+    repo = Github(github_token).get_repo(repo_name)
+    pr = repo.get_pull(pr_number)
+
+    issue_comment_url = PostIssueComment(repo, branch_name, last_commit_sha)
+    PostPrComment(pr, pr_author, issue_comment_url)
+    return 0
+
+
+def PostIssueComment(repo, branch_name, last_commit_sha):
+    issue_match = re.match(r"^(\d+)", branch_name)
+    if not issue_match:
+        return None
+
+    issue_number = int(issue_match.group(1))
+    issue_body = ISSUE_TEMPLATE.read_text() + f"\n\nLast commit in issue branch: {last_commit_sha}"
+
+    comment = repo.get_issue(issue_number).create_comment(issue_body)
+    return comment.html_url
+
+
+def PostPrComment(pr, pr_author, issue_comment_url):
+    pr_body = PR_TEMPLATE.read_text()
+
+    if issue_comment_url:
+        pr_body = f"[Issue summary]({issue_comment_url})\n\n{pr_body}"
+
+    pr.as_issue().create_comment(f"@{pr_author} {pr_body}")
+
+
+if __name__ == "__main__":
+    sys.exit(Main())
diff --git a/.github/post_template_issue_thread_pr_close.md b/.github/post_template_issue_thread_pr_close.md
new file mode 100644
index 0000000..dc49fbf
--- /dev/null
+++ b/.github/post_template_issue_thread_pr_close.md
@@ -0,0 +1,10 @@
+### Summary
+
+In this issue, we:
+
+- WHAT DID YOU DO?
+
+Deliverables:
+- WHAT DID YOU BUILD? IN PERMALINK
+
+[Optional] Issue folder is PERMALINK
diff --git a/.github/post_template_pr_thread_pr_close.md b/.github/post_template_pr_thread_pr_close.md
new file mode 100644
index 0000000..a4488dd
--- /dev/null
+++ b/.github/post_template_pr_thread_pr_close.md
@@ -0,0 +1,6 @@
+Thanks for closing this pull.
+
+Before leaving the pull, please be sure you have completed all the required steps in the [workflow](https://github.com/JMSLab/Template/blob/main/docs/workflow.md).
+
+This includes filling in the issue summary linked at the top of this comment.
+
diff --git a/.github/readme_for_checks.md b/.github/readme_for_checks.md
new file mode 100644
index 0000000..cc37aaf
--- /dev/null
+++ b/.github/readme_for_checks.md
@@ -0,0 +1,8 @@
+# GitHub Actions Checks
+
+## Adding a new check
+
+1. Add a step to `workflows/checks.yml` with a unique `id` and `display` (e.g. `id: check_newlines`, `display: Newlines`), and a `script` pointing to your script in `checks/`
+2. Add `{"name": "Newlines", "command": "/run-actions-newlines"}` (i.e. matching `display`) to `checks/checks.json`
+
+The order of entries in `checks.json` controls the row order in the results table.
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
new file mode 100644
index 0000000..44e3cc6
--- /dev/null
+++ b/.github/workflows/checks.yml
@@ -0,0 +1,96 @@
+name: Checks
+
+on:
+  push:
+    branches:
+      - "**"
+  issue_comment:
+    types: [created]
+
+jobs:
+  checks:
+    if: |
+      (github.event_name == 'push' && contains(github.event.head_commit.message, '[run-actions-all]')) ||
+      (github.event_name == 'issue_comment' && github.event.issue.pull_request && contains(github.event.comment.body, '/run-actions-'))
+    runs-on: ubuntu-latest
+    timeout-minutes: 1
+    permissions:
+      statuses: write
+      pull-requests: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ github.event_name == 'issue_comment' && format('refs/pull/{0}/head', github.event.issue.number) || github.sha }}
+
+      - name: Post pending commit status
+        if: github.event_name == 'issue_comment'
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          PR_SHA=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.issue.number }} --jq '.head.sha')
+          echo "PR_SHA=$PR_SHA" >> $GITHUB_ENV
+          gh api repos/${{ github.repository }}/statuses/$PR_SHA \
+            -f state=pending \
+            -f context="Checks" \
+            -f description="Running..." \
+            -f target_url="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+      - name: Setup Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.x"
+
+      - id: parse_commands
+        name: Parse requested checks
+        env:
+          COMMENT_BODY: ${{ github.event.comment.body }}
+        run: echo "run=$(python .github/helper_scripts/parse_commands.py)" >> $GITHUB_OUTPUT
+        shell: bash
+
+      - id: check_scons
+        name: Check SCons DAG for missing dependencies
+        if: contains(fromJSON(steps.parse_commands.outputs.run), 'SCons DAG')
+        continue-on-error: true
+        uses: ./.github/actions/timed-check
+        with:
+          script: .github/checks/check_sconscripts.py
+          display: SCons DAG
+
+      - id: check_newlines
+        name: Check missing newlines
+        if: contains(fromJSON(steps.parse_commands.outputs.run), 'Newlines')
+        continue-on-error: true
+        uses: ./.github/actions/timed-check
+        with:
+          script: .github/checks/check_newlines.py
+          display: Newlines
+
+      - id: check_eps
+        name: Check extraneous EPS data
+        if: contains(fromJSON(steps.parse_commands.outputs.run), 'EPS data')
+        continue-on-error: true
+        uses: ./.github/actions/timed-check
+        with:
+          script: .github/checks/check_eps_savefig.py
+          display: EPS data
+
+      - id: check_scons_log
+        name: Check SCons build log for errors
+        if: contains(fromJSON(steps.parse_commands.outputs.run), 'Build log')
+        continue-on-error: true
+        uses: ./.github/actions/timed-check
+        with:
+          script: .github/checks/check_sconscript_log.py
+          display: Build log
+
+      - name: Post results
+        if: always()
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ github.event.issue.number }}
+          PR_SHA: ${{ env.PR_SHA }}
+        run: |
+          python .github/helper_scripts/post_check_results.py \
+            ${{ contains(github.event.comment.body, '--post') && '--post' || '' }}
diff --git a/.github/workflows/pr-close-comment.yml b/.github/workflows/pr-close-comment.yml
new file mode 100644
index 0000000..6f5db5e
--- /dev/null
+++ b/.github/workflows/pr-close-comment.yml
@@ -0,0 +1,34 @@
+name: PR Close Comment
+
+on:
+  pull_request:
+    types: [closed]
+
+permissions:
+  issues: write
+  pull-requests: write
+
+jobs:
+  comment:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.x"
+
+      - name: Install dependencies
+        run: pip install PyGithub
+
+      - name: Comment on PR close
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
+          BRANCH_NAME: ${{ github.event.pull_request.head.ref }}
+          LAST_COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
+        run: python .github/helper_scripts/pr_close_comment.py
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index f7c5d65..b33128d 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -20,9 +20,9 @@ jobs:
         os: [ ubuntu-latest, windows-latest, macos-latest ]
         python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14" ]
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v6
    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
diff --git a/README.md b/README.md
index b20b4a7..6b2694b 100644
--- a/README.md
+++ b/README.md
@@ -91,6 +91,14 @@ env.Stata(target, source)
 
 - For tips on batch-specifying targets and sources, see [./docs/batch_specifying.md](./docs/batch_specifying.md).
 
+### Automation
+
+The repository is prebuilt with some automated testing using [GitHub Actions](./.github).
+
+To run all tests, add `[run-actions-all]` to a commit message or type `/run-actions-all` in a comment.
+
+To run a particular test, type `/run-actions-NAMEOFTEST` in a comment (e.g., `/run-actions-log` to run [this test](./.github/checks/check_sconscript_log.py); see [commands](./.github/checks/checks.json) for others).
+
 ### Citations and expectations for usage
 
 This template is based on [gslab-econ/Template/v4.1.3](https://github.com/gslab-econ/template/releases/tag/4.1.3) and [gslab-python/v4.1.4](https://github.com/gslab-econ/gslab_python/releases/tag/v4.1.4).
diff --git a/docs/workflow.md b/docs/workflow.md
index 1b142a3..aee04bf 100644
--- a/docs/workflow.md
+++ b/docs/workflow.md
@@ -29,7 +29,7 @@ We suggest the following adaptation of [Github flow](https://docs.github.com/en/
     * the issue subfolder (ephemeral deliverables)
     * the latest version of the issue branch prior to merging (all deliverables).
   * **Link** to the summary comment in the pull request.
-  * **Update** all open branches from `main`.
+  * **Update** all open branches from `main` and (if needed) the datastore.
   * If you encounter merge conflicts you cannot resolve, check with the _assignee(s)_ of the corresponding issue(s).
 * **Prioritize** work in the order older pull requests > newer pull requests > older issues > newer issues.
   * Age is defined by github numbering.