diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.github/workflows/fast.yml b/.github/workflows/fast.yml deleted file mode 100644 index 636a2c9..0000000 --- a/.github/workflows/fast.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Fast update -on: - workflow_dispatch: - schedule: - - cron: '0 * * * *' -jobs: - fast-update: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - persist-credentials: false - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.6' - - name: Build - env: - GITHUB_TOKEN: ${{ secrets.USER_GITHUB_TOKEN }} - SRC_ORG: ${{ secrets.SRC_ORG }} - DST_ORG: ${{ secrets.DST_ORG }} - run: | - pip install pygithub - python -u mirror_github_org.py diff --git a/.github/workflows/full.yml b/.github/workflows/full.yml index 16a6751..c38f983 100644 --- a/.github/workflows/full.yml +++ b/.github/workflows/full.yml @@ -1,25 +1,26 @@ name: Full update -on: +on: workflow_dispatch: schedule: - - cron: '0 0 * * 0' + - cron: '0 */12 * * *' jobs: full-update: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: persist-credentials: false - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: - python-version: '3.6' - - name: Build + python-version: '3.12' + cache: "pip" + - name: Install Python dependencies + run: pip install -r requirements.txt + - name: Run update script env: GITHUB_TOKEN: ${{ secrets.USER_GITHUB_TOKEN }} SRC_ORG: ${{ secrets.SRC_ORG }} DST_ORG: ${{ secrets.DST_ORG }} - run: | - pip install pygithub - python -u mirror_github_org.py --full-run + run: python mirror_github_org.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..92b2793 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.direnv diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..b76ba81 --- /dev/null +++ b/flake.lock @@ -0,0 +1,40 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1722987190, + "narHash": "sha256-68hmex5efCiM2aZlAAEcQgmFI4ZwWt8a80vOeB/5w3A=", + "path": "/nix/store/pi3i4rcxl8iadwxpyyx481ycnyxm856b-source", + "rev": "21cc704b5e918c5fbf4f9fff22b4ac2681706d90", + "type": "path" + }, + "original": { + "id": "nixpkgs", + "type": "indirect" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs", + "systems": "systems" + } + }, + "systems": { + "flake": false, + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "id": "systems", + "type": "indirect" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..c0580cb --- /dev/null +++ b/flake.nix @@ -0,0 +1,34 @@ +{ + inputs = { + nixpkgs = {}; + systems.flake = false; + }; + + outputs = inputs: let + inherit (inputs.nixpkgs) lib; + defaultSystems = import inputs.systems; + argsForSystem = system: { + pkgs = inputs.nixpkgs.legacyPackages.${system}; + }; + allArgs = lib.genAttrs defaultSystems argsForSystem; + eachSystem = fn: lib.genAttrs defaultSystems (system: fn allArgs."${system}"); + in { + formatter = eachSystem ({pkgs, ...}: + pkgs.writeShellScriptBin "formatter" '' + ${pkgs.alejandra}/bin/alejandra flake.nix + ''); + + devShells = eachSystem ({pkgs, ...}: { + default = pkgs.mkShell { + name = "auto_mirror"; + nativeBuildInputs = [ + (pkgs.python3.withPackages (p: [ + p.python-lsp-server + + p.pygithub + ])) + ]; + }; + }); + }; +} diff --git a/mirror_github_org.py b/mirror_github_org.py index d803696..c7742c6 100644 --- a/mirror_github_org.py +++ b/mirror_github_org.py @@ -3,6 +3,7 @@ import time import datetime import urllib.parse +from multiprocessing import pool from github import Github from github.GithubException import UnknownObjectException, GithubException @@ -32,71 +33,75 @@ def check_rate_limiting(rl): print("\n") -def mirror(token, src_org, dst_org, full_run=False): - g = Github(token) +def mirror(token, src_org, dst_org): + g = Github(token, per_page=100) src_org = g.get_organization(src_org) dst_org = g.get_organization(dst_org) - for src_repo in src_org.get_repos("public", sort="pushed", direction="desc"): + print("Building downstream repo index...") + dst_repos = {r.name: r for r in dst_org.get_repos("public")} + + def sync_repo(src_repo): check_rate_limiting(src_repo) - dst_repo = None - try: - dst_repo = dst_org.get_repo(src_repo.name) - except UnknownObjectException: - pass + dst_repo = dst_repos.get(src_repo.name) + + def repo_msg(msg): + print(f"{src_repo.name}: {msg}") if not dst_repo: - print("\n\nForking %s..." % src_repo.name, end="") + repo_msg("forking...") try: response = dst_org.create_fork(src_repo) except GithubException as e: if "contains no Git content" in e._GithubException__data["message"]: # Hit an empty repo, which cannot be forked - print("\n * Skipping empty repository", end="") - continue + repo_msg("skipping empty repository") + return else: raise e else: - print("\n\nSyncing %s..." % src_repo.name, end="") + repo_msg("syncing...") - updated = False - for src_branch in src_repo.get_branches(): - check_rate_limiting(src_branch) + dst_refs = {r.ref: r for r in dst_repo.get_git_refs()} + def copy_ref(src_ref, ref_type): + check_rate_limiting(src_ref) - print("\n - %s " % src_branch.name, end=""), - encoded_name = urllib.parse.quote(src_branch.name) + encoded_name = urllib.parse.quote(src_ref.name) + ref_name = "refs/%s/%s" % (ref_type, encoded_name) - try: - dst_ref = dst_repo.get_git_ref(ref="heads/%s" % encoded_name) - except UnknownObjectException: - dst_ref = None + def ref_msg(msg): + print(f"{src_repo.name}({ref_name}): {msg}") + + dst_ref = dst_refs.get(ref_name) try: if dst_ref and dst_ref.object: - if src_branch.commit.sha != dst_ref.object.sha: - print("(updated)", end="") - dst_ref.edit(sha=src_branch.commit.sha, force=True) - updated = True + if src_ref.commit.sha != dst_ref.object.sha: + dst_ref.edit(sha=src_ref.commit.sha, force=True) + ref_msg("updated reference") else: - print("(new)", end="") dst_repo.create_git_ref( - ref="refs/heads/%s" % encoded_name, sha=src_branch.commit.sha + ref=ref_name, sha=src_ref.commit.sha ) - updated = True + ref_msg("new reference") except GithubException as e: if e.status == 422: - print("\n * Github API hit a transient validation error, ignoring for now: ", e, end="") + ref_msg(f"Github API hit a transient validation error, ignoring for now: {e}") else: raise e - if not full_run and not updated: - print("\n\nNo more updates to mirror. Ending run.") - sys.exit(0) + for src_branch in src_repo.get_branches(): + copy_ref(src_branch, "heads") + + for src_tag in src_repo.get_tags(): + copy_ref(src_tag, "tags") + with pool.ThreadPool(processes=10) as p: + p.map(sync_repo, src_org.get_repos("public")) if __name__ == "__main__": p = {} @@ -106,9 +111,4 @@ def mirror(token, src_org, dst_org, full_run=False): print("No %s supplied in env" % param) sys.exit(1) - full_run=False - if "--full-run" in sys.argv: - print("Doing a full run, will check all repositories and branches - This may take a long time") - full_run = True - - mirror(p["GITHUB_TOKEN"], p["SRC_ORG"], p["DST_ORG"], full_run=full_run) + mirror(p["GITHUB_TOKEN"], p["SRC_ORG"], p["DST_ORG"]) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b81016f --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pygithub