From 67595d365fcc1dbbabbb180169306be4e455c729 Mon Sep 17 00:00:00 2001 From: novenary Date: Mon, 13 May 2024 14:53:03 +0300 Subject: [PATCH 01/13] Update dependencies --- .github/workflows/fast.yml | 6 +++--- .github/workflows/full.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/fast.yml b/.github/workflows/fast.yml index 636a2c9..7d01194 100644 --- a/.github/workflows/fast.yml +++ b/.github/workflows/fast.yml @@ -8,13 +8,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: persist-credentials: false - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: - python-version: '3.6' + python-version: '3.12' - name: Build env: GITHUB_TOKEN: ${{ secrets.USER_GITHUB_TOKEN }} diff --git a/.github/workflows/full.yml b/.github/workflows/full.yml index 16a6751..99c088e 100644 --- a/.github/workflows/full.yml +++ b/.github/workflows/full.yml @@ -8,13 +8,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: persist-credentials: false - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: - python-version: '3.6' + python-version: '3.12' - name: Build env: GITHUB_TOKEN: ${{ secrets.USER_GITHUB_TOKEN }} From 6913403a9b22c6f0e124568d8b0b7c96f275699c Mon Sep 17 00:00:00 2001 From: novenary Date: Mon, 5 Aug 2024 17:02:58 +0300 Subject: [PATCH 02/13] Update tags as well --- mirror_github_org.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/mirror_github_org.py b/mirror_github_org.py index d803696..f5f2aec 100644 --- a/mirror_github_org.py +++ b/mirror_github_org.py @@ -63,27 +63,28 @@ def mirror(token, src_org, dst_org, full_run=False): print("\n\nSyncing %s..." % src_repo.name, end="") updated = False - for src_branch in src_repo.get_branches(): - check_rate_limiting(src_branch) + def copy_ref(src_ref, ref_type): + nonlocal updated + check_rate_limiting(src_ref) - print("\n - %s " % src_branch.name, end=""), - encoded_name = urllib.parse.quote(src_branch.name) + print("\n - %s " % src_ref.name, end=""), + encoded_name = urllib.parse.quote(src_ref.name) try: - dst_ref = dst_repo.get_git_ref(ref="heads/%s" % encoded_name) + dst_ref = dst_repo.get_git_ref(ref="%s/%s" % (ref_type, encoded_name)) except UnknownObjectException: dst_ref = None try: if dst_ref and dst_ref.object: - if src_branch.commit.sha != dst_ref.object.sha: + if src_ref.commit.sha != dst_ref.object.sha: print("(updated)", end="") - dst_ref.edit(sha=src_branch.commit.sha, force=True) + dst_ref.edit(sha=src_ref.commit.sha, force=True) updated = True else: print("(new)", end="") dst_repo.create_git_ref( - ref="refs/heads/%s" % encoded_name, sha=src_branch.commit.sha + ref="refs/%s/%s" % (ref_type, encoded_name), sha=src_ref.commit.sha ) updated = True @@ -93,6 +94,12 @@ def mirror(token, src_org, dst_org, full_run=False): else: raise e + for src_branch in src_repo.get_branches(): + copy_ref(src_branch, "heads") + + for src_tag in src_repo.get_tags(): + copy_ref(src_tag, "tags") + if not full_run and not updated: print("\n\nNo more updates to mirror. Ending run.") sys.exit(0) From 81777f13ff8c86e4956a622665f83bb91b5fdb71 Mon Sep 17 00:00:00 2001 From: novenary Date: Mon, 5 Aug 2024 18:02:43 +0300 Subject: [PATCH 03/13] Fetch entire ref list of destination repo at once This should cut down on the number of requests significantly to speed up the process and avoid hitting the rate limit. --- mirror_github_org.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mirror_github_org.py b/mirror_github_org.py index f5f2aec..663ce6f 100644 --- a/mirror_github_org.py +++ b/mirror_github_org.py @@ -33,7 +33,7 @@ def check_rate_limiting(rl): def mirror(token, src_org, dst_org, full_run=False): - g = Github(token) + g = Github(token, per_page=100) src_org = g.get_organization(src_org) dst_org = g.get_organization(dst_org) @@ -63,17 +63,16 @@ def mirror(token, src_org, dst_org, full_run=False): print("\n\nSyncing %s..." % src_repo.name, end="") updated = False + dst_refs = {r.ref: r for r in dst_repo.get_git_refs()} def copy_ref(src_ref, ref_type): nonlocal updated check_rate_limiting(src_ref) print("\n - %s " % src_ref.name, end=""), encoded_name = urllib.parse.quote(src_ref.name) + ref_name = "refs/%s/%s" % (ref_type, encoded_name) - try: - dst_ref = dst_repo.get_git_ref(ref="%s/%s" % (ref_type, encoded_name)) - except UnknownObjectException: - dst_ref = None + dst_ref = dst_refs.get(ref_name) try: if dst_ref and dst_ref.object: @@ -84,7 +83,7 @@ def copy_ref(src_ref, ref_type): else: print("(new)", end="") dst_repo.create_git_ref( - ref="refs/%s/%s" % (ref_type, encoded_name), sha=src_ref.commit.sha + ref=ref_name, sha=src_ref.commit.sha ) updated = True From ca62021398c2edfc0bb4dfe32d650a9481a1401e Mon Sep 17 00:00:00 2001 From: novenary Date: Mon, 5 Aug 2024 18:28:08 +0300 Subject: [PATCH 04/13] Bump full runs to daily schedule --- .github/workflows/full.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/full.yml b/.github/workflows/full.yml index 99c088e..0b0334a 100644 --- a/.github/workflows/full.yml +++ b/.github/workflows/full.yml @@ -2,7 +2,7 @@ name: Full update on: workflow_dispatch: schedule: - - cron: '0 0 * * 0' + - cron: '0 0 * * *' jobs: full-update: runs-on: ubuntu-latest From fc56c4442b0f6692b02edde751724681bec16ac4 Mon Sep 17 00:00:00 2001 From: novenary Date: Fri, 16 Aug 2024 21:54:32 +0300 Subject: [PATCH 05/13] Disable "fast" runs Fast runs finish very quickly, but still count as a full minute due to how github does accounting. It's cheaper on credits to run the full update a bit more often, and more reliable too. --- .github/workflows/fast.yml | 25 ------------------------- .github/workflows/full.yml | 2 +- 2 files changed, 1 insertion(+), 26 deletions(-) delete mode 100644 .github/workflows/fast.yml diff --git a/.github/workflows/fast.yml b/.github/workflows/fast.yml deleted file mode 100644 index 7d01194..0000000 --- a/.github/workflows/fast.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Fast update -on: - workflow_dispatch: - schedule: - - cron: '0 * * * *' -jobs: - fast-update: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - name: Build - env: - GITHUB_TOKEN: ${{ secrets.USER_GITHUB_TOKEN }} - SRC_ORG: ${{ secrets.SRC_ORG }} - DST_ORG: ${{ secrets.DST_ORG }} - run: | - pip install pygithub - python -u mirror_github_org.py diff --git a/.github/workflows/full.yml b/.github/workflows/full.yml index 0b0334a..fc6337c 100644 --- a/.github/workflows/full.yml +++ b/.github/workflows/full.yml @@ -2,7 +2,7 @@ name: Full update on: workflow_dispatch: schedule: - - cron: '0 0 * * *' + - cron: '0 */12 * * *' jobs: full-update: runs-on: ubuntu-latest From e93cbf5878a1dca86cd76afd5dd8b0ca5fe0456d Mon Sep 17 00:00:00 2001 From: novenary Date: Fri, 16 Aug 2024 22:05:33 +0300 Subject: [PATCH 06/13] Remove fast run logic --- .github/workflows/full.yml | 2 +- mirror_github_org.py | 17 ++--------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/.github/workflows/full.yml b/.github/workflows/full.yml index fc6337c..a27facc 100644 --- a/.github/workflows/full.yml +++ b/.github/workflows/full.yml @@ -22,4 +22,4 @@ jobs: DST_ORG: ${{ secrets.DST_ORG }} run: | pip install pygithub - python -u mirror_github_org.py --full-run + python -u mirror_github_org.py diff --git a/mirror_github_org.py b/mirror_github_org.py index 663ce6f..09b033b 100644 --- a/mirror_github_org.py +++ b/mirror_github_org.py @@ -32,7 +32,7 @@ def check_rate_limiting(rl): print("\n") -def mirror(token, src_org, dst_org, full_run=False): +def mirror(token, src_org, dst_org): g = Github(token, per_page=100) src_org = g.get_organization(src_org) @@ -62,10 +62,8 @@ def mirror(token, src_org, dst_org, full_run=False): else: print("\n\nSyncing %s..." % src_repo.name, end="") - updated = False dst_refs = {r.ref: r for r in dst_repo.get_git_refs()} def copy_ref(src_ref, ref_type): - nonlocal updated check_rate_limiting(src_ref) print("\n - %s " % src_ref.name, end=""), @@ -79,13 +77,11 @@ def copy_ref(src_ref, ref_type): if src_ref.commit.sha != dst_ref.object.sha: print("(updated)", end="") dst_ref.edit(sha=src_ref.commit.sha, force=True) - updated = True else: print("(new)", end="") dst_repo.create_git_ref( ref=ref_name, sha=src_ref.commit.sha ) - updated = True except GithubException as e: if e.status == 422: @@ -99,10 +95,6 @@ def copy_ref(src_ref, ref_type): for src_tag in src_repo.get_tags(): copy_ref(src_tag, "tags") - if not full_run and not updated: - print("\n\nNo more updates to mirror. Ending run.") - sys.exit(0) - if __name__ == "__main__": p = {} @@ -112,9 +104,4 @@ def copy_ref(src_ref, ref_type): print("No %s supplied in env" % param) sys.exit(1) - full_run=False - if "--full-run" in sys.argv: - print("Doing a full run, will check all repositories and branches - This may take a long time") - full_run = True - - mirror(p["GITHUB_TOKEN"], p["SRC_ORG"], p["DST_ORG"], full_run=full_run) + mirror(p["GITHUB_TOKEN"], p["SRC_ORG"], p["DST_ORG"]) From dd03e44529c642c74f7ac6a46e39603efb043c29 Mon Sep 17 00:00:00 2001 From: novenary Date: Fri, 16 Aug 2024 22:12:43 +0300 Subject: [PATCH 07/13] Enumerate destination org's repos --- mirror_github_org.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/mirror_github_org.py b/mirror_github_org.py index 09b033b..0711e49 100644 --- a/mirror_github_org.py +++ b/mirror_github_org.py @@ -38,14 +38,11 @@ def mirror(token, src_org, dst_org): src_org = g.get_organization(src_org) dst_org = g.get_organization(dst_org) - for src_repo in src_org.get_repos("public", sort="pushed", direction="desc"): + dst_repos = {r.name: r for r in dst_org.get_repos("public")} + for src_repo in src_org.get_repos("public"): check_rate_limiting(src_repo) - dst_repo = None - try: - dst_repo = dst_org.get_repo(src_repo.name) - except UnknownObjectException: - pass + dst_repo = dst_repos.get(src_repo.name) if not dst_repo: print("\n\nForking %s..." % src_repo.name, end="") From 1ba8993d7918cd3c8531135da325e9b4e0a61bad Mon Sep 17 00:00:00 2001 From: novenary Date: Fri, 16 Aug 2024 22:17:50 +0300 Subject: [PATCH 08/13] Skip up to date repos entirely --- mirror_github_org.py | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/mirror_github_org.py b/mirror_github_org.py index 0711e49..a5467b0 100644 --- a/mirror_github_org.py +++ b/mirror_github_org.py @@ -59,8 +59,12 @@ def mirror(token, src_org, dst_org): else: print("\n\nSyncing %s..." % src_repo.name, end="") - dst_refs = {r.ref: r for r in dst_repo.get_git_refs()} + if src_repo.pushed_at <= dst_repo.pushed_at: + print(" (up to date)", end="") + continue + def copy_ref(src_ref, ref_type): + nonlocal updated check_rate_limiting(src_ref) print("\n - %s " % src_ref.name, end=""), @@ -74,11 +78,13 @@ def copy_ref(src_ref, ref_type): if src_ref.commit.sha != dst_ref.object.sha: print("(updated)", end="") dst_ref.edit(sha=src_ref.commit.sha, force=True) + updated = True else: print("(new)", end="") dst_repo.create_git_ref( ref=ref_name, sha=src_ref.commit.sha ) + updated = True except GithubException as e: if e.status == 422: @@ -86,11 +92,30 @@ def copy_ref(src_ref, ref_type): else: raise e - for src_branch in src_repo.get_branches(): - copy_ref(src_branch, "heads") + while True: + dst_refs = {r.ref: r for r in dst_repo.get_git_refs()} + updated = False + + for src_branch in src_repo.get_branches(): + copy_ref(src_branch, "heads") + + for src_tag in src_repo.get_tags(): + copy_ref(src_tag, "tags") - for src_tag in src_repo.get_tags(): - copy_ref(src_tag, "tags") + # Pull requests bump the pushed date, + # no need to retry if we haven't updated any refs + if not updated: + break + + last_src_repo = src_repo + src_repo = src_org.get_repo(src_repo.name) + if src_repo.id != last_src_repo.id: + print("\n * Got a different repo while retrying!") + sys.exit(1) + if src_repo.pushed_at != last_src_repo.pushed_at: + print("\n * Upstream was pushed while updating, retrying...") + else: + break if __name__ == "__main__": From 8f758352b5d3b28d2cfc75069d371b67debd1f4b Mon Sep 17 00:00:00 2001 From: novenary Date: Fri, 16 Aug 2024 23:45:47 +0300 Subject: [PATCH 09/13] Revert "Skip up to date repos entirely" No point over-optimizing for now, let's prioritize reliability. This reverts commit 1ba8993d7918cd3c8531135da325e9b4e0a61bad. --- mirror_github_org.py | 35 +++++------------------------------ 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/mirror_github_org.py b/mirror_github_org.py index a5467b0..0711e49 100644 --- a/mirror_github_org.py +++ b/mirror_github_org.py @@ -59,12 +59,8 @@ def mirror(token, src_org, dst_org): else: print("\n\nSyncing %s..." % src_repo.name, end="") - if src_repo.pushed_at <= dst_repo.pushed_at: - print(" (up to date)", end="") - continue - + dst_refs = {r.ref: r for r in dst_repo.get_git_refs()} def copy_ref(src_ref, ref_type): - nonlocal updated check_rate_limiting(src_ref) print("\n - %s " % src_ref.name, end=""), @@ -78,13 +74,11 @@ def copy_ref(src_ref, ref_type): if src_ref.commit.sha != dst_ref.object.sha: print("(updated)", end="") dst_ref.edit(sha=src_ref.commit.sha, force=True) - updated = True else: print("(new)", end="") dst_repo.create_git_ref( ref=ref_name, sha=src_ref.commit.sha ) - updated = True except GithubException as e: if e.status == 422: @@ -92,30 +86,11 @@ def copy_ref(src_ref, ref_type): else: raise e - while True: - dst_refs = {r.ref: r for r in dst_repo.get_git_refs()} - updated = False - - for src_branch in src_repo.get_branches(): - copy_ref(src_branch, "heads") - - for src_tag in src_repo.get_tags(): - copy_ref(src_tag, "tags") + for src_branch in src_repo.get_branches(): + copy_ref(src_branch, "heads") - # Pull requests bump the pushed date, - # no need to retry if we haven't updated any refs - if not updated: - break - - last_src_repo = src_repo - src_repo = src_org.get_repo(src_repo.name) - if src_repo.id != last_src_repo.id: - print("\n * Got a different repo while retrying!") - sys.exit(1) - if src_repo.pushed_at != last_src_repo.pushed_at: - print("\n * Upstream was pushed while updating, retrying...") - else: - break + for src_tag in src_repo.get_tags(): + copy_ref(src_tag, "tags") if __name__ == "__main__": From 49333541cbb318494707e986403ffc0870d01a30 Mon Sep 17 00:00:00 2001 From: novenary Date: Sat, 17 Aug 2024 16:23:28 +0300 Subject: [PATCH 10/13] Enable pip cache --- .github/workflows/full.yml | 11 ++++++----- requirements.txt | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) create mode 100644 requirements.txt diff --git a/.github/workflows/full.yml b/.github/workflows/full.yml index a27facc..71d046d 100644 --- a/.github/workflows/full.yml +++ b/.github/workflows/full.yml @@ -1,5 +1,5 @@ name: Full update -on: +on: workflow_dispatch: schedule: - cron: '0 */12 * * *' @@ -15,11 +15,12 @@ jobs: uses: actions/setup-python@v5 with: python-version: '3.12' - - name: Build + cache: "pip" + - name: Install Python dependencies + run: pip install -r requirements.txt + - name: Run update script env: GITHUB_TOKEN: ${{ secrets.USER_GITHUB_TOKEN }} SRC_ORG: ${{ secrets.SRC_ORG }} DST_ORG: ${{ secrets.DST_ORG }} - run: | - pip install pygithub - python -u mirror_github_org.py + run: python -u mirror_github_org.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b81016f --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pygithub From 926faaf76d2d0034fd2a21789349164a4b49622c Mon Sep 17 00:00:00 2001 From: novenary Date: Sat, 17 Aug 2024 16:50:21 +0300 Subject: [PATCH 11/13] Add nix flake --- .envrc | 1 + .gitignore | 1 + flake.lock | 40 ++++++++++++++++++++++++++++++++++++++++ flake.nix | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 76 insertions(+) create mode 100644 .envrc create mode 100644 .gitignore create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..92b2793 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.direnv diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..b76ba81 --- /dev/null +++ b/flake.lock @@ -0,0 +1,40 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1722987190, + "narHash": "sha256-68hmex5efCiM2aZlAAEcQgmFI4ZwWt8a80vOeB/5w3A=", + "path": "/nix/store/pi3i4rcxl8iadwxpyyx481ycnyxm856b-source", + "rev": "21cc704b5e918c5fbf4f9fff22b4ac2681706d90", + "type": "path" + }, + "original": { + "id": "nixpkgs", + "type": "indirect" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs", + "systems": "systems" + } + }, + "systems": { + "flake": false, + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "id": "systems", + "type": "indirect" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..c0580cb --- /dev/null +++ b/flake.nix @@ -0,0 +1,34 @@ +{ + inputs = { + nixpkgs = {}; + systems.flake = false; + }; + + outputs = inputs: let + inherit (inputs.nixpkgs) lib; + defaultSystems = import inputs.systems; + argsForSystem = system: { + pkgs = inputs.nixpkgs.legacyPackages.${system}; + }; + allArgs = lib.genAttrs defaultSystems argsForSystem; + eachSystem = fn: lib.genAttrs defaultSystems (system: fn allArgs."${system}"); + in { + formatter = eachSystem ({pkgs, ...}: + pkgs.writeShellScriptBin "formatter" '' + ${pkgs.alejandra}/bin/alejandra flake.nix + ''); + + devShells = eachSystem ({pkgs, ...}: { + default = pkgs.mkShell { + name = "auto_mirror"; + nativeBuildInputs = [ + (pkgs.python3.withPackages (p: [ + p.python-lsp-server + + p.pygithub + ])) + ]; + }; + }); + }; +} From 6c139943ef96119623c114f9323f6afd4684b378 Mon Sep 17 00:00:00 2001 From: novenary Date: Sat, 17 Aug 2024 17:10:14 +0300 Subject: [PATCH 12/13] Rework log messages --- .github/workflows/full.yml | 2 +- mirror_github_org.py | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/workflows/full.yml b/.github/workflows/full.yml index 71d046d..c38f983 100644 --- a/.github/workflows/full.yml +++ b/.github/workflows/full.yml @@ -23,4 +23,4 @@ jobs: GITHUB_TOKEN: ${{ secrets.USER_GITHUB_TOKEN }} SRC_ORG: ${{ secrets.SRC_ORG }} DST_ORG: ${{ secrets.DST_ORG }} - run: python -u mirror_github_org.py + run: python mirror_github_org.py diff --git a/mirror_github_org.py b/mirror_github_org.py index 0711e49..3f88eeb 100644 --- a/mirror_github_org.py +++ b/mirror_github_org.py @@ -38,51 +38,58 @@ def mirror(token, src_org, dst_org): src_org = g.get_organization(src_org) dst_org = g.get_organization(dst_org) + print("Building downstream repo index...") dst_repos = {r.name: r for r in dst_org.get_repos("public")} + for src_repo in src_org.get_repos("public"): check_rate_limiting(src_repo) dst_repo = dst_repos.get(src_repo.name) + def repo_msg(msg): + print(f"{src_repo.name}: {msg}") + if not dst_repo: - print("\n\nForking %s..." % src_repo.name, end="") + repo_msg("forking...") try: response = dst_org.create_fork(src_repo) except GithubException as e: if "contains no Git content" in e._GithubException__data["message"]: # Hit an empty repo, which cannot be forked - print("\n * Skipping empty repository", end="") + repo_msg("skipping empty repository") continue else: raise e else: - print("\n\nSyncing %s..." % src_repo.name, end="") + repo_msg("syncing...") dst_refs = {r.ref: r for r in dst_repo.get_git_refs()} def copy_ref(src_ref, ref_type): check_rate_limiting(src_ref) - print("\n - %s " % src_ref.name, end=""), encoded_name = urllib.parse.quote(src_ref.name) ref_name = "refs/%s/%s" % (ref_type, encoded_name) + def ref_msg(msg): + print(f"{src_repo.name}({ref_name}): {msg}") + dst_ref = dst_refs.get(ref_name) try: if dst_ref and dst_ref.object: if src_ref.commit.sha != dst_ref.object.sha: - print("(updated)", end="") dst_ref.edit(sha=src_ref.commit.sha, force=True) + ref_msg("updated reference") else: - print("(new)", end="") dst_repo.create_git_ref( ref=ref_name, sha=src_ref.commit.sha ) + ref_msg("new reference") except GithubException as e: if e.status == 422: - print("\n * Github API hit a transient validation error, ignoring for now: ", e, end="") + ref_msg(f"Github API hit a transient validation error, ignoring for now: {e}") else: raise e From 8bacacd0a1e97a0f50b3a4a99ea783644027c586 Mon Sep 17 00:00:00 2001 From: novenary Date: Sat, 17 Aug 2024 17:19:59 +0300 Subject: [PATCH 13/13] Add threading for concurrent requests --- mirror_github_org.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mirror_github_org.py b/mirror_github_org.py index 3f88eeb..c7742c6 100644 --- a/mirror_github_org.py +++ b/mirror_github_org.py @@ -3,6 +3,7 @@ import time import datetime import urllib.parse +from multiprocessing import pool from github import Github from github.GithubException import UnknownObjectException, GithubException @@ -41,7 +42,7 @@ def mirror(token, src_org, dst_org): print("Building downstream repo index...") dst_repos = {r.name: r for r in dst_org.get_repos("public")} - for src_repo in src_org.get_repos("public"): + def sync_repo(src_repo): check_rate_limiting(src_repo) dst_repo = dst_repos.get(src_repo.name) @@ -57,7 +58,7 @@ def repo_msg(msg): if "contains no Git content" in e._GithubException__data["message"]: # Hit an empty repo, which cannot be forked repo_msg("skipping empty repository") - continue + return else: raise e @@ -99,6 +100,8 @@ def ref_msg(msg): for src_tag in src_repo.get_tags(): copy_ref(src_tag, "tags") + with pool.ThreadPool(processes=10) as p: + p.map(sync_repo, src_org.get_repos("public")) if __name__ == "__main__": p = {}