From 8152b85488baa577947d6e938d93b1b9c1377f9f Mon Sep 17 00:00:00 2001
From: Luca Muscariello
Date: Mon, 29 Sep 2025 07:38:44 +0200
Subject: [PATCH 1/4] docs(ci): add requirements.txt and update workflow to use it for dependencies

Signed-off-by: Luca Muscariello
---
 .github/scripts/collect_org_stats.py        | 171 ++++++++++++++++++
 .github/scripts/collect_pypi_stats_full.py  |  72 ++++++++
 .github/scripts/requirements.txt            |   2 +
 .../scrape_ghcr_downloads_playwright.py     |  46 +++++
 .github/workflows/org-stats.yml             |  61 +++++++
 5 files changed, 352 insertions(+)
 create mode 100644 .github/scripts/collect_org_stats.py
 create mode 100644 .github/scripts/collect_pypi_stats_full.py
 create mode 100644 .github/scripts/requirements.txt
 create mode 100644 .github/scripts/scrape_ghcr_downloads_playwright.py
 create mode 100644 .github/workflows/org-stats.yml

diff --git a/.github/scripts/collect_org_stats.py b/.github/scripts/collect_org_stats.py
new file mode 100644
index 00000000..c7c0ce6c
--- /dev/null
+++ b/.github/scripts/collect_org_stats.py
@@ -0,0 +1,171 @@
+import re
+# Scrape all package URLs from the agntcy org packages page
+def get_all_package_urls(org):
+    urls = []
+    # Helper to extract package name and type from URL
+    def parse_package_info(url):
+        # Example: https://github.com/orgs/agntcy/packages/container/package/dir-apiserver
+        m = re.match(r"https://github.com/orgs/[^/]+/packages/(?P<type>[^/]+)/package/(?P<name>.+)", url)
+        if m:
+            return m.group("name"), m.group("type")
+        return url, "unknown"
+    page = 1
+    while True:
+        url = f"https://github.com/orgs/{org}/packages?page={page}"
+        resp = requests.get(url)
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, "html.parser")
+        from bs4 import Tag
+        links = soup.find_all("a", href=True)
+        found = False
+        for link in links:
+            if isinstance(link, Tag):
+                href = link.get("href")
+                if isinstance(href, str) and href.startswith(f"/orgs/{org}/packages/container/package/"):
+                    urls.append(f"https://github.com{href}")
+                    found = True
+        # If no package links found, break
+        if not found:
+            break
+        page += 1
+    return urls, parse_package_info
+    urls = []
+    page = 1
+    while True:
+        url = f"https://github.com/orgs/{org}/packages?page={page}"
+        resp = requests.get(url)
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, "html.parser")
+        from bs4 import Tag
+        links = soup.find_all("a", href=True)
+        found = False
+        for link in links:
+            if isinstance(link, Tag):
+                href = link.get("href")
+                if isinstance(href, str) and href.startswith(f"/orgs/{org}/packages/container/package/"):
+                    urls.append(f"https://github.com{href}")
+                    found = True
+        # If no package links found, break
+        if not found:
+            break
+        page += 1
+    return urls
+import os
+import requests
+import csv
+from bs4 import BeautifulSoup
+
+ORG = "agntcy"
+GITHUB_API = "https://api.github.com"
+TOKEN = os.environ.get("GITHUB_TOKEN")
+HEADERS = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}
+
+CSV_PATH = os.path.join(os.path.dirname(__file__), "agntcy_org_stats.csv")
+
+fields = [
+    "name", "full_name", "description", "html_url", "created_at", "updated_at", "pushed_at",
+    "stargazers_count", "forks_count", "open_issues_count", "archived", "disabled",
+    "unique_views", "artifact_downloads", "package_downloads"
+]
+
+def get_all_repos(org):
+    repos = []
+    page = 1
+    while True:
+        url = f"{GITHUB_API}/orgs/{org}/repos?per_page=100&page={page}"
+        resp = requests.get(url, headers=HEADERS)
+        resp.raise_for_status()
+        data = resp.json()
+        if not data:
+            break
+        repos.extend(data)
+        page += 1
+    return repos
+
+def get_repo_views(owner, repo):
+    url = f"{GITHUB_API}/repos/{owner}/{repo}/traffic/views"
+    resp = requests.get(url, headers=HEADERS)
+    if resp.status_code == 200:
+        data = resp.json()
+        return data.get("uniques", 0)
+    return ""
+
+def get_artifact_downloads(owner, repo):
+    url = f"{GITHUB_API}/repos/{owner}/{repo}/actions/artifacts"
+    resp = requests.get(url, headers=HEADERS)
+    if resp.status_code == 200:
+        data = resp.json()
+        total = 0
+        for artifact in data.get("artifacts", []):
+            total += artifact.get("download_count", 0)
+        return total
+    return ""
+
+# Scrape GitHub Packages download count for a given package URL
+def scrape_package_downloads(package_url):
+    # Instead of scraping the individual package page, scrape the org packages list page for all counts at once
+    # This function will be replaced by scrape_all_package_downloads
+    return None
+
+# Scrape all package download counts from the org packages list page
+def scrape_all_package_downloads(org):
+    url = f"https://github.com/orgs/{org}/packages"
+    resp = requests.get(url)
+    resp.raise_for_status()
+    html = resp.text
+    # Regex to match: [name](url) ... k
+    # Example: [dir-apiserver](...) ... 9.49k
+    pattern = re.compile(r"\[(?P<name>[^\]]+)\]\(https://github.com/orgs/[^/]+/packages/container/package/(?P<pkg>[^)]+)\)[^\n]*?(?P<count>[\d\.]+k|[\d,]+)")
+    results = {}
+    for match in pattern.finditer(html):
+        name = match.group("name")
+        count = match.group("count")
+        # Convert k to integer
+        if "k" in count:
+            count = int(float(count.replace("k", "")) * 1000)
+        else:
+            count = int(count.replace(",", ""))
+        results[name] = count
+    return results
+
+def main():
+    repos = get_all_repos(ORG)
+    # Scrape all package URLs and their download counts
+    package_urls, parse_package_info = get_all_package_urls(ORG)
+    # Scrape all package download counts from the org packages list page
+    package_counts = scrape_all_package_downloads(ORG)
+    package_info_list = []
+    for url in package_urls:
+        name, ptype = parse_package_info(url)
+        count = package_counts.get(name, "")
+        package_info_list.append({"name": name, "type": ptype, "download_count": count})
+
+    # Write package stats to a separate CSV file
+    package_csv_path = os.path.join(os.path.dirname(__file__), "agntcy_packages_stats.csv")
+    with open(package_csv_path, "w", newline="") as pkgfile:
+        pkg_writer = csv.DictWriter(pkgfile, fieldnames=["name", "type", "download_count"])
+        pkg_writer.writeheader()
+        for pkg in package_info_list:
+            pkg_writer.writerow(pkg)
+
+    with open(CSV_PATH, "w", newline="") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=fields)
+        writer.writeheader()
+        for repo in repos:
+            row = {key: repo.get(key, "") for key in fields}
+            owner = repo.get("owner", {}).get("login", ORG)
+            repo_name = repo.get("name", "")
+            row["unique_views"] = get_repo_views(owner, repo_name)
+            row["artifact_downloads"] = get_artifact_downloads(owner, repo_name)
+            # Find matching package URLs for this repo
+            matching_names = [parse_package_info(u)[0] for u in package_urls if f"/{repo_name}" in u]
+            if matching_names:
+                row["package_downloads"] = ", ".join(str(package_counts.get(n, "")) for n in matching_names)
+            else:
+                row["package_downloads"] = ""
+            writer.writerow(row)
+    print(f"Wrote {len(repos)} repos to {CSV_PATH}")
+    print(f"Wrote {len(package_info_list)} packages to {package_csv_path}")
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/scripts/collect_pypi_stats_full.py b/.github/scripts/collect_pypi_stats_full.py
new file mode 100644
index 00000000..7db324b2
--- /dev/null
+++ b/.github/scripts/collect_pypi_stats_full.py
@@ -0,0 +1,72 @@
+
+import requests
+import csv
+import os
+
+CSV_PATH = os.path.join(os.path.dirname(__file__), "agntcy_pypi_stats_full.csv")
+
+agntcy_pkgs = [
+    "agntcy-app-sdk",
+    "metrics-computation-engine",
+    "ioa-observe-sdk",
+    "agntcy-dir",
+    "slim-mcp",
+    "slima2a",
+    "slimrpc",
+    "slim-bindings",
+    "mce-metrics-plugin",
+    "mce-ragas-adapter",
+    "mce-opik-adapter",
+    "mce-deepeval-adapter",
+    "agntcy-identity-sdk",
+    "agntcy-dir-sdk",
+    "ioa-metrics-computation-engine",
+    "agntcy-dir-client-sdk",
+    "agntcy-acp",
+    "agp-mcp",
+    "agp-bindings",
+    "agntcy-iomapper",
+    "agntcy-pypi-sample"
+]
+
+rows = []
+for pkg in agntcy_pkgs:
+    meta_url = f"https://pypi.org/pypi/{pkg}/json"
+    r = requests.get(meta_url)
+    version = ""
+    if r.status_code == 200:
+        info = r.json().get("info", {})
+        version = info.get("version", "")
+    # Get downloads
+    stats_url = f"https://pypistats.org/api/packages/{pkg}/recent"
+    s = requests.get(stats_url)
+    last_day = last_week = last_month = ""
+    if s.status_code == 200:
+        stats = s.json().get("data", {})
+        last_day = stats.get("last_day", 0)
+        last_week = stats.get("last_week", 0)
+        last_month = stats.get("last_month", 0)
+    rows.append({
+        "name": pkg,
+        "version": version,
+        "last_day_downloads": last_day,
+        "last_week_downloads": last_week,
+        "last_month_downloads": last_month
+    })
+
+with open(CSV_PATH, "w", newline="") as csvfile:
+    writer = csv.DictWriter(csvfile, fieldnames=["name", "version", "last_day_downloads", "last_week_downloads", "last_month_downloads"])
+    writer.writeheader()
+    for row in rows:
+        writer.writerow(row)
+print(f"Wrote {len(rows)} agntcy-maintained PyPI packages to {CSV_PATH}")
+
+# Write markdown report
+MD_PATH = os.path.join(os.path.dirname(__file__), "agntcy_pypi_stats_report.md")
+with open(MD_PATH, "w") as mdfile:
+    mdfile.write("# AGNTCY PyPI Package Download Stats\n\n")
+    mdfile.write("| Package | Version | Last Day | Last Week | Last Month |\n")
+    mdfile.write("|---------|---------|----------|-----------|------------|\n")
+    for row in rows:
+        mdfile.write(f"| {row['name']} | {row['version']} | {row['last_day_downloads']} | {row['last_week_downloads']} | {row['last_month_downloads']} |\n")
+print(f"Wrote markdown report to {MD_PATH}")
diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt
new file mode 100644
index 00000000..73fd2ab9
--- /dev/null
+++ b/.github/scripts/requirements.txt
@@ -0,0 +1,2 @@
+requests
+playwright
diff --git a/.github/scripts/scrape_ghcr_downloads_playwright.py b/.github/scripts/scrape_ghcr_downloads_playwright.py
new file mode 100644
index 00000000..7a71fe83
--- /dev/null
+++ b/.github/scripts/scrape_ghcr_downloads_playwright.py
@@ -0,0 +1,46 @@
+import asyncio
+from playwright.async_api import async_playwright
+import csv
+
+ORG_URL = "https://github.com/orgs/agntcy/packages?type=container"
+
+async def scrape_ghcr_downloads():
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(headless=True)
+        containers = []
+        # Get total number of pages
+        page = await browser.new_page()
+        await page.goto(ORG_URL, timeout=60000)
+        await page.wait_for_selector("#org-packages", timeout=60000)
+        pagination = await page.query_selector(".pagination")
+        total_pages = 1
+        if pagination:
+            current = await pagination.query_selector("em.current")
+            if current:
+                total_pages = int(await current.get_attribute("data-total-pages") or "1")
+        await page.close()
+        # Scrape all pages
+        for i in range(1, total_pages+1):
url = f"https://github.com/orgs/agntcy/packages?page={i}&type=container" + page = await browser.new_page() + await page.goto(url, timeout=60000) + await page.wait_for_selector("#org-packages", timeout=60000) + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + await asyncio.sleep(2) + for row in await page.query_selector_all("#org-packages ul li.Box-row"): + name_tag = await row.query_selector('a.Link--primary') + downloads_tag = await row.query_selector('span.color-fg-muted') + if name_tag and downloads_tag: + name = (await name_tag.text_content()).strip() + downloads = (await downloads_tag.text_content()).strip() + containers.append({"name": name, "downloads": downloads}) + await page.close() + await browser.close() + with open("agntcy_ghcr_downloads.csv", "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=["name", "downloads"]) + writer.writeheader() + writer.writerows(containers) + print(f"Wrote {len(containers)} container download stats to agntcy_ghcr_downloads.csv") + +if __name__ == "__main__": + asyncio.run(scrape_ghcr_downloads()) diff --git a/.github/workflows/org-stats.yml b/.github/workflows/org-stats.yml new file mode 100644 index 00000000..42f20f36 --- /dev/null +++ b/.github/workflows/org-stats.yml @@ -0,0 +1,61 @@ +name: Collect AGNTCY Org Repo Statistics + +on: + schedule: + - cron: '0 0 * * *' # daily at midnight UTC + workflow_dispatch: + +jobs: + collect-stats: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r .github/scripts/requirements.txt + playwright install chromium + + - name: Collect org repo statistics + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + python .github/scripts/collect_org_stats.py + + - name: Upload statistics CSV + uses: actions/upload-artifact@v4 + with: + name: agntcy-org-repo-stats + path: .github/scripts/agntcy_org_stats.csv + + - name: Collect PyPI package statistics + run: | + python .github/scripts/collect_pypi_stats_full.py + + - name: Install Playwright and dependencies + run: | + pip install playwright + playwright install chromium + + - name: Collect GHCR container download stats + run: | + python .github/scripts/scrape_ghcr_downloads_playwright.py + + - name: Upload PyPI statistics CSV + uses: actions/upload-artifact@v4 + with: + name: agntcy-pypi-stats + path: .github/scripts/agntcy_pypi_stats_full.csv + + - name: Upload GHCR statistics CSV + uses: actions/upload-artifact@v4 + with: + name: agntcy-ghcr-downloads + path: .github/scripts/agntcy_ghcr_downloads.csv From 7d20988dbd92345882a73f9ed5b44c535ce1bf65 Mon Sep 17 00:00:00 2001 From: Luca Muscariello Date: Mon, 29 Sep 2025 08:39:30 +0200 Subject: [PATCH 2/4] ci: restrict workflow to docs and mkdocs folders Signed-off-by: Luca Muscariello --- .github/workflows/cicd.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 3a33f48f..e8eb72ba 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -7,8 +7,14 @@ on: push: tags: - 'v*.*.*' + paths: + - 'docs/**' + - 'mkdocs/**' pull_request: + paths: + - 'docs/**' + - 'mkdocs/**' concurrency: group: ${{ github.workflow }}-${{ github.ref }} From 315981d826a2262ca2853187bbb7d5ac5dbaaf6c Mon Sep 17 00:00:00 2001 From: Luca Muscariello Date: Mon, 29 Sep 2025 13:42:53 +0200 Subject: [PATCH 

From 315981d826a2262ca2853187bbb7d5ac5dbaaf6c Mon Sep 17 00:00:00 2001
From: Luca Muscariello
Date: Mon, 29 Sep 2025 13:42:53 +0200
Subject: [PATCH 3/4] ci: add beautifulsoup4 to requirements.txt for bs4 import

Signed-off-by: Luca Muscariello
---
 .github/scripts/requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt
index 73fd2ab9..640140b3 100644
--- a/.github/scripts/requirements.txt
+++ b/.github/scripts/requirements.txt
@@ -1,2 +1,4 @@
 requests
 playwright
+
+beautifulsoup4

From 11c021c31f038e53eef384e1035739d89235e6aa Mon Sep 17 00:00:00 2001
From: Luca Muscariello
Date: Thu, 29 Jan 2026 11:48:53 +0100
Subject: [PATCH 4/4] docs: improve windows troubleshooting steps in directory-gui

Signed-off-by: Luca Muscariello
---
 docs/dir/directory-gui.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/dir/directory-gui.md b/docs/dir/directory-gui.md
index dfe18c5d..26a6dca6 100644
--- a/docs/dir/directory-gui.md
+++ b/docs/dir/directory-gui.md
@@ -98,10 +98,12 @@ commonly appears when installing software downloaded from GitHub because the
 application is unrecognized, unsigned, or lacks a high reputation score.
 
 ### How to Run the App ("Run Anyway")
-When the blue warning screen appears:
-1. Click the **More info** link in the text of the popup.
-2. Click the **Run anyway** button that appears at the bottom.
+To unblock an .exe file in Windows 11, right-click the file, select Properties,
+check the Unblock box under the "General" tab, and click Apply. If the option is
+missing, the file is not blocked.
+
+You can also bypass SmartScreen by clicking "More info" > "Run anyway".
 
 ### Why This Happens
 * **Unknown Publisher/Unsigned Code**: Many independent developers on GitHub
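
A note on the scraped figures: scrape_ghcr_downloads_playwright.py stores the download counts exactly as GHCR renders them (for example "9.49k"), whereas collect_org_stats.py converts such strings to integers before writing its CSV. Below is a minimal sketch of that normalization, mirroring the conversion used in scrape_all_package_downloads; the helper name parse_download_count is illustrative and not part of the patches.

    def parse_download_count(count: str) -> int:
        # "9.49k" -> 9490, "1,234" -> 1234 (same rule as scrape_all_package_downloads)
        count = count.strip()
        if "k" in count:
            return int(float(count.replace("k", "").replace(",", "")) * 1000)
        return int(count.replace(",", ""))

    print(parse_download_count("9.49k"))   # 9490
    print(parse_download_count("1,234"))   # 1234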
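
For local testing, the three collection scripts can be run in the same order as the org-stats.yml workflow. The following is a sketch under the assumption that the packages from .github/scripts/requirements.txt are installed, playwright install chromium has been run, and GITHUB_TOKEN is exported in the environment.

    import subprocess

    # Same sequence the org-stats.yml workflow executes on ubuntu-latest
    for script in (
        ".github/scripts/collect_org_stats.py",
        ".github/scripts/collect_pypi_stats_full.py",
        ".github/scripts/scrape_ghcr_downloads_playwright.py",
    ):
        subprocess.run(["python", script], check=True)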