diff --git a/.github/ACTIONS_README.md b/.github/ACTIONS_README.md new file mode 100644 index 000000000..eea4618c4 --- /dev/null +++ b/.github/ACTIONS_README.md @@ -0,0 +1,223 @@ +# GitHub Actions Structure + +This directory contains all GitHub Actions workflows, reusable components, and documentation for the nv-ingest CI/CD pipeline. + +## 📁 Directory Structure + +``` +.github/ +├── workflows/ # Workflows (including reusable workflows) +├── actions/ # Composite actions (3 actions) +├── ISSUE_TEMPLATE/ # Issue templates +├── CODEOWNERS # Code ownership +├── PULL_REQUEST_TEMPLATE.md +├── copy-pr-bot.yaml +│ +└── Documentation: + ├── ACTIONS_README.md (this file) # Overview and quick reference + ├── WORKFLOWS_REFERENCE.md # Complete technical reference + ├── WORKFLOWS_QUICKSTART.md # Quick start guide + └── ARCHITECTURE.md # System architecture +``` + +## 🚀 Quick Start + +### For Developers +Read: [`WORKFLOWS_QUICKSTART.md`](./WORKFLOWS_QUICKSTART.md) + +### For Complete Reference +Read: [`WORKFLOWS_REFERENCE.md`](./WORKFLOWS_REFERENCE.md) + +### For Architecture Details +Read: [`ARCHITECTURE.md`](./ARCHITECTURE.md) + +## 🎯 Workflow Overview + +### Continuous Integration + +| Workflow | File | Trigger | Purpose | +|----------|------|---------|---------| +| PR Validation | `ci-pull-request.yml` | Pull requests | Pre-commit, build, test | +| Main CI | `ci-main.yml` | Push to main | Full validation + multi-platform | + +### Nightly & Scheduled + +| Workflow | File | Schedule | Purpose | +|----------|------|----------|---------| +| Nightly Builds | `scheduled-nightly.yml` | Daily 23:30 UTC | Docker + Conda + PyPI | + +### Release Management + +| Workflow | File | Trigger | Purpose | +|----------|------|---------|---------| +| Docker Release | `release-docker.yml` | release/* branch OR manual | Publish Docker images | +| Conda Release | `release-conda.yml` | release/* branch OR manual | Publish Conda packages | +| PyPI Release | `release-pypi.yml` | release/* branch OR manual | 
Publish Python wheels | + +### Documentation + +| Workflow | File | Trigger | Purpose | +|----------|------|---------|---------| +| Build & Deploy Docs | `build-docs.yml` | Push to main OR manual | Build docs in Docker, deploy to GitHub Pages (https://nvidia.github.io/nv-ingest/) | +| Docs Deploy (manual) | `docs-deploy.yml` | Manual only | Same as above; use for one-off deploys without pushing to main | + +## 🔧 Reusable Components + +### Workflows (in `workflows/`, prefixed with `reusable-`) + +- `reusable-docker-build.yml` - Flexible Docker image building +- `reusable-docker-test.yml` - Container-based testing +- `reusable-conda-build.yml` - Conda package building +- `reusable-conda-publish.yml` - Conda package publishing +- `reusable-pypi-build.yml` - Python wheel building +- `reusable-pypi-publish.yml` - PyPI publishing +- `reusable-integration-test.yml` - Library mode testing + +### Actions (in `actions/`) + +- `setup-docker-buildx/` - Docker Buildx + QEMU setup +- `docker-login-ngc/` - NGC registry authentication +- `determine-version/` - Smart version determination + +## 📊 Workflow Architecture + +``` +┌─────────────────────┐ +│ Main Workflows │ (Triggered by events) +│ - ci-pull-request │ +│ - ci-main │ +│ - scheduled-* │ +│ - release-* │ +└──────┬──────────────┘ + │ calls + ▼ +┌─────────────────────┐ +│ Reusable Workflows │ (Business logic) +│ - docker-build │ +│ - conda-publish │ +│ - pypi-build │ +└──────┬──────────────┘ + │ uses + ▼ +┌─────────────────────┐ +│ Composite Actions │ (Common operations) +│ - setup-buildx │ +│ - docker-login │ +│ - determine-ver │ +└─────────────────────┘ +``` + +## 🎯 Key Features + +### Reusable Workflows +- Docker build logic defined once, used everywhere +- Consistent patterns across all workflows +- Type-safe interfaces with validation + +### Flexible Configuration +- Reusable workflows accept inputs +- Composite actions are parameterized +- Easy to customize per use case + +### Clear Separation +- Main workflows = 
triggers + orchestration +- Reusable workflows = business logic +- Composite actions = common operations + +### ✅ Type Safety +- Inputs/outputs explicitly defined +- Required vs optional parameters +- Validation built-in + +### ✅ Better Testing +- Reusable components can be tested independently +- workflow_dispatch for manual testing +- Clear job dependencies + +## 🔐 Required Secrets + +### Docker/NGC +- `DOCKER_PASSWORD` - NGC API token +- `DOCKER_REGISTRY` - Registry URL (e.g., nvcr.io) +- `HF_ACCESS_TOKEN` - Hugging Face token + +### Conda +- `NVIDIA_CONDA_TOKEN` - Anaconda.org token + +### PyPI +- `ARTIFACTORY_URL` - PyPI repository URL +- `ARTIFACTORY_USERNAME` - Username +- `ARTIFACTORY_PASSWORD` - Password + +### Integration Tests +- `NGC_API_KEY` / `NVIDIA_API_KEY` +- `AUDIO_FUNCTION_ID` +- `EMBEDDING_NIM_MODEL_NAME` +- `NEMOTRON_PARSE_MODEL_NAME` +- `PADDLE_HTTP_ENDPOINT` +- `VLM_CAPTION_ENDPOINT` +- `VLM_CAPTION_MODEL_NAME` +- `YOLOX_*_HTTP_ENDPOINT` (multiple) + +## 📝 Common Tasks + +### Run PR checks locally +```bash +pre-commit run --all-files +docker build --target runtime -t nv-ingest:test . 
+docker run nv-ingest:test pytest -m "not integration" +``` + +### Trigger nightly build manually +``` +Actions → "Nightly Builds & Publishing" → Run workflow +``` + +### Create a release +```bash +# Automatic - All three artifact types (recommended) +git checkout -b release/25.4.0 +git push origin release/25.4.0 +# → Triggers Docker, Conda, AND PyPI releases automatically + +# Manual (for custom options) +Actions → Release - Docker/Conda/PyPI → Run workflow +``` + +### Debug workflows +``` +Actions → Select workflow → View logs → Expand steps +``` + +## 🐛 Troubleshooting + +| Issue | Solution | +|-------|----------| +| Workflow not found | Check path: `.github/workflows/` (reusable workflows are `reusable-*.yml`) | +| Secret not available | Verify in Settings → Secrets → Actions | +| Build timeout | Use `linux-large-disk` runner | +| Integration tests fail | Check NIM endpoints and credentials | + +## 📚 Documentation + +- **Quick Start**: [`WORKFLOWS_QUICKSTART.md`](./WORKFLOWS_QUICKSTART.md) +- **Complete Reference**: [`WORKFLOWS_REFERENCE.md`](./WORKFLOWS_REFERENCE.md) +- **Architecture**: [`ARCHITECTURE.md`](./ARCHITECTURE.md) + +## 🆘 Getting Help + +1. Check workflow logs in Actions tab +2. Review documentation in this folder +3. Search for similar issues +4. Contact DevOps team +5. Open an issue with details + +## 📞 Maintainers + +See [`CODEOWNERS`](./CODEOWNERS) for ownership information. 
+ +--- + +**Architecture**: Reusable workflows + Composite actions +**Documentation**: 4 comprehensive guides +**Total Components**: Workflows + reusable workflows + composite actions diff --git a/.github/INDEX.md b/.github/INDEX.md new file mode 100644 index 000000000..578b94b0d --- /dev/null +++ b/.github/INDEX.md @@ -0,0 +1,236 @@ +# GitHub Actions Documentation Index + +**Complete CI/CD pipeline documentation for nv-ingest** + +--- + +## 📦 What's Included + +This documentation package contains **18 components** organized into a clean, maintainable structure: + +### 🎯 Main Workflows (8 files) +Located in: `.github/workflows/` + +1. **`ci-pull-request.yml`** - PR validation with pre-commit, Docker build/test, library mode +2. **`ci-main.yml`** - Main branch CI with multi-platform testing +3. **`scheduled-nightly.yml`** - Unified nightly builds (Docker + Conda + PyPI) +4. **`release-docker.yml`** - Docker release publishing +5. **`release-conda.yml`** - Conda package releases +6. **`release-pypi.yml`** - PyPI wheel releases +7. **`build-docs.yml`** - Build and deploy documentation to GitHub Pages (runs on push to main) +8. **`docs-deploy.yml`** - Documentation deploy (manual trigger only; primary workflow is build-docs.yml) + +### ♻️ Reusable Workflows (7 files) +Located in: `.github/workflows/` (prefixed with `reusable-`) + +1. **`reusable-docker-build.yml`** - Flexible Docker image building +2. **`reusable-docker-test.yml`** - Container-based testing +3. **`reusable-conda-build.yml`** - Conda package building +4. **`reusable-conda-publish.yml`** - Conda publishing to channels +5. **`reusable-pypi-build.yml`** - Python wheel building +6. **`reusable-pypi-publish.yml`** - PyPI publishing +7. **`reusable-integration-test.yml`** - Library mode integration tests + +### 🔧 Composite Actions (3 directories) +Located in: `.github/actions/` + +1. **`setup-docker-buildx/`** - Docker Buildx + QEMU setup +2. **`docker-login-ngc/`** - NGC registry authentication +3. 
**`determine-version/`** - Smart version determination + +### 📚 Documentation (5 files) +Located in: `.github/` + +1. **`ACTIONS_README.md`** - Main entry point with directory overview +2. **`WORKFLOWS_QUICKSTART.md`** - Quick start guide for developers +3. **`WORKFLOWS_REFERENCE.md`** - Complete technical reference +4. **`ARCHITECTURE.md`** - Visual architecture diagrams +5. **`INDEX.md`** - This file + +--- + +## 🎓 How to Use This Documentation + +### For Different Audiences: + +#### 👨‍💻 **Developers** (Contributing code) +1. Read: **`WORKFLOWS_QUICKSTART.md`** +2. Reference: **`ACTIONS_README.md`** for quick lookups + +#### 🔧 **Maintainers** (Managing releases) +1. Read: **`WORKFLOWS_QUICKSTART.md`** (Common tasks) +2. Reference: **`WORKFLOWS_REFERENCE.md`** (Complete details) + +#### 🏗️ **DevOps/SRE** (System maintenance) +1. Read: **`ARCHITECTURE.md`** (System design) +2. Read: **`WORKFLOWS_REFERENCE.md`** (Technical specs) +3. Reference: **`ACTIONS_README.md`** (Overview) + +#### 📊 **Management** (Understanding scope) +1. Read: **`ACTIONS_README.md`** (Executive summary) +2. 
Review: **`ARCHITECTURE.md`** (Visual diagrams) + +--- + +## 📊 System Overview + +### Workflow Components + +- **8** main trigger workflows +- **7** reusable workflow components +- **3** composite actions +- **5** documentation files + +### Key Capabilities + +- ✅ Automated PR validation +- ✅ Multi-platform Docker builds (amd64, arm64) +- ✅ Daily nightly builds (Docker, Conda, PyPI) +- ✅ Unified release process (one branch = all artifacts) +- ✅ Integration testing with conda environment +- ✅ Automatic documentation deployment + +--- + +## 🚀 Getting Started + +### Step 1: Understand the Structure +```bash +# Read the main README +cat .github/ACTIONS_README.md + +# Review the architecture +cat .github/ARCHITECTURE.md +``` + +### Step 2: Choose Your Path + +**Quick Start (Developers):** +```bash +cat .github/WORKFLOWS_QUICKSTART.md +# Start contributing immediately +``` + +**Complete Reference (Advanced):** +```bash +cat .github/WORKFLOWS_REFERENCE.md +# Deep dive into every workflow +``` + +### Step 3: Start Using + +**For PRs:** +1. Create a pull request +2. Watch automated checks run +3. 
Address any failures + +**For Releases:** +```bash +git checkout -b release/25.4.0 +git push origin release/25.4.0 +# Automatically releases Docker, Conda, and PyPI +``` + +--- + +## 🔐 Security & Access + +### Required Secrets + +All secrets must be configured in repository settings: + +**Docker/NGC:** +- `DOCKER_PASSWORD` +- `DOCKER_REGISTRY` +- `HF_ACCESS_TOKEN` + +**Conda:** +- `NVIDIA_CONDA_TOKEN` + +**PyPI:** +- `ARTIFACTORY_URL` +- `ARTIFACTORY_USERNAME` +- `ARTIFACTORY_PASSWORD` + +**Integration Tests:** +- `NGC_API_KEY` / `NVIDIA_API_KEY` +- Multiple NIM endpoint secrets + +### Access Control +- External contributors require `ok-to-test` label +- `pull_request_target` used safely with access checks +- Secrets passed explicitly (no implicit access) +- Minimal permissions (least privilege) + +--- + +## 🎯 Quick Reference + +### Common Tasks + +| Task | Location | Action | +|------|----------|--------| +| View workflows | `.github/workflows/` | Browse main triggers | +| Understand logic | `.github/workflows/` | See reusable workflows (`reusable-*.yml`) | +| Check common operations | `.github/actions/` | Review composite actions | +| Quick help | `.github/WORKFLOWS_QUICKSTART.md` | Read guide | +| Complete reference | `.github/WORKFLOWS_REFERENCE.md` | Deep dive | + +### Workflow Triggers + +| Workflow | Automatic | Manual | Purpose | +|----------|-----------|--------|---------| +| PR Validation | PR events | ✓ | Validate changes | +| Main CI | Push to main | ✓ | Full validation | +| Nightly | Daily 23:30 UTC | ✓ | Build & publish | +| Docker Release | release/* branch | ✓ | Release Docker | +| Conda Release | release/* branch | ✓ | Release Conda | +| PyPI Release | release/* branch | ✓ | Release PyPI | +| Docs | Push to main | ✓ | Deploy docs | + +--- + +## 🐛 Troubleshooting + +### Common Issues & Solutions + +| Issue | Solution Document | Section | +|-------|------------------|---------| +| Workflow not triggering | `WORKFLOWS_QUICKSTART.md` | Troubleshooting 
| +| Reusable workflow not found | `WORKFLOWS_REFERENCE.md` | Reusable Workflows | +| Secret not available | `ACTIONS_README.md` | Required Secrets | +| Build failing | `WORKFLOWS_REFERENCE.md` | Docker Build | +| Integration tests failing | `WORKFLOWS_QUICKSTART.md` | Troubleshooting | + +### Getting Help + +1. **Check logs**: Actions tab → Workflow run → Job → Step +2. **Review docs**: Search in `.github/` documentation +3. **Test locally**: Run pre-commit and Docker builds +4. **Ask team**: Contact DevOps or maintainers +5. **Open issue**: Include logs and context + +--- + +## 📚 File Index + +### Documentation Files +``` +.github/ +├── INDEX.md ← You are here +├── ACTIONS_README.md ← Start here (overview) +├── ARCHITECTURE.md ← How it works (diagrams) +├── WORKFLOWS_QUICKSTART.md ← Quick reference (developers) +└── WORKFLOWS_REFERENCE.md ← Complete reference (advanced) +``` + +### Workflow Files +``` +.github/ +├── workflows/ ← Workflows (including `reusable-*.yml`) +└── actions/ ← Composite actions (3) +``` + +--- + +**For questions or issues, start with**: `.github/ACTIONS_README.md` diff --git a/.github/WORKFLOWS_REFERENCE.md b/.github/WORKFLOWS_REFERENCE.md new file mode 100644 index 000000000..a36639b10 --- /dev/null +++ b/.github/WORKFLOWS_REFERENCE.md @@ -0,0 +1,659 @@ +# GitHub Actions Workflows Reference + +Complete reference documentation for nv-ingest GitHub Actions workflows. + +## Table of Contents +1. [Workflow Overview](#workflow-overview) +2. [Continuous Integration](#continuous-integration) +3. [Nightly Builds](#nightly-builds) +4. [Release Workflows](#release-workflows) +5. [Reusable Workflows](#reusable-workflows) +6. 
[Composite Actions](#composite-actions) + +--- + +## Workflow Overview + +### Trigger Summary + +| Workflow | PR | Main | Schedule | Manual | Branch Create | +|----------|-----|------|----------|--------|---------------| +| `ci-pull-request.yml` | ✓ | | | | | +| `ci-main.yml` | | ✓ | | | | +| `scheduled-nightly.yml` | | ✓ | 23:30 UTC | ✓ | | +| `release-docker.yml` | | | | ✓ | release/* | +| `release-conda.yml` | | | | ✓ | release/* | +| `release-pypi.yml` | | | | ✓ | release/* | +| `build-docs.yml` | | ✓ | | ✓ | | +| `docs-deploy.yml` | | | | ✓ | (manual only) | + +--- + +## Continuous Integration + +### `ci-pull-request.yml` + +**Purpose**: Validates pull requests before merge + +**Triggers**: +- Pull request (opened, synchronize, reopened) +- Pull request target (for external contributors) + +**Jobs**: + +#### 1. `pre-commit` +- **Runs**: Pre-commit hooks (linting, formatting) +- **Runner**: `ubuntu-latest` +- **Fast fail**: Yes (runs first) + +#### 2. `docker-build-test` +- **Runs**: Docker build for x86_64 +- **Platform**: `linux/amd64` +- **Image tag**: `nv-ingest:pr-{number}` +- **Push**: No (local only) +- **Base**: Ubuntu Jammy (public) + +#### 3. `docker-test` +- **Runs**: Full pytest suite +- **Coverage**: Enabled +- **Markers**: Excludes integration tests +- **Artifacts**: Coverage reports + +#### 4. `library-mode-build` + `library-mode-test` +- **Runs**: Integration tests (conda-based) +- **Requires**: Approval for external contributors +- **Access Control**: + - Auto-runs for MEMBER/COLLABORATOR/OWNER + - Requires `ok-to-test` label for others +- **Timeout**: 60 minutes +- **Dependencies**: Multiple NVIDIA NIMs + +**Status Checks**: +- Required: `pre-commit`, `docker-test` +- Optional: `library-mode-test` + +--- + +### `ci-main.yml` + +**Purpose**: Validates main branch commits and tests multi-platform builds + +**Triggers**: +- Push to `main` branch + +**Jobs**: + +#### 1. `pre-commit` +- Same as PR workflow + +#### 2. 
`docker-build` + `docker-test` +- **Platform**: `linux/amd64` +- **Full coverage**: Yes +- **Image tag**: `nv-ingest:main-{sha}` + +#### 3. `docker-build-arm` + `docker-test-arm` +- **Platform**: `linux/arm64` +- **Emulation**: QEMU +- **Testing**: Random 100 tests (faster) +- **Non-blocking**: Runs in parallel + +#### 4. `library-mode-build` + `library-mode-test` +- **Always runs**: No approval needed (trusted branch) +- Full integration test suite + +**Parallelization**: +- ARM and x86 builds run in parallel +- Tests run after respective builds complete + +--- + +## Nightly Builds + +### `scheduled-nightly.yml` + +**Purpose**: Automated nightly builds and publishing + +**Triggers**: +- Schedule: Daily at 23:30 UTC +- Push to `main` (optional) +- Manual dispatch with skip options + +**Manual Inputs**: +- `skip-docker`: Skip Docker build/publish +- `skip-conda`: Skip Conda build/publish +- `skip-pypi`: Skip PyPI build/publish + +**Jobs**: + +#### 1. `determine-version` +- Generates version from date: `YYYY.MM.DD` +- Used by all downstream jobs + +#### 2. `docker-build-publish` +- **Platforms**: `linux/amd64,linux/arm64` +- **Registry**: NGC (`nvcr.io`) +- **Tag**: `nv-ingest:YYYY.MM.DD` +- **Push**: Yes +- **Multi-platform**: Yes (buildx) + +#### 3. `conda-build` + `conda-publish` +- **Channel**: `dev` +- **Version**: Date-based (YYYY.MM.DD) +- **Packages**: All nv-ingest conda packages +- **Force upload**: Yes + +#### 4. 
`pypi-build` + `pypi-publish` +- **Release type**: `dev` +- **Version**: Date-based +- **Packages**: + - `nv-ingest-api` + - `nv-ingest-client` + - `nv-ingest` (service) +- **Repository**: Artifactory + +**Dependencies**: All jobs independent (run in parallel after version determination) + +--- + +## Release Workflows + +### `release-docker.yml` + +**Purpose**: Publish official Docker release images + +**Triggers**: +- Automatic: Branch creation matching `release/*` +- Manual: workflow_dispatch + +**Manual Inputs**: +- `version`: Version string (e.g., `25.4.0`) +- `source-ref`: Git ref to build from (default: `main`) + +**Version Determination**: +- **Automatic**: Extracted from branch name (`release/25.4.0` → `25.4.0`) +- **Manual**: Uses input version + +**Build Details**: +- **Platforms**: `linux/amd64,linux/arm64` +- **Registry**: NGC +- **Tag**: `nv-ingest:{version}` +- **Example**: `nv-ingest:25.4.0` + +**Usage Examples**: +```bash +# Automatic trigger +git checkout -b release/25.4.0 +git push origin release/25.4.0 + +# Manual trigger (GitHub UI) +Actions → Release - Docker → Run workflow + Version: 25.4.0 + Source ref: main +``` + +--- + +### `release-conda.yml` + +**Purpose**: Publish conda packages to RapidsAI channels + +**Triggers**: +- Automatic: Branch creation matching `release/*` +- Manual: workflow_dispatch + +**Optional Inputs** (manual dispatch): +- `version`: Version string (default: extracted from branch name) +- `channel`: Target channel (default: `main`, options: `dev` or `main`) +- `source-ref`: Git ref to build from (default: release branch or `main`) + +**Version Determination**: +- **Automatic**: Extracted from branch name (`release/25.4.0` → `25.4.0`) +- **Manual**: Uses input version or falls back to branch extraction + +**Build Details**: +- **Container**: `rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.12` +- **Packages**: All nv-ingest conda packages +- **Force upload**: Yes (overwrites existing) +- **Default channel**: `main` (for 
release branches) + +**Usage Examples**: +```bash +# Automatic trigger (recommended) +git checkout -b release/25.4.0 +git push origin release/25.4.0 +# → Publishes to main channel automatically + +# Manual trigger (for custom options) +Actions → Release - Conda → Run workflow + Version: 25.4.0 + Channel: dev (for testing) or main + Source ref: main +``` + +**Channels**: +- `dev`: Development/testing releases +- `main`: Production releases (default for release branches) + +--- + +### `release-pypi.yml` + +**Purpose**: Publish Python wheels to PyPI/Artifactory + +**Triggers**: +- Automatic: Branch creation matching `release/*` +- Manual: workflow_dispatch + +**Optional Inputs** (manual dispatch): +- `version`: Version string (default: extracted from branch name) +- `release-type`: Type (default: `release`, options: `dev` or `release`) +- `source-ref`: Git ref to build from (default: release branch or `main`) + +**Version Determination**: +- **Automatic**: Extracted from branch name (`release/25.4.0` → `25.4.0`) +- **Manual**: Uses input version or falls back to branch extraction + +**Build Details**: +- **Container**: `rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.12` +- **Packages built**: + - `nv-ingest-api` (from `api/`) + - `nv-ingest-client` (from `client/`) + - `nv-ingest` (from `src/`) +- **Artifacts**: Wheels (.whl) and source distributions (.tar.gz) +- **Default release type**: `release` (for release branches) + +**Usage Examples**: +```bash +# Automatic trigger (recommended) +git checkout -b release/25.4.0 +git push origin release/25.4.0 +# → Publishes as release type automatically + +# Manual trigger (for custom options) +Actions → Release - PyPI → Run workflow + Version: 25.4.0 + Release type: dev (for testing) or release + Source ref: main +``` + +**Release Types**: +- `dev`: Development releases (with dev suffix) +- `release`: Production releases (default for release branches) + +--- + +### `build-docs.yml` + +**Purpose**: Build and deploy documentation 
to GitHub Pages (primary docs workflow) + +**Triggers**: +- Push to `main` +- Manual dispatch + +**Process**: +1. Build docs Docker image (target: `docs`) +2. Run container to generate static site (`make docs` in `/workspace/docs`) +3. Copy generated site from container (`/workspace/docs/site` → `./generated-site`) +4. Verify `index.html` exists in generated site +5. Upload artifact (`github-pages`) and deploy via `deploy-pages` + +**Output**: https://nvidia.github.io/nv-ingest/ + +**Required repo setting**: Settings → Pages → Build and deployment → Source must be **"GitHub Actions"** (not "Deploy from a branch"), or the site will not update. + +**Permissions**: +- `contents: read` +- `pages: write` +- `id-token: write` + +**Concurrency**: Single deployment (no cancellation) + +**Troubleshooting (docs not updating)**: +1. **Pages source**: Repo **Settings → Pages → Build and deployment → Source** must be **"GitHub Actions"**. If it is "Deploy from a branch", the workflow uploads an artifact but GitHub will not deploy it to the live site. +2. **Workflow enabled**: In **Actions**, ensure "Build NV-Ingest Documentation" is present and not disabled. Workflows in `.github/workflows/` on the default branch are enabled by default. +3. **Build failures**: Check the **build** job logs; the "Verify generated site" step will fail with a clear error if the container did not produce `site/index.html`. Fix any `make docs` errors (e.g. missing Python deps, Sphinx/MkDocs errors) in the docs Docker stage. +4. **Deploy job**: The **deploy** job needs the `github-pages` artifact from the build job; it uses the `github-pages` environment. Ensure the environment exists (it is usually auto-created). + +--- + +### `docs-deploy.yml` + +**Purpose**: Same as `build-docs.yml`; manual trigger only (no push trigger to avoid duplicate deployments). + +**Triggers**: +- Manual dispatch only + +**Process**: Same as `build-docs.yml`. 
+ +--- + +## Reusable Workflows + +### `reusable-docker-build.yml` + +**Purpose**: Reusable Docker image build logic + +**Inputs**: +- `platform`: Target platform(s) (default: `linux/amd64`) +- `target`: Docker build stage (default: `runtime`) +- `push`: Push to registry (default: `false`) +- `tags`: Image tags, comma-separated +- `base-image`: Base image name (default: `ubuntu`) +- `base-image-tag`: Base image tag +- `runner`: GitHub runner (default: `linux-large-disk`) +- `use-qemu`: Enable QEMU for cross-platform +- `registry`: Docker registry URL (optional) + +**Secrets**: +- `HF_ACCESS_TOKEN`: Hugging Face token +- `DOCKER_PASSWORD`: Registry password + +**Outputs**: +- `image-digest`: Built image digest + +**Features**: +- Automatic buildx setup for multi-platform +- Conditional QEMU setup +- Flexible tag support +- Registry login (if push enabled) + +--- + +### `reusable-docker-test.yml` + +**Purpose**: Run tests in Docker containers + +**Inputs**: +- `image-tag`: Docker image to test +- `platform`: Platform to test on +- `test-selection`: `full`, `random`, or marker-based +- `random-count`: Number of random tests +- `pytest-markers`: Pytest marker expression +- `coverage`: Enable coverage report +- `runner`: GitHub runner + +**Artifacts**: +- Coverage reports (if enabled) +- Test reports (always) + +**Usage Example**: +```yaml +test-arm: + uses: ./.github/workflows/reusable-docker-test.yml + with: + image-tag: 'nv-ingest:test' + platform: 'linux/arm64' + test-selection: 'random' + random-count: '100' +``` + +--- + +### `reusable-conda-build.yml` + +**Purpose**: Build conda packages + +**Inputs**: +- `version`: Explicit version (optional) +- `source-ref`: Git ref to build from +- `runner`: GitHub runner +- `upload-artifacts`: Upload build artifacts + +**Container**: `rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.12` + +**Outputs**: +- `package-path`: Path to built packages + +**Artifacts**: Conda packages (if upload enabled) + +--- + +### 
`reusable-conda-publish.yml` + +**Purpose**: Publish conda packages + +**Inputs**: +- `channel`: Target channel (`dev` or `main`) +- `package-path`: Path to packages +- `force-upload`: Overwrite existing packages + +**Secrets**: +- `NVIDIA_CONDA_TOKEN`: Anaconda authentication + +**Validation**: Ensures channel is `dev` or `main` + +--- + +### `reusable-pypi-build.yml` + +**Purpose**: Build Python wheels + +**Inputs**: +- `version`: Explicit version (optional, date if omitted) +- `release-type`: `dev` or `release` +- `source-ref`: Git ref to build from +- `runner`: GitHub runner + +**Outputs**: +- `version`: Version that was built + +**Artifacts**: Python wheels and source distributions + +**Process**: +1. Installs build dependencies +2. Builds all three packages (api, client, service) +3. Uploads artifacts for publishing + +--- + +### `reusable-pypi-publish.yml` + +**Purpose**: Publish Python wheels to Artifactory + +**Inputs**: +- `repository-url`: PyPI repository URL + +**Secrets**: +- `ARTIFACTORY_URL`: Repository URL +- `ARTIFACTORY_USERNAME`: Auth username +- `ARTIFACTORY_PASSWORD`: Auth password + +**Process**: +1. Downloads wheel artifacts +2. Installs twine +3. Publishes all packages + +--- + +### `reusable-integration-test.yml` + +**Purpose**: Run integration tests with conda environment + +**Inputs**: +- `runner`: GitHub runner +- `python-version`: Python version (default: `3.12.11`) +- `timeout-minutes`: Job timeout (default: 60) + +**Secrets**: Multiple NVIDIA NIM and service endpoints + +**Process**: +1. Download conda packages (from artifacts) +2. Setup Miniconda +3. Install packages and dependencies +4. 
Run integration tests + +**Dependencies**: +- NVIDIA NIMs (audio, VLM, OCR, YOLOX) +- Milvus +- Various Python packages + +--- + +## Composite Actions + +### `setup-docker-buildx` + +**Purpose**: Setup Docker Buildx with optional QEMU + +**Inputs**: +- `use-qemu`: Enable QEMU emulation (default: `false`) +- `platforms`: Supported platforms (default: `linux/amd64`) + +**Steps**: +1. Setup QEMU (if enabled) +2. Setup Docker Buildx + +**Usage**: +```yaml +- uses: ./.github/actions/setup-docker-buildx + with: + use-qemu: 'true' + platforms: 'linux/amd64,linux/arm64' +``` + +--- + +### `docker-login-ngc` + +**Purpose**: Authenticate with NGC registry + +**Inputs**: +- `registry`: Registry URL (default: `nvcr.io`) +- `password`: NGC API token (required) + +**Usage**: +```yaml +- uses: ./.github/actions/docker-login-ngc + with: + password: ${{ secrets.DOCKER_PASSWORD }} +``` + +--- + +### `determine-version` + +**Purpose**: Determine version from various sources + +**Inputs**: +- `version`: Explicit version (optional) +- `date-format`: Date format for auto-generation +- `branch-name`: Branch name to extract from + +**Outputs**: +- `version`: Determined version string + +**Priority**: +1. Explicit version input +2. Extract from branch name (release/*) +3. Generate from date + +**Usage**: +```yaml +- id: version + uses: ./.github/actions/determine-version + with: + date-format: '%Y.%m.%d' + +- run: echo "Version is ${{ steps.version.outputs.version }}" +``` + +--- + +## Quick Reference + +### Common Tasks + +#### Run PR validation locally +```bash +# Pre-commit checks +pre-commit run --all-files + +# Docker build +docker build --target runtime -t nv-ingest:test . 
+ +# Run tests +docker run nv-ingest:test pytest -m "not integration" +``` + +#### Trigger nightly build manually +```bash +# GitHub UI +Actions → Nightly Builds & Publishing → Run workflow + Branch: main +``` + +#### Create a release +```bash +# Automatic - All three artifact types (recommended) +git checkout -b release/25.4.0 +git push origin release/25.4.0 +# → Automatically triggers: +# - Docker (multi-platform) +# - Conda (main channel) +# - PyPI (release type) + +# Manual - For custom options +Actions → Release - Docker/Conda/PyPI → Run workflow +``` + +#### Debug workflow issues +```bash +# View workflow runs +Actions → Select workflow → View runs + +# Download artifacts +Actions → Workflow run → Artifacts section + +# Re-run failed jobs +Actions → Workflow run → Re-run failed jobs +``` + +--- + +## Best Practices + +1. **Always test workflows locally** when possible +2. **Use manual dispatch** for testing workflow changes +3. **Check artifacts** for build outputs and logs +4. **Monitor first run** after workflow changes +5. **Use skip flags** in nightly builds during maintenance +6. **Label external PRs** with `ok-to-test` after review +7. **Create release branches** from tested commits +8. **Verify secrets** are available before running workflows + +--- + +## Maintenance + +### Updating Docker Base Images + +Edit base image references in workflows or create a workflow variable. + +Current: `ubuntu:jammy-20250415.1` + +### Updating Runner Types + +Change `runner:` inputs in workflow calls: +- `ubuntu-latest`: Small jobs, public images +- `linux-large-disk`: Large Docker builds + +### Updating Python/Conda Versions + +Edit container images in reusable workflows: +- Current: `rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.12` + +### Adding New Secrets + +1. Add to repository secrets (Settings → Secrets → Actions) +2. Add to workflow secrets declarations +3. 
Pass through reusable workflow calls + +--- + +## Support + +For issues or questions: +- Check workflow logs in Actions tab +- Review this documentation +- Check migration guide: `WORKFLOWS_MIGRATION.md` +- Contact DevOps team diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index 173e96111..699205595 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -1,6 +1,9 @@ name: Build NV-Ingest Documentation -# Trigger for pull requests and pushing to main +# Publishes to https://nvidia.github.io/nv-ingest/ +# REQUIRED: In repo Settings → Pages → Build and deployment → Source, select "GitHub Actions" +# (not "Deploy from a branch"), or this workflow will not update the live site. + on: # Runs on pushes targeting the default branch push: @@ -52,13 +55,24 @@ jobs: - name: Copy generated docs site from the container run: docker cp $CONTAINER_ID:/workspace/docs/site ./generated-site + - name: Verify generated site + run: | + if [ ! -f ./generated-site/index.html ]; then + echo "::error::Generated site missing index.html. Build may have failed inside the container." + ls -la ./generated-site/ || true + exit 1 + fi + echo "Generated site contains $(find ./generated-site -type f | wc -l) files" + - name: Stop and remove the container - run: docker rm $CONTAINER_ID + if: always() + run: docker rm -f $CONTAINER_ID 2>/dev/null || true - name: Upload Site Artifacts uses: actions/upload-pages-artifact@v3 with: path: ./generated-site + name: github-pages deploy: needs: diff --git a/.github/workflows/docs-deploy.yml b/.github/workflows/docs-deploy.yml index 023859880..c0905ce93 100644 --- a/.github/workflows/docs-deploy.yml +++ b/.github/workflows/docs-deploy.yml @@ -1,9 +1,8 @@ name: Documentation Deploy +# NOTE: Docs are built and deployed by build-docs.yml on push to main. +# This workflow is kept for manual runs only; the push trigger was removed to avoid duplicate deployments. 
on: - push: - branches: - - main workflow_dispatch: permissions: @@ -50,6 +49,15 @@ jobs: - name: Copy generated docs from container run: docker cp $CONTAINER_ID:/workspace/docs/site ./generated-site + - name: Verify generated site + run: | + if [ ! -f ./generated-site/index.html ]; then + echo "::error::Generated site missing index.html. Build may have failed inside the container." + ls -la ./generated-site/ || true + exit 1 + fi + echo "Generated site contains $(find ./generated-site -type f | wc -l) files" + - name: Clean up container run: docker rm $CONTAINER_ID @@ -57,6 +65,7 @@ jobs: uses: actions/upload-pages-artifact@v3 with: path: ./generated-site + name: github-pages deploy: name: Deploy to GitHub Pages diff --git a/Dockerfile b/Dockerfile index a0db9803b..886924479 100644 --- a/Dockerfile +++ b/Dockerfile @@ -168,6 +168,17 @@ CMD ["/bin/bash"] FROM nv_ingest_install AS docs +# Install dependencies needed for docs generation +# +# NOTE: The nv_ingest_install base image may carry a broken apt/dpkg state +# (e.g., partially-installed libreoffice dependencies). Installing `make` +# via conda avoids apt entirely and is more reliable on CI runners. +# Do not run mamba clean -afy here: /opt/conda/pkgs is a cache mount and +# cannot be removed (Device or resource busy). +RUN --mount=type=cache,target=/opt/conda/pkgs \ + source activate nv_ingest_runtime \ + && mamba install -y make + COPY docs docs # Docs needs all the source code present so add it to the container