# llama-stack/.github/workflows/integration-tests.yml (EleanorWho/llama-stack, branch main)
# Integration test workflow. Matrix entries run with inference mode 'replay'
# unless the entry supplies its own inference_mode.
name: Integration Tests (Replay)

# Static title shown for every run of this workflow.
run-name: Run the integration test suites from tests/integration in replay mode

# Triggers: branch pushes, pull requests that touch test-relevant paths, a
# nightly cron, manual dispatch, and invocation from other workflows.
on:
  push:
    branches:
      - main
      - 'release-[0-9]+.[0-9]+.x'

  pull_request:
    types:
      - opened
      - synchronize
      - reopened
    branches:
      - main
      - 'release-[0-9]+.[0-9]+.x'
    # Pull requests only trigger this workflow when they change one of these paths.
    paths:
      - 'src/llama_stack/**'
      - '!src/llama_stack_ui/**'
      - 'tests/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - '.github/workflows/integration-tests.yml'  # This workflow
      - '.github/actions/setup-ollama/action.yml'
      - '.github/actions/setup-test-environment/action.yml'
      - '.github/actions/run-and-record-tests/action.yml'
      - 'scripts/integration-tests.sh'
      - 'scripts/generate_ci_matrix.py'

  schedule:
    # Daily at 12 AM UTC (tests the latest client).
    # The cron string is passed to scripts/generate_ci_matrix.py (--schedule) and
    # compared literally against github.event.schedule in the
    # run-replay-mode-tests matrix; update those places if the schedule changes.
    - cron: '0 0 * * *'

  # Manual runs from the Actions UI.
  workflow_dispatch:
    inputs:
      test-all-client-versions:
        description: 'Test against both the latest and published versions'
        type: boolean
        default: false
      test-setup:
        description: 'Test against a specific setup'
        type: string
        default: 'ollama'

  # Reusable-workflow entry point; every input is optional.
  workflow_call:
    inputs:
      sdk_install_url:
        description: 'URL to install Python SDK from (for testing preview builds)'
        type: string
        required: false
      matrix_key:
        description: 'Matrix configuration key from ci_matrix.json (e.g., "default", "stainless")'
        type: string
        required: false
        default: 'default'
      matrix_json:
        description: 'Pre-defined matrix JSON. If provided, skips generate_ci_matrix.py execution.'
        type: string
        required: false
      pr_head_sha:
        description: 'The SHA of the pull request head to checkout'
        type: string
        required: false
      pr_head_ref:
        description: 'The branch name of the pull request head (for recording commits)'
        type: string
        required: false
      is_fork_pr:
        description: 'Whether this is a fork PR (cannot push recordings to forks)'
        type: boolean
        required: false
        default: false
      disable_cache:
        description: 'Disable caching (for security in pull_request_target contexts)'
        type: boolean
        required: false
        default: false
      test-all-client-versions:
        description: 'Test against both the latest and published versions'
        type: boolean
        required: false
        default: false

concurrency:
  # Runs whose ref is refs/heads/main are keyed by run_id, so each one gets its
  # own group and is never cancelled by a later run - each commit is tested
  # independently. Every other ref shares one group per ref, and a newer run
  # cancels the one still in progress.
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
  cancel-in-progress: true

# Limit permissions of the GITHUB_TOKEN to the minimum required.
# Default mode is 'replay' which only needs read access.
# When called via workflow_call (e.g., from stainless-builds.yml with record-if-missing),
# the caller's permissions apply.
# NOTE(review): GitHub's reusable-workflow documentation says a called workflow
# can only narrow the token permissions it receives from the caller - confirm
# that this block does not restrict callers that need write access to record.
permissions:
  contents: read

jobs:
  # Produces the list of test configurations consumed by run-replay-mode-tests
  # and exposes it as the `matrix` job output.
  generate-matrix:
    # Skip matrix generation if matrix_json is provided (e.g., from pull_request_target callers)
    if: ${{ inputs.matrix_json == '' }}
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          # Prefer the SHA supplied by a calling workflow, then the PR head, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || github.event.pull_request.head.sha || github.sha }}

      - name: Generate test matrix
        id: set-matrix
        # Expression values reach the script through environment variables
        # instead of being pasted into the shell source, so quotes or $(...)
        # inside an input cannot change the command that runs.
        env:
          SCHEDULE: ${{ github.event.schedule }}
          TEST_SETUP: ${{ github.event.inputs.test-setup || '' }}
          MATRIX_KEY: ${{ inputs.matrix_key || 'default' }}
        run: |
          # Generate matrix from CI_MATRIX in tests/integration/ci_matrix.json
          # Supports schedule-based, manual input, and workflow_call overrides
          MATRIX=$(PYTHONPATH=. python3 scripts/generate_ci_matrix.py \
            --schedule "$SCHEDULE" \
            --test-setup "$TEST_SETUP" \
            --matrix-key "$MATRIX_KEY")
          echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
          echo "Generated matrix: $MATRIX"

  # Runs every (client, python-version, client-version, config) combination of
  # the integration tests. `config` entries come from the matrix_json input when
  # given, otherwise from the generate-matrix job output.
  run-replay-mode-tests:
    needs: generate-matrix
    # Always run even if generate-matrix was skipped (when matrix_json is provided)
    if: ${{ !cancelled() }}
    runs-on: ubuntu-latest
    # When disable_cache is true, set UV_NO_CACHE to prevent uv from using cached packages.
    # This is a security measure for pull_request_target contexts to prevent cache poisoning.
    env:
      UV_NO_CACHE: ${{ inputs.disable_cache == true }}
    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}

    strategy:
      fail-fast: false
      matrix:
        client: [library, docker, server]
        # The nightly schedule tests Python 3.12 and 3.13; every other trigger tests 3.12 only.
        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
        node-version: [22]
        # Nightly runs and runs that request test-all-client-versions cover both
        # the published and latest client; everything else tests latest only.
        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true' || inputs.test-all-client-versions == true) && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
        # Test configurations: Either from matrix_json input or generated from ci_matrix.json
        config: ${{ fromJSON(inputs.matrix_json || needs.generate-matrix.outputs.matrix).include }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          ref: ${{ inputs.pr_head_sha || github.event.pull_request.head.sha || github.sha }}

      # Note: Using full repo path with pinned SHA ensures actions are loaded from
      # a trusted commit, not from PR checkout. This is critical for security when
      # called from pull_request_target workflows.
      - name: Setup test environment
        # Skipped when the config restricts clients and this client is not listed.
        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
        uses: llamastack/llama-stack/.github/actions/setup-test-environment@76bcb6657de312160c726fbe069275cd5537b702
        with:
          python-version: ${{ matrix.python-version }}
          client-version: ${{ matrix.client-version }}
          sdk_install_url: ${{ inputs.sdk_install_url || '' }}
          setup: ${{ matrix.config.setup }}
          suite: ${{ matrix.config.suite }}
          inference-mode: ${{ matrix.config.inference_mode || 'replay' }}

      - name: Setup Node.js for TypeScript client tests
        # Only needed for the server client, and only when the tests below will actually run.
        if: ${{ matrix.client == 'server' && (matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client)) }}
        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
        with:
          node-version: ${{ matrix.node-version }}
          # `cond && '' || 'npm'` would always yield 'npm' because '' is falsy, so
          # the condition is inverted to make the empty string the fallback.
          cache: ${{ !inputs.disable_cache && 'npm' || '' }}
          cache-dependency-path: tests/integration/client-typescript/package-lock.json
          package-manager-cache: ${{ !inputs.disable_cache }}

      - name: Setup TypeScript client
        # Same guard as the Node.js step: skip when this client is not allowed for the config.
        if: ${{ matrix.client == 'server' && (matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client)) }}
        id: setup-ts-client
        uses: llamastack/llama-stack/.github/actions/setup-typescript-client@76bcb6657de312160c726fbe069275cd5537b702
        with:
          client-version: ${{ matrix.client-version }}

      - name: Run tests
        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
        uses: llamastack/llama-stack/.github/actions/run-and-record-tests@76bcb6657de312160c726fbe069275cd5537b702
        env:
          # Placeholder credentials.
          OPENAI_API_KEY: dummy
          AWS_BEARER_TOKEN_BEDROCK: replay-mode-dummy-key
          AWS_DEFAULT_REGION: us-west-2
          # Empty when the TypeScript client step did not run.
          TS_CLIENT_PATH: ${{ steps.setup-ts-client.outputs.ts-client-path || '' }}
        with:
          # An explicit stack_config on the matrix entry wins; otherwise the
          # value is derived from the client type, with docker as the fallback.
          stack-config: >-
            ${{ matrix.config.stack_config
                || (matrix.client == 'library' && 'ci-tests')
                || (matrix.client == 'server' && 'server:ci-tests')
                || 'docker:ci-tests' }}
          setup: ${{ matrix.config.setup }}
          inference-mode: ${{ matrix.config.inference_mode || 'replay' }}
          suite: ${{ matrix.config.suite }}
          target-branch: ${{ inputs.pr_head_ref || '' }}
          # NOTE(review): for events without a pull_request payload the head repo
          # name is empty, so the inequality presumably holds and this evaluates
          # to 'true' - confirm that is the intended value for push/schedule runs.
          is-fork-pr: ${{ inputs.is_fork_pr && 'true' || (github.event.pull_request.head.repo.full_name != github.repository && 'true' || 'false') }}