# Source: llama-stack/.github/workflows/integration-vector-io-tests.yml (branch: main, repository: EleanorWho/llama-stack)

# Workflow identity as displayed in the Actions UI.
name: Vector IO Integration Tests

run-name: Run the integration test suite with various VectorIO providers

on:
  # Every push to main or to a release-X.Y.x branch.
  push:
    branches:
      - main
      - "release-[0-9]+.[0-9]+.x"

  # Pull requests targeting those branches, restricted to changes that can
  # affect the vector IO test suite.
  pull_request:
    branches:
      - main
      - "release-[0-9]+.[0-9]+.x"
    paths:
      - "src/llama_stack/**"
      - "!src/llama_stack_ui/**"
      - "tests/integration/vector_io/**"
      # Test configuration that affects vector IO tests
      - "tests/integration/conftest.py"
      - "uv.lock"
      - "pyproject.toml"
      - "requirements.txt"
      # This workflow
      - ".github/workflows/integration-vector-io-tests.yml"

  # (test on python 3.13) Daily at 12 AM UTC.
  # The job matrix compares github.event.schedule against this exact string,
  # so keep the two in sync when changing the cron expression.
  schedule:
    - cron: "0 0 * * *"

# Concurrency group key: the workflow name plus either the run id (when the
# ref is refs/heads/main, so every run on main gets its own group) or the ref
# itself (so runs for the same non-main ref share a group). With
# cancel-in-progress enabled, a newer run in a shared group cancels the one
# still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      # we want to run all tests regardless of failure
      fail-fast: false
      matrix:
        # One job per vector IO provider under test.
        vector-io-provider:
          - 'inline::faiss'
          - 'inline::sqlite-vec'
          - 'inline::milvus'
          - 'remote::chromadb'
          - 'remote::pgvector'
          - 'remote::weaviate'
          - 'remote::qdrant'
          - 'remote::elasticsearch'
        # Runs triggered by the '0 0 * * *' schedule cover Python 3.12 and
        # 3.13; every other trigger covers 3.12 only.
        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}

    steps:
      # Fetch the repository contents; the action is pinned to a full commit
      # SHA, with the corresponding release tag noted alongside.
      - name: Checkout repository
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1

      # Run the repository-local setup action, passing the Python version
      # selected by the matrix.
      # NOTE(review): later steps invoke `uv`, so this action presumably
      # provides it — confirm in .github/actions/setup-runner.
      - name: Install dependencies
        uses: ./.github/actions/setup-runner
        with:
          python-version: ${{ matrix.python-version }}

      # Launch a detached ChromaDB container (chromadb job only), published on
      # port 8000 with its data directory mounted from ~/chroma.
      - name: Setup Chroma
        if: matrix.vector-io-provider == 'remote::chromadb'
        run: |
          docker run --detach --rm --pull always \
            --name chromadb \
            --publish 8000:8000 \
            --volume ~/chroma:/chroma/chroma \
            --env IS_PERSISTENT=TRUE \
            --env ANONYMIZED_TELEMETRY=FALSE \
            chromadb/chroma:latest

      # Launch a detached Weaviate 1.32.0 container (weaviate job only),
      # publishing ports 8080 and 50051.
      - name: Setup Weaviate
        if: matrix.vector-io-provider == 'remote::weaviate'
        run: |
          docker run --detach --rm --pull always \
            --name weaviate \
            --publish 8080:8080 \
            --publish 50051:50051 \
            cr.weaviate.io/semitechnologies/weaviate:1.32.0

      # Launch a detached Postgres 17 container with pgvector (pgvector job
      # only); user, password and database are all "llamastack".
      - name: Start PGVector DB
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          docker run --detach \
            --name pgvector \
            --env POSTGRES_USER=llamastack \
            --env POSTGRES_PASSWORD=llamastack \
            --env POSTGRES_DB=llamastack \
            --publish 5432:5432 \
            pgvector/pgvector:pg17

      # Poll Postgres once per second, up to 30 times. The step succeeds as
      # soon as the server accepts connections; if it never does, the
      # container logs are printed and the step fails, matching the other
      # provider wait steps in this job.
      - name: Wait for PGVector to be ready
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          echo "Waiting for Postgres to be ready..."
          for i in {1..30}; do
            # Probe over TCP rather than the unix socket: the next step
            # connects over TCP, and the image's one-time init server is
            # expected to listen on the socket only, so a socket probe can
            # report "ready" before the real server is up.
            if docker exec pgvector pg_isready -h 127.0.0.1 -U llamastack > /dev/null 2>&1; then
              echo "Postgres is ready!"
              exit 0
            fi
            echo "Not ready yet... ($i)"
            sleep 1
          done
          echo "Postgres failed to start"
          docker logs pgvector
          exit 1

      # Create the "vector" extension in the llamastack database if it is not
      # already present (pgvector job only).
      - name: Enable pgvector extension
        if: matrix.vector-io-provider == 'remote::pgvector'
        env:
          PGPASSWORD: llamastack
        run: |
          psql --host=localhost --username=llamastack --dbname=llamastack \
            --command="CREATE EXTENSION IF NOT EXISTS vector;"

      # Launch a detached Qdrant container (qdrant job only), published on
      # port 6333.
      - name: Setup Qdrant
        if: matrix.vector-io-provider == 'remote::qdrant'
        run: |
          docker run --detach --rm --pull always \
            --name qdrant \
            --publish 6333:6333 \
            qdrant/qdrant

      # Probe the Qdrant collections endpoint up to 30 times, two seconds
      # apart. Succeeds on the first response containing "status":"ok";
      # otherwise prints the container logs and fails the step.
      - name: Wait for Qdrant to be ready
        if: matrix.vector-io-provider == 'remote::qdrant'
        run: |
          echo "Waiting for Qdrant to be ready..."
          attempts_left=30
          while [ "$attempts_left" -gt 0 ]; do
            if curl --silent http://localhost:6333/collections | grep --quiet '"status":"ok"'; then
              echo "Qdrant is ready!"
              exit 0
            fi
            sleep 2
            attempts_left=$((attempts_left - 1))
          done
          echo "Qdrant failed to start"
          docker logs qdrant
          exit 1

      # Probe the ChromaDB v2 heartbeat endpoint up to 30 times, two seconds
      # apart. Succeeds on the first response containing "nanosecond
      # heartbeat"; otherwise prints the container logs and fails the step.
      - name: Wait for ChromaDB to be ready
        if: matrix.vector-io-provider == 'remote::chromadb'
        run: |
          echo "Waiting for ChromaDB to be ready..."
          attempts_left=30
          while [ "$attempts_left" -gt 0 ]; do
            if curl --silent http://localhost:8000/api/v2/heartbeat | grep --quiet "nanosecond heartbeat"; then
              echo "ChromaDB is ready!"
              exit 0
            fi
            sleep 2
            attempts_left=$((attempts_left - 1))
          done
          echo "ChromaDB failed to start"
          docker logs chromadb
          exit 1

      # Probe the Weaviate root endpoint up to 30 times, two seconds apart.
      # Succeeds on the first response containing the Weaviate documentation
      # URL; otherwise prints the container logs and fails the step.
      - name: Wait for Weaviate to be ready
        if: matrix.vector-io-provider == 'remote::weaviate'
        run: |
          echo "Waiting for Weaviate to be ready..."
          attempts_left=30
          while [ "$attempts_left" -gt 0 ]; do
            if curl --silent http://localhost:8080 | grep --quiet "https://weaviate.io/developers/weaviate/current/"; then
              echo "Weaviate is ready!"
              exit 0
            fi
            sleep 2
            attempts_left=$((attempts_left - 1))
          done
          echo "Weaviate failed to start"
          docker logs weaviate
          exit 1

      # Run Elastic's start-local installer (Elasticsearch only, version
      # 9.2.0), load the .env file it generates, and export the API key found
      # there to all later steps as ELASTICSEARCH_API_KEY.
      - name: Setup Elasticsearch
        id: setup-elasticsearch
        if: matrix.vector-io-provider == 'remote::elasticsearch'
        run: |
          curl --fail --silent --show-error --location https://elastic.co/start-local | sh -s -- -v 9.2.0 --esonly
          . elastic-start-local/.env
          printf 'ELASTICSEARCH_API_KEY=%s\n' "$ES_LOCAL_API_KEY" >> "$GITHUB_ENV"

      # List the dependencies of the ci-tests distribution and install them,
      # running one `uv pip install` per line of output.
      - name: Build Llama Stack
        run: |
          uv run --no-sync llama stack list-deps ci-tests \
            | xargs --max-lines=1 uv pip install

      # Record memory and disk usage before the tests; runs even when an
      # earlier step has failed.
      - name: Check Storage and Memory Available Before Tests
        if: always()
        run: |
          free --human
          df --human-readable

      # Run the vector IO integration suite against the provider selected by
      # the matrix. Each variable below uses the `cond && 'value' || ''`
      # expression form: it carries the value only for the matching provider
      # and is an empty string for every other matrix entry.
      - name: Run Vector IO Integration Tests
        env:
          # ChromaDB connection settings.
          ENABLE_CHROMADB: ${{ matrix.vector-io-provider == 'remote::chromadb' && 'true' || '' }}
          CHROMADB_URL: ${{ matrix.vector-io-provider == 'remote::chromadb' && 'http://localhost:8000' || '' }}
          # PGVector connection settings; these mirror the credentials and
          # port given to the pgvector container in the start step.
          ENABLE_PGVECTOR: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'true' || '' }}
          PGVECTOR_HOST: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'localhost' || '' }}
          PGVECTOR_PORT: ${{ matrix.vector-io-provider == 'remote::pgvector' && '5432' || '' }}
          PGVECTOR_DB: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
          PGVECTOR_USER: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
          PGVECTOR_PASSWORD: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
          # Qdrant connection settings.
          ENABLE_QDRANT: ${{ matrix.vector-io-provider == 'remote::qdrant' && 'true' || '' }}
          QDRANT_URL: ${{ matrix.vector-io-provider == 'remote::qdrant' && 'http://localhost:6333' || '' }}
          # Weaviate connection settings (host:port, no scheme).
          ENABLE_WEAVIATE: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'true' || '' }}
          WEAVIATE_CLUSTER_URL: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'localhost:8080' || '' }}
          # Elasticsearch URL; ELASTICSEARCH_API_KEY is not set here because
          # the Elasticsearch setup step exports it through GITHUB_ENV.
          ELASTICSEARCH_URL: ${{ matrix.vector-io-provider == 'remote::elasticsearch' && 'http://localhost:9200' || '' }}
        # The stack config fixes the files and inference providers and plugs
        # in the matrix provider as vector_io.
        run: |
          uv run --no-sync \
            pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
            tests/integration/vector_io

      # Record memory and disk usage after the tests; runs even when an
      # earlier step has failed.
      - name: Check Storage and Memory Available After Tests
        if: always()
        run: |
          free --human
          df --human-readable

      # Export SANITIZED_PROVIDER: the matrix provider id with every ':'
      # replaced by '_' (e.g. "inline::faiss" becomes "inline__faiss"). The
      # artifact upload step uses it to build the artifact name. Runs even
      # when earlier steps failed so that upload still gets a usable name.
      - name: Create sanitized provider name
        if: ${{ always() }}
        run: |
          # $GITHUB_ENV is quoted so the redirect target is never word-split,
          # matching the Elasticsearch setup step.
          echo "SANITIZED_PROVIDER=$(echo "${{ matrix.vector-io-provider }}" | tr ':' '_')" >> "$GITHUB_ENV"

      # Save the ChromaDB container logs for the artifact upload (chromadb job
      # only, even after a failure).
      - name: Write ChromaDB logs to file
        if: ${{ always() && matrix.vector-io-provider == 'remote::chromadb' }}
        run: |
          # `docker logs` replays the container's stderr on its own stderr,
          # so redirect both streams or stderr output is missing from the file.
          docker logs chromadb > chromadb.log 2>&1

      # Save the Qdrant container logs for the artifact upload (qdrant job
      # only, even after a failure).
      - name: Write Qdrant logs to file
        if: ${{ always() && matrix.vector-io-provider == 'remote::qdrant' }}
        run: |
          # `docker logs` replays the container's stderr on its own stderr,
          # so redirect both streams or stderr output is missing from the file.
          docker logs qdrant > qdrant.log 2>&1

      # Save the Elasticsearch container logs for the artifact upload
      # (elasticsearch job only, even after a failure).
      - name: Write Elasticsearch logs to file
        if: ${{ always() && matrix.vector-io-provider == 'remote::elasticsearch' }}
        run: |
          # `docker logs` replays the container's stderr on its own stderr,
          # so redirect both streams or stderr output is missing from the file.
          docker logs es-local-dev > elasticsearch.log 2>&1

      # Upload every *.log file in the workspace as a one-day artifact. The
      # name combines run id, run attempt, sanitized provider and Python
      # version. Runs even when earlier steps failed.
      - name: Upload all logs to artifacts
        if: always()
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
        with:
          name: vector-io-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ env.SANITIZED_PROVIDER }}-${{ matrix.python-version }}
          path: '*.log'
          retention-days: 1