diff --git a/README.md b/README.md
index 266986d..56b8fa4 100644
--- a/README.md
+++ b/README.md
@@ -2,30 +2,52 @@
 This repo is my setup/notes for running openshift/installer through ci-operator. I use this to
 launch a cluster in AWS via openshift/installer, and to run CI against the cluster.

 There are a few minor additions to the templates in this repo from those running in CI; I've
-added a secret to transfer aws credentials, quay pull secret and the tectonic config.
+added a secret to transfer the AWS credentials and the Quay pull secret.

 First, get a token and login to the OpenShift CI cluster.

-Next: Note the files required for the cluster-secrets-aws secret and create them/change paths
-accordingly. The ssh key-pair is optional, and is required for OpenShift conformance tests to
-pass.
-
-Then:
-1. oc new-project my-namespace
-3. oc create secret -n your-namespace generic cluster-secrets-aws \
-     --from-file=secret/credentials \
-     --from-file=secret/pull-secret \
-     --from-file=secret/ssh-privatekey \
-     --from-file=secret/ssh-publickey \
-     -o yaml --dry-run | oc -n your-namespace apply -f -
-4. ci-operator -template templates/cluster-launch-installer-e2e-new.yaml \
+Next: note the files required for the cluster-secrets-aws secret and populate their contents.
+The SSH key pair should be generated just for CI testing and used nowhere else. The private
+key is only needed when running the full conformance test suite, so it is optional.
+
+Pick a short namespace name to avoid errors like:
+
+> the S3 bucket name
+> "wking-next-gen-installer-ef260.origin-ci-int-aws.dev.rhcloud.com",
+> generated from the cluster name and base domain, is too long; S3
+> bucket names must be less than 63 characters; please choose a
+> shorter cluster name or base domain
+
+It looks like 22 characters is the maximum namespace length with the
+`${NAMESPACE}-${JOB_NAME_HASH}` approach to cluster naming.
+
+Then:
+
+Create a new project in https://api.ci.openshift.org:
+
+```bash
+oc new-project your-namespace
+```
+
+Fill in the files in the `cluster-secrets-aws` directory (see the placeholder files in that
+directory for more info):
+
+```
+credentials    (AWS credentials; see the noted format in this repo)
+pull-secret    (quay pull secret json config)
+ssh-privatekey (only required for the full conformance tests, not for aws-e2e tests)
+ssh-publickey  (required by the installer)
+```
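+
+As a rough sketch of how these can be produced (file names match the list above; adjust paths
+to your checkout): the key pair can be a throwaway one generated only for these CI runs, and
+the credentials file follows the standard AWS shared-credentials layout.
+
+```bash
+# generate a throwaway key pair used only for CI runs
+ssh-keygen -t rsa -N '' -f cluster-secrets-aws/ssh-privatekey
+mv cluster-secrets-aws/ssh-privatekey.pub cluster-secrets-aws/ssh-publickey
+
+# cluster-secrets-aws/credentials -- standard AWS shared-credentials format
+cat > cluster-secrets-aws/credentials <<'EOF'
+[default]
+aws_access_key_id = YOUR_ACCESS_KEY_ID
+aws_secret_access_key = YOUR_SECRET_ACCESS_KEY
+EOF
+```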
+
+Now run the ci-operator command:
+
+```bash
+ci-operator -template templates/cluster-launch-installer-e2e-new.yaml \
    -config /path/to/openshift/release/ci-operator/config/openshift/installer/master.yaml \
    -git-ref=your-gh-username/installer@your-branch \
+   -secret-dir=/path/to/cluster-secrets-aws \
    -namespace=your-namespace
+```

 You can then access your project in the CI web console, check the pods.
-When running the smoke tests, you'll see the output of the smoke tests in
-logs of pod/dev container/setup.
 When running the full test suite, conformance test output will be found via
 pod logs for pod/dev container/test.
@@ -33,9 +55,9 @@ pod logs for pod/dev container/test.
 You can grab the kubeconfig, copy to your local system, and access the cluster running in AWS.
 From the CI web console in your project, go to pod/dev container/setup and in the terminal,
 kubeconfig is at /tmp/shared/cluster/generated/auth/kubeconfig. Copy that to your local system, then
-export KUBECONFIG=/path/to/copied/kubeconfig.
+`export KUBECONFIG=/path/to/copied/kubeconfig`.

 Clean up your AWS resources from a cluster!!!
-1. git clone git@github.com:openshift/hive.git
-2. cd hive; make hiveutil
-3. bin/hiveutil aws-tag-deprovision --cluster-name your-cluster-name --loglevel debug tectonicClusterID=see-aws-console-for-tag
+1. `git clone git@github.com:openshift/hive.git`
+2. `cd hive; make hiveutil`
+3. `bin/hiveutil aws-tag-deprovision --cluster-name your-cluster-name --loglevel debug tectonicClusterID=see-aws-console-for-tag`
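+
+If you'd rather not dig through the AWS console for the tag value, something like the following
+should list it (a sketch, assuming the AWS CLI is configured with the same credentials and
+region that were used for the install):
+
+```bash
+# list tectonicClusterID tag values on resources created by the installer
+aws ec2 describe-tags \
+  --filters "Name=key,Values=tectonicClusterID" \
+  --query "Tags[].{Resource:ResourceId,ClusterID:Value}" \
+  --output table
+```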
diff --git a/secret/credentials b/cluster-secrets-aws/credentials
similarity index 100%
rename from secret/credentials
rename to cluster-secrets-aws/credentials
diff --git a/secret/pull-secret b/cluster-secrets-aws/pull-secret
similarity index 100%
rename from secret/pull-secret
rename to cluster-secrets-aws/pull-secret
diff --git a/templates/cluster-launch-installer-e2e-new.yaml b/templates/cluster-launch-installer-e2e-modified.yaml
similarity index 92%
rename from templates/cluster-launch-installer-e2e-new.yaml
rename to templates/cluster-launch-installer-e2e-modified.yaml
index ebd5c09..7c96d2d 100644
--- a/templates/cluster-launch-installer-e2e-new.yaml
+++ b/templates/cluster-launch-installer-e2e-modified.yaml
@@ -7,8 +7,8 @@ parameters:
 - name: JOB_NAME_HASH
   required: true
 - name: NAMESPACE
-  #required: true
-  value: "somalley-test"
+  # MODIFIED HERE, USE YOUR NAMESPACE
+  value: "your-ns"
 - name: IMAGE_FORMAT
   required: true
 - name: IMAGE_INSTALLER
@@ -16,18 +16,18 @@
 - name: IMAGE_TESTS
   required: true
 - name: CLUSTER_TYPE
-  required: true
+  # MODIFIED HERE, SPECIFY "aws"
+  #required: true
+  value: "aws"
 - name: TEST_FOCUS
-  value: Suite:openshift/conformance/parallel
 - name: TEST_SKIP
 - name: TEST_FOCUS_SERIAL
-  value: Suite:openshift/conformance/serial
 - name: TEST_SKIP_SERIAL
 - name: TEST_PARALLELISM
   value: "30"
 # Ensures the release image is created and tested
 - name: TEST_COMMAND
-- name: RELEASE_IMAGE_LATEST
+#- name: RELEASE_IMAGE_LATEST

 objects:
@@ -61,22 +61,10 @@ objects:
       emptyDir: {}
     - name: shared-tmp
       emptyDir: {}
-    - name: cluster-profile
-      secret:
-        secretName: ${JOB_NAME_SAFE}-cluster-profile
+    # MODIFIED HERE, added creds secret
     - name: pull-secret-aws
       secret:
         secretName: cluster-secrets-aws
-    initContainers:
-    - name: cli
-      image: ${IMAGE_CLI}
-      volumeMounts:
-      - name: shared-tmp
-        mountPath: /tmp/shared
-      command:
-      - cp
-      - /usr/bin/oc
-      - /tmp/shared/oc

     containers:
@@ -93,8 +81,6 @@
       volumeMounts:
       - name: shared-tmp
         mountPath: /tmp/shared
-      - name: cluster-profile
-        mountPath: /tmp/cluster
       - name: artifacts
         mountPath: /tmp/artifacts
       env:
@@ -184,10 +170,9 @@
       volumeMounts:
       - name: shared-tmp
         mountPath: /tmp
-      - name: cluster-profile
-        mountPath: /etc/openshift-installer
       - name: artifacts
         mountPath: /tmp/artifacts
+      # MODIFIED HERE, added my creds secret
      - name: pull-secret-aws
        mountPath: /tmp/pull-secret-aws
       env:
       - name: INSTANCE_PREFIX
         value: ${NAMESPACE}-${JOB_NAME_HASH}
@@ -196,6 +181,7 @@
       - name: TYPE
         value: ${CLUSTER_TYPE}
       - name: AWS_SHARED_CREDENTIALS_FILE
+        # MODIFIED HERE, added path to files in my secret
         #value: /etc/openshift-installer/.awscred
         value: /tmp/cluster/credentials
       - name: KUBECONFIG
@@ -211,7 +197,7 @@
         trap 'kill $(jobs -p); exit 0' TERM

         mkdir /tmp/cluster
-        cp /etc/openshift-installer/* /tmp/cluster/
+        cp /tmp/pull-secret-aws/* /tmp/cluster/
         mkdir /tmp/artifacts/installer

         cd /tmp/cluster
@@ -221,8 +207,10 @@
         OPENSHIFT_INSTALL_BASE_DOMAIN=origin-ci-int-aws.dev.rhcloud.com \
         OPENSHIFT_INSTALL_EMAIL_ADDRESS=test@ci.openshift.io \
         OPENSHIFT_INSTALL_PASSWORD=$( date +%s | sha256sum | base64 | head -c 32 ; echo ) \
-        OPENSHIFT_INSTALL_SSH_PUB_KEY="$( cat /etc/openshift-installer/ssh-publickey )" \
-        OPENSHIFT_INSTALL_PULL_SECRET="$( cat /etc/openshift-installer/pull-secret )" \
+        # MODIFIED HERE
+        # changed path to the pull-secret-aws files
+        OPENSHIFT_INSTALL_SSH_PUB_KEY="$( cat /tmp/cluster/ssh-publickey )" \
+        OPENSHIFT_INSTALL_PULL_SECRET="$( cat /tmp/cluster/pull-secret )" \
         OPENSHIFT_INSTALL_PLATFORM=aws \
         OPENSHIFT_INSTALL_AWS_REGION=us-east-1 \
         openshift-install --dir /tmp/artifacts/installer --log-level debug cluster
@@ -272,10 +260,11 @@
       volumeMounts:
       - name: shared-tmp
         mountPath: /tmp/shared
-      - name: cluster-profile
-        mountPath: /etc/openshift-installer
       - name: artifacts
         mountPath: /tmp/artifacts
+      # MODIFIED HERE, ADDED SECRET MOUNT
+      - name: pull-secret-aws
+        mountPath: /tmp/pull-secret-aws
       env:
       - name: INSTANCE_PREFIX
         value: ${NAMESPACE}-${JOB_NAME_HASH}
@@ -336,7 +325,8 @@
         wait

         echo "Deprovisioning cluster ..."
-        export AWS_SHARED_CREDENTIALS_FILE=/tmp/shared/cluster/credentials
+        # MODIFIED HERE path
+        export AWS_SHARED_CREDENTIALS_FILE=/tmp/pull-secret-aws/credentials
         openshift-install --dir /tmp/artifacts/installer --log-level debug destroy-cluster
       }
@@ -349,4 +339,3 @@
         fi
         sleep 60 & wait
       done
-
diff --git a/templates/cluster-launch-installer-e2e-smoke.yaml b/templates/cluster-launch-installer-e2e-smoke.yaml
deleted file mode 100644
index c8caff2..0000000
--- a/templates/cluster-launch-installer-e2e-smoke.yaml
+++ /dev/null
@@ -1,360 +0,0 @@
-kind: Template
-apiVersion: template.openshift.io/v1
-
-parameters:
-- name: JOB_NAME_SAFE
-  required: true
-- name: JOB_NAME_HASH
-  required: true
-- name: NAMESPACE
-  #required: true
-  value: "somalley-40"
-- name: IMAGE_FORMAT
-  required: true
-- name: IMAGE_INSTALLER
-  required: true
-- name: IMAGE_CLI
-  required: true
-- name: IMAGE_TESTS
-  required: true
-- name: IMAGE_INSTALLER_SMOKE
-  required: true
-- name: CLUSTER_TYPE
-  #required: true
-  value: "aws"
-
-objects:
-
-# We want the cluster to be able to access these images
-- kind: RoleBinding
-  apiVersion: authorization.openshift.io/v1
-  metadata:
-    name: ${JOB_NAME_SAFE}-image-puller
-    namespace: ${NAMESPACE}
-  roleRef:
-    name: system:image-puller
-  subjects:
-  - kind: SystemGroup
-    name: system:unauthenticated
-
-# The e2e pod spins up a cluster, runs e2e tests, and then cleans up the cluster.
-- kind: Pod - apiVersion: v1 - metadata: - name: ${JOB_NAME_SAFE} - namespace: ${NAMESPACE} - annotations: - # we want to gather the teardown logs no matter what - ci-operator.openshift.io/wait-for-container-artifacts: teardown - spec: - restartPolicy: Never - activeDeadlineSeconds: 10800 - terminationGracePeriodSeconds: 900 - volumes: - - name: artifacts - emptyDir: {} - - name: shared-tmp - emptyDir: {} - - name: cluster-profile - secret: - secretName: ${JOB_NAME_SAFE}-cluster-profile - - name: pull-secret-aws - secret: - secretName: cluster-secrets-aws - - initContainers: - - name: cli - image: ${IMAGE_CLI} - volumeMounts: - - name: shared-tmp - mountPath: /tmp/shared - command: - - cp - - /usr/bin/oc - - /tmp/shared/oc - - name: smoke-test - image: ${IMAGE_INSTALLER_SMOKE} - volumeMounts: - - name: shared-tmp - mountPath: /tmp/shared - command: - - cp - #- /go/src/github.com/openshift/installer/bazel-bin/tests/smoke/linux_amd64_stripped/go_default_test - - /usr/bin/smoke - - /tmp/shared/smoke - - containers: - - # Once admin.kubeconfig exists, executes shared tests - - name: test - image: ${IMAGE_INSTALLER_SMOKE} - resources: - requests: - cpu: 1 - memory: 300Mi - limits: - cpu: 3 - memory: 2Gi - volumeMounts: - - name: shared-tmp - mountPath: /tmp/shared - - name: cluster-profile - mountPath: /tmp/cluster - - name: artifacts - mountPath: /tmp/artifacts - env: - - name: HOME - value: /tmp/home - - name: KUBECONFIG - value: /tmp/admin.kubeconfig - command: - - /bin/bash - - -c - - | - #!/bin/bash - set -euo pipefail - - trap 'touch /tmp/shared/exit' EXIT - trap 'kill $(jobs -p); exit 0' TERM - - mkdir -p "${HOME}" - - # wait until the setup job creates admin.kubeconfig - while true; do - if [[ -f /tmp/shared/exit ]]; then - echo "Another process exited" 2>&1 - exit 1 - fi - if [[ ! -f /tmp/shared/admin.kubeconfig ]]; then - sleep 15 & wait - continue - fi - break - done - echo "Found shared kubeconfig" - - # don't let clients impact the global kubeconfig - cp /tmp/shared/admin.kubeconfig /tmp/admin.kubeconfig - - PATH=/usr/libexec/origin:$PATH - - # set up cloud provider specific env vars - if [[ "${CLUSTER_TYPE}" == "gcp" ]]; then - export GOOGLE_APPLICATION_CREDENTIALS="/tmp/cluster/gce.json" - export KUBE_SSH_USER=cloud-user - mkdir -p ~/.ssh - cp /tmp/cluster/ssh-privatekey ~/.ssh/google_compute_engine || true - export PROVIDER_ARGS='-provider=gce -gce-zone=us-east1-c -gce-project=openshift-gce-devel-ci' - elif [[ "${CLUSTER_TYPE}" == "aws" ]]; then - region="$( python -c 'import sys, json; print json.load(sys.stdin)["tectonic_aws_region"]' /tmp/cluster/inputs.yaml - ) - mkdir /tmp/artifacts/installer - cp /tmp/cluster/inputs.yaml /tmp/artifacts/installer/ - - echo "Invoking installer ..." - - cd /tmp/cluster - ls -al /tmp/pull-secret-aws - cp /tmp/pull-secret-aws/* . - # Uploaded from local system, via secret cluster-secrets-aws - cp /tmp/pull-secret-aws/credentials /tmp/cluster/credentials - - tectonic init --config=inputs.yaml - mv -f ${NAME}/* /tmp/cluster/ - cp config.yaml internal.yaml terraform.tfvars /tmp/artifacts/installer/ - - tectonic install --dir=. --log-level=debug - # wait until oc shows up - while true; do - if [[ -f /tmp/exit ]]; then - echo "Interrupted" - cp $KUBECONFIG /tmp/admin.kubeconfig - exit 1 - fi - if [[ ! -f /tmp/oc ]]; then - echo "Waiting for oc binary to show up ..." - sleep 15 & wait - continue - fi - if ! /tmp/oc get nodes 2>/dev/null; then - echo "Waiting for API at $(/tmp/oc whoami --show-server) to respond ..." 
- sleep 15 & wait - continue - fi - # check multiple namespaces while we are transitioning to the new locations - if /tmp/oc get deploy/router -n tectonic-ingress 2>/dev/null; then - router_namespace=tectonic-ingress - elif /tmp/oc get deploy/router -n openshift-ingress 2>/dev/null; then - router_namespace=openshift-ingress - elif /tmp/oc get deploy/router -n default 2>/dev/null; then - router_namespace=default - else - echo "Waiting for router to be created ..." - sleep 15 & wait - continue - fi - break - done - if ! /tmp/oc wait deploy/router -n "${router_namespace}" --for condition=available --timeout=20m; then - echo "Installation failed" - cp $KUBECONFIG /tmp/admin.kubeconfig - exit 1 - fi - cp $KUBECONFIG /tmp/admin.kubeconfig - echo "Installation successful" - echo "Starting installer smoke tests..." - export SMOKE_KUBECONFIG=${KUBECONFIG} - export SMOKE_MANIFEST_PATHS=/tmp/cluster/generated/manifests - # 3 masters/3 workers/1 bootstrap(if post remove etcd) - export SMOKE_NODE_COUNT=3 - if ! /tmp/smoke -cluster -test.v; then - echo "Smoke tests failed" - exit 1 - fi - echo "Smoke tests passed" - - # Performs cleanup of all created resources - - name: teardown - image: ${IMAGE_INSTALLER_SMOKE} - volumeMounts: - - name: shared-tmp - mountPath: /tmp/shared - - name: cluster-profile - mountPath: /etc/openshift-installer - - name: artifacts - mountPath: /tmp/artifacts - env: - - name: INSTANCE_PREFIX - value: ${NAMESPACE}-${JOB_NAME_HASH} - - name: TYPE - value: ${CLUSTER_TYPE} - - name: KUBECONFIG - value: /tmp/shared/admin.kubeconfig - command: - - /bin/bash - - -c - - | - #!/bin/bash - function teardown() { - set +e - touch /tmp/shared/exit - export PATH=$PATH:/tmp/shared - - echo "Gathering artifacts ..." - mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics - - oc --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes - oc --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers - oc --request-timeout=5s get nodes -o json > /tmp/artifacts/nodes.json - oc --request-timeout=5s get events --all-namespaces -o json > /tmp/artifacts/events.json - oc --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api - - # gather nodes first in parallel since they may contain the most relevant debugging info - while IFS= read -r i; do - mkdir -p /tmp/artifacts/nodes/$i - ( - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/messages | gzip -c > /tmp/artifacts/nodes/$i/messages.gz - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/journal | sed -e 's|.*href="\(.*\)".*|\1|;t;d' > /tmp/journals - while IFS= read -r j; do - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/journal/${j}system.journal | gzip -c > /tmp/artifacts/nodes/$i/journal.gz - done < /tmp/journals - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/metrics | gzip -c > /tmp/artifacts/metrics/node-$i.gz - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap > /tmp/artifacts/nodes/$i/heap - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/secure | gzip -c > 
/tmp/artifacts/nodes/$i/secure.gz - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/audit | gzip -c > /tmp/artifacts/nodes/$i/audit.gz - ) & - done < /tmp/nodes - - while IFS= read -r i; do - file="$( echo "$i" | cut -d ' ' -f 3 | tr -s ' ' '_' )" - oc exec $i -- /bin/bash -c 'oc get --raw /debug/pprof/heap --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' > /tmp/artifacts/metrics/${file}-heap - oc exec $i -- /bin/bash -c 'oc get --raw /metrics --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' | gzip -c > /tmp/artifacts/metrics/${file}-api.gz - oc exec $i -- /bin/bash -c 'oc get --raw /debug/pprof/heap --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' > /tmp/artifacts/metrics/${file}-controllers-heap - oc exec $i -- /bin/bash -c 'oc get --raw /metrics --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' | gzip -c > /tmp/artifacts/metrics/${file}-controllers.gz - done < /tmp/pods-api - - while IFS= read -r i; do - file="$( echo "$i" | cut -d ' ' -f 2,3,5 | tr -s ' ' '_' )" - oc logs --request-timeout=20s $i | gzip -c > /tmp/artifacts/pods/${file}.log.gz - oc logs --request-timeout=20s -p $i | gzip -c > /tmp/artifacts/pods/${file}_previous.log.gz - done < /tmp/containers - - echo "Waiting for node logs to finish ..." - wait - - echo "Deprovisioning cluster ..." - export AWS_SHARED_CREDENTIALS_FILE=/tmp/cluster/credentials - cd /tmp/shared/cluster - for i in `seq 1 2`; do - tectonic destroy --dir=. --log-level=debug - done - } - - trap 'teardown' EXIT - trap 'kill $(jobs -p); exit 0' TERM - - for i in `seq 1 120`; do - if [[ -f /tmp/shared/exit ]]; then - exit 0 - fi - sleep 60 & wait - done diff --git a/templates/cluster-launch-installer-e2e.yaml b/templates/cluster-launch-installer-e2e.yaml deleted file mode 100644 index 1e2871e..0000000 --- a/templates/cluster-launch-installer-e2e.yaml +++ /dev/null @@ -1,367 +0,0 @@ -kind: Template -apiVersion: template.openshift.io/v1 - -parameters: -- name: JOB_NAME_SAFE - required: true -- name: JOB_NAME_HASH - required: true -- name: NAMESPACE - #required: true - value: "your-ns" -- name: IMAGE_FORMAT - required: true -- name: IMAGE_INSTALLER - required: true -- name: IMAGE_TESTS - required: true -- name: CLUSTER_TYPE - #required: true - value: "aws" -- name: TEST_FOCUS - #required: true - value: Suite:openshift/conformance/parallel -- name: TEST_SKIP -- name: TEST_FOCUS_SERIAL - #required: true - value: Suite:openshift/conformance/serial -- name: TEST_SKIP_SERIAL - -objects: - -# We want the cluster to be able to access these images -- kind: RoleBinding - apiVersion: authorization.openshift.io/v1 - metadata: - name: ${JOB_NAME_SAFE}-image-puller - namespace: ${NAMESPACE} - roleRef: - name: system:image-puller - subjects: - - kind: SystemGroup - name: system:unauthenticated - -# The e2e pod spins up a cluster, runs e2e tests, and then cleans up the cluster. 
-- kind: Pod - apiVersion: v1 - metadata: - name: ${JOB_NAME_SAFE} - namespace: ${NAMESPACE} - annotations: - # we want to gather the teardown logs no matter what - ci-operator.openshift.io/wait-for-container-artifacts: teardown - spec: - restartPolicy: Never - activeDeadlineSeconds: 10800 - terminationGracePeriodSeconds: 900 - volumes: - - name: artifacts - emptyDir: {} - - name: shared-tmp - emptyDir: {} - - name: cluster-profile - secret: - secretName: ${JOB_NAME_SAFE}-cluster-profile - - name: pull-secret-aws - secret: - secretName: cluster-secrets-aws - initContainers: - - name: cli - image: ${IMAGE_CLI} - volumeMounts: - - name: shared-tmp - mountPath: /tmp/shared - command: - - cp - - /usr/bin/oc - - /tmp/shared/oc - - containers: - - # Once admin.kubeconfig exists, executes shared tests - - name: test - image: ${IMAGE_TESTS} - resources: - requests: - cpu: 1 - memory: 300Mi - limits: - cpu: 3 - memory: 2Gi - volumeMounts: - - name: shared-tmp - mountPath: /tmp/shared - - name: cluster-profile - mountPath: /tmp/cluster - - name: artifacts - mountPath: /tmp/artifacts - env: - - name: HOME - value: /tmp/home - - name: KUBECONFIG - value: /tmp/admin.kubeconfig - command: - - /bin/bash - - -c - - | - #!/bin/bash - set -euo pipefail - - trap 'touch /tmp/shared/exit' EXIT - trap 'kill $(jobs -p); exit 0' TERM - - cp "$(which oc)" /tmp/shared/ - - mkdir -p "${HOME}" - - # wait until the setup job creates admin.kubeconfig - while true; do - if [[ -f /tmp/shared/exit ]]; then - echo "Another process exited" 2>&1 - exit 1 - fi - if [[ ! -f /tmp/shared/admin.kubeconfig ]]; then - sleep 15 & wait - continue - fi - break - done - echo "Found shared kubeconfig" - - # don't let clients impact the global kubeconfig - cp /tmp/shared/admin.kubeconfig /tmp/admin.kubeconfig - - PATH=/usr/libexec/origin:$PATH - - # set up cloud provider specific env vars - if [[ "${CLUSTER_TYPE}" == "gcp" ]]; then - export GOOGLE_APPLICATION_CREDENTIALS="/tmp/cluster/gce.json" - export KUBE_SSH_USER=cloud-user - mkdir -p ~/.ssh - cp /tmp/cluster/ssh-privatekey ~/.ssh/google_compute_engine || true - export PROVIDER_ARGS='-provider=gce -gce-zone=us-east1-c -gce-project=openshift-gce-devel-ci' - elif [[ "${CLUSTER_TYPE}" == "aws" ]]; then - region="$( python -c 'import sys, json; print json.load(sys.stdin)["tectonic_aws_region"]' /tmp/rules - oc create ns openshift-telemetry - oc adm policy add-cluster-role-to-user cluster-reader -z default -n openshift-telemetry - oc create secret generic telemeter-client --from-literal=to=https://telemeter.svc.ci.openshift.org --from-literal=id=${NAMESPACE}-${JOB_NAME_HASH} --from-file=token=/tmp/cluster/telemeter-token --from-literal=salt=random-salt --from-file=rules=/tmp/rules --dry-run -o yaml | oc apply -n openshift-telemetry -f - - oc create -f https://raw.githubusercontent.com/openshift/telemeter/master/deploy/telemeter-client-openshift.yaml -n openshift-telemetry - set -e - - # TODO: the test binary should really be a more structured command - most of these flags should be - # autodetected from the running cluster. - # TODO: bump nodes up to 40 again - set -x - if [[ -n "${TEST_SUITE}" || -n "${TEST_FOCUS}" ]]; then - ginkgo -v -noColor -nodes=30 $( which extended.test ) -- \ - -suite "${TEST_SUITE}" -ginkgo.focus="${TEST_FOCUS}" -ginkgo.skip="${TEST_SKIP}" \ - -e2e-output-dir /tmp/artifacts -report-dir /tmp/artifacts/junit \ - -test.timeout=10m ${PROVIDER_ARGS-} || rc=$? 
- fi - if [[ -n "${TEST_SUITE_SERIAL}" || -n "${TEST_FOCUS_SERIAL}" ]]; then - ginkgo -v -noColor -nodes=1 $( which extended.test ) -- \ - -suite "${TEST_SUITE_SERIAL}" -ginkgo.focus="${TEST_FOCUS_SERIAL}" -ginkgo.skip="${TEST_SKIP_SERIAL}" \ - -e2e-output-dir /tmp/artifacts -report-dir /tmp/artifacts/junit/serial \ - -test.timeout=20m ${PROVIDER_ARGS-} || rc=$? - fi - exit ${rc:-0} - - # Runs an install - - name: setup - image: ${IMAGE_INSTALLER} - volumeMounts: - - name: shared-tmp - mountPath: /tmp - - name: cluster-profile - mountPath: /etc/openshift-installer - - name: artifacts - mountPath: /tmp/artifacts - - name: pull-secret-aws - mountPath: /tmp/pull-secret-aws - env: - - name: INSTANCE_PREFIX - value: ${NAMESPACE}-${JOB_NAME_HASH} - - name: TYPE - value: ${CLUSTER_TYPE} - # TODO - # This is how I ran from local system - # Uploaded from local system, via secret cluster-secrets-aws - - name: AWS_SHARED_CREDENTIALS_FILE - value: /tmp/cluster/credentials - - name: KUBECONFIG - value: /tmp/cluster/generated/auth/kubeconfig - command: - - /bin/bash - - -c - - | - #!/bin/bash - #set -euo pipefail - set -x - - trap 'rc=$?; if [[ $rc -ne 0 ]]; then touch /tmp/exit; fi; exit $rc' EXIT - - mkdir /tmp/cluster - cp /etc/openshift-installer/* /tmp/cluster/ - #DEBUG - ls -al /tmp/pull-secret-aws - cp /tmp/pull-secret-aws/* . - #cp /tmp/pull-secret-aws/credentials /tmp/cluster/credentials - - - export NAME=${INSTANCE_PREFIX} - ( - export EMAIL=test@ci.openshift.io - export PASSWORD=$( date +%s | sha256sum | base64 | head -c 32 ; echo ) - export SSH_KEY="$( cat /tmp/cluster/ssh-publickey )" - export EC2_AMI_OVERRIDE=ami-02323e873420d8eab - export EXPIRATION_DATE=$( date -d '4 hours' --iso=minutes --utc ) - envsubst /tmp/cluster/inputs.yaml - ) - mkdir /tmp/artifacts/installer - cp /tmp/cluster/inputs.yaml /tmp/artifacts/installer/ - - echo "Invoking installer ..." - - cd /tmp/cluster - tectonic init --config=inputs.yaml - mv -f ${NAME}/* /tmp/cluster/ - cp config.yaml internal.yaml terraform.tfvars /tmp/artifacts/installer/ - - tectonic install --dir=. --log-level=debug - - # wait until oc shows up - while true; do - if [[ -f /tmp/exit ]]; then - echo "Interrupted" - cp $KUBECONFIG /tmp/admin.kubeconfig - exit 1 - fi - if [[ ! -f /tmp/oc ]]; then - echo "Waiting for oc binary to show up ..." - sleep 15 & wait - continue - fi - if ! /tmp/oc get nodes 2>/dev/null; then - echo "Waiting for API at $(/tmp/oc whoami --show-server) to respond ..." - sleep 15 & wait - continue - fi - # check multiple namespaces while we are transitioning to the new locations - if /tmp/oc get deploy/router -n tectonic-ingress 2>/dev/null; then - router_namespace=tectonic-ingress - elif /tmp/oc get deploy/router -n openshift-ingress 2>/dev/null; then - router_namespace=openshift-ingress - elif /tmp/oc get deploy/router -n default 2>/dev/null; then - router_namespace=default - else - echo "Waiting for router to be created ..." - sleep 15 & wait - continue - fi - break - done - if ! 
/tmp/oc wait deploy/router -n "${router_namespace}" --for condition=available --timeout=20m; then - echo "Installation failed" - cp $KUBECONFIG /tmp/admin.kubeconfig - exit 1 - fi - cp $KUBECONFIG /tmp/admin.kubeconfig - echo "Installation successful" - - # Performs cleanup of all created resources - - name: teardown - image: ${IMAGE_INSTALLER} - volumeMounts: - - name: shared-tmp - mountPath: /tmp/shared - - name: cluster-profile - mountPath: /etc/openshift-installer - - name: artifacts - mountPath: /tmp/artifacts - env: - - name: INSTANCE_PREFIX - value: ${NAMESPACE}-${JOB_NAME_HASH} - - name: TYPE - value: ${CLUSTER_TYPE} - - name: KUBECONFIG - value: /tmp/shared/admin.kubeconfig - command: - - /bin/bash - - -c - - | - #!/bin/bash - function teardown() { - set +e - touch /tmp/shared/exit - export PATH=$PATH:/tmp/shared - - echo "Gathering artifacts ..." - mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics - - oc --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes - oc --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers - oc --request-timeout=5s get nodes -o json > /tmp/artifacts/nodes.json - oc --request-timeout=5s get events --all-namespaces -o json > /tmp/artifacts/events.json - oc --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api - - # gather nodes first in parallel since they may contain the most relevant debugging info - while IFS= read -r i; do - mkdir -p /tmp/artifacts/nodes/$i - ( - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/messages | gzip -c > /tmp/artifacts/nodes/$i/messages.gz - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/journal | sed -e 's|.*href="\(.*\)".*|\1|;t;d' > /tmp/journals - while IFS= read -r j; do - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/journal/${j}system.journal | gzip -c > /tmp/artifacts/nodes/$i/journal.gz - done < /tmp/journals - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/metrics | gzip -c > /tmp/artifacts/metrics/node-$i.gz - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap > /tmp/artifacts/nodes/$i/heap - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/secure | gzip -c > /tmp/artifacts/nodes/$i/secure.gz - oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/audit | gzip -c > /tmp/artifacts/nodes/$i/audit.gz - ) & - done < /tmp/nodes - - while IFS= read -r i; do - file="$( echo "$i" | cut -d ' ' -f 3 | tr -s ' ' '_' )" - oc exec $i -- /bin/bash -c 'oc get --raw /debug/pprof/heap --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' > /tmp/artifacts/metrics/${file}-heap - oc exec $i -- /bin/bash -c 'oc get --raw /metrics --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' | gzip -c > /tmp/artifacts/metrics/${file}-api.gz - oc exec $i -- /bin/bash -c 'oc get --raw /debug/pprof/heap --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' > /tmp/artifacts/metrics/${file}-controllers-heap - oc exec $i -- /bin/bash -c 'oc get --raw 
/metrics --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' | gzip -c > /tmp/artifacts/metrics/${file}-controllers.gz - done < /tmp/pods-api - - while IFS= read -r i; do - file="$( echo "$i" | cut -d ' ' -f 2,3,5 | tr -s ' ' '_' )" - oc logs --request-timeout=20s $i | gzip -c > /tmp/artifacts/pods/${file}.log.gz - oc logs --request-timeout=20s -p $i | gzip -c > /tmp/artifacts/pods/${file}_previous.log.gz - done < /tmp/containers - - echo "Waiting for node logs to finish ..." - wait - - echo "Deprovisioning cluster ..." - export AWS_SHARED_CREDENTIALS_FILE=/tmp/cluster/credentials - cd /tmp/shared/cluster - for i in `seq 1 2`; do - tectonic destroy --dir=. --log-level=debug - done - } - - trap 'teardown' EXIT - trap 'kill $(jobs -p); exit 0' TERM - - for i in `seq 1 120`; do - if [[ -f /tmp/shared/exit ]]; then - exit 0 - fi - sleep 60 & wait - done