diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index ef0fdc11..cb182424 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -13,16 +13,16 @@ jobs:
fail-fast: false
matrix:
include:
- - scala_version: "2.12.15"
- spark_version: "3.3.4"
- java_version: "11"
+ # - scala_version: "2.12.15"
+ # spark_version: "3.3.4"
+ # java_version: "11"
- scala_version: "2.12.18"
spark_version: "3.5.5"
java_version: "17"
- - scala_version: "2.13.8"
- spark_version: "3.3.4"
- java_version: "11"
+ # - scala_version: "2.13.8"
+ # spark_version: "3.3.4"
+ # java_version: "11"
- scala_version: "2.13.8"
spark_version: "3.5.5"
java_version: "17"
@@ -35,4 +35,4 @@ jobs:
with:
spark_version: ${{ matrix.spark_version }}
scala_version: ${{ matrix.scala_version }}
- java_version: ${{ matrix.java_version }}
\ No newline at end of file
+ java_version: ${{ matrix.java_version }}
diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index 4df404fc..81165cac 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -13,16 +13,16 @@ jobs:
fail-fast: false
matrix:
include:
- - scala_version: "2.12.15"
- spark_version: "3.3.4"
- java_version: "11"
+ # - scala_version: "2.12.15"
+ # spark_version: "3.3.4"
+ # java_version: "11"
- scala_version: "2.12.18"
spark_version: "3.5.5"
java_version: "17"
- - scala_version: "2.13.8"
- spark_version: "3.3.4"
- java_version: "11"
+ # - scala_version: "2.13.8"
+ # spark_version: "3.3.4"
+ # java_version: "11"
- scala_version: "2.13.8"
spark_version: "3.5.5"
java_version: "17"
diff --git a/README.md b/README.md
index 8111cb63..42c645a7 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,90 @@ We recommend [kind](https://kind.sigs.k8s.io/) for local testing. If you are usi
kind load docker-image $IMAGE_NAME --name armada
```
+### Running a remote Armada server using Armada Operator
+The default Armada Operator setup allows only localhost access. You can quickly set up a local Armada server
+configured to allow external access from other hosts, useful for client development and testing. For this
+configuration:
+
+- Copy the file `e2e/kind-config-external-access.yaml` in this repository to `hack/kind-config.yaml`
+in your `armada-operator` repository.
+
+- Edit the newly-copied `hack/kind-config.yaml` as noted in the beginning comments of that file.
+
+- Run the armada-operator setup commands (usually `make kind-all`) to create and start your Armada instance.
+
+- Copy the `$HOME/.kube/config` and `$HOME/.armadactl.yaml` (that Armada Operator will generate) from the Armada
+server host to your `$HOME` directory on the client (local) host. Then edit the local `.kube/config` and on
+the line that has `server: https://0.0.0.0:6443`, change the `0.0.0.0` address to the IP address or hostname
+of the remote Armada server system.
+
+- Generate a copy of the client TLS key, cert, and CA-cert files: go into the `e2e` subdirectory and
+run `./extract-kind-cert.sh` - it will generate `client.crt`, `client.key`, and `ca.crt`, from the output
+of `kubectl config view`. These files can be left in this directory.
+
+- Copy the `$HOME/.armadactl.yaml` from the Armada server host to your home directory on your client system.
+
+- You should then be able to run `kubectl get pods -A` and see a list of the running pods on the remote
+Armada server, as well as running `armadactl get queues`.
+
+- Verify the functionality of your setup by editing `scripts/config.sh` and changing the following line:
+```
+ARMADA_MASTER=armada://192.168.12.135:30002
+```
+to the IP address or hostname of your Armada server. You should not need to change the port number.
+
+Also, set the location of the three TLS certificate files by adding/setting:
+```
+CLIENT_CERT_FILE=e2e/client.crt
+CLIENT_KEY_FILE=e2e/client.key
+CLUSTER_CA_FILE=e2e/ca.crt
+```
+
+- You should be able to now verify the armada-spark configuration by running the E2E tests:
+```
+$ ./scripts/dev-e2e.sh
+```
+This will save its output to `e2e-test.log` for further debugging.
+
+---
+
+## Development
+
+Before submitting a pull request, please ensure that your code adheres to the project's coding standards and passes all tests.
+
+### Testing
+
+To run the unit tests, use the following command:
+
+```bash
+mvn test
+```
+
+To run the E2E tests, run Armada using the [Operator Quickstart](https://github.com/armadaproject/armada-operator?tab=readme-ov-file#quickstart) guide, then execute:
+
+```bash
+scripts/test-e2e.sh
+```
+### Linting
+
+To check the code for linting issues, use the following command:
+
+```bash
+mvn spotless:check
+```
+
+To automatically apply linting fixes, use:
+
+```bash
+mvn spotless:apply
+```
+
+### E2E
+
+Make sure that the [SparkPi](#sparkpi-example) job successfully runs on your Armada cluster before submitting a pull request.
+
+---
+
## Running Example Workloads
### SparkPi
diff --git a/e2e/extract-kind-cert.sh b/e2e/extract-kind-cert.sh
new file mode 100755
index 00000000..39caef06
--- /dev/null
+++ b/e2e/extract-kind-cert.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+CONTEXT="kind-armada"
+
+E2E_DIR=$(realpath "$0" | xargs dirname)
+
+cd "$E2E_DIR" || { echo "Error: could not cd to $E2E_DIR" >&2; exit 1; }
+
+# What These Files Are
+# - client.crt: Your user (client) certificate
+# - client.key: The private key associated with the certificate
+# - ca.crt: The CA certificate used by the Kubernetes API server (for verifying client and server certs)
+
+# Extract the client certificate
+kubectl config view --raw -o json | jq -r \
+  ".users[] | select(.name == \"${CONTEXT}\") | .user[\"client-certificate-data\"]" | base64 -d > client.crt
+
+# Extract the client key
+kubectl config view --raw -o json | jq -r \
+  ".users[] | select(.name == \"${CONTEXT}\") | .user[\"client-key-data\"]" | base64 -d > client.key
+
+# Extract the cluster CA certificate
+kubectl config view --raw -o json | jq -r \
+  ".clusters[] | select(.name == \"${CONTEXT}\") | .cluster[\"certificate-authority-data\"]" | base64 -d > ca.crt
+
diff --git a/e2e/kind-config-external-access.yaml b/e2e/kind-config-external-access.yaml
new file mode 100644
index 00000000..131bc28e
--- /dev/null
+++ b/e2e/kind-config-external-access.yaml
@@ -0,0 +1,52 @@
+# A kind configuration for running an Armada server that can be accessed
+# outside the host system, for working/developing with remote clients,
+# such as Armada-Spark clients.
+#
+# This configuration will allow you to run kubectl and armadactl
+# against the Armada instance on this system. To use this:
+# - Copy your $HOME/.kube/config on this system to the same directory
+# on your remote client host, then modify that copied file so the
+# IP address in there (0.0.0.0) is the address of the external interface
+# mentioned below.
+# - Copy your $HOME/.armadactl.yaml to your $HOME directory on the remote
+# client host, in that copied file, change the value of the 'armadaUrl'
+# field from 'localhost' to the hostname (or IP address) of this server,
+#   and below that line, at the same indent level, add the entry
+# forceNoTls: true
+# You should then be able to run `kubectl cluster-info` or
+# `armadactl get queues` without errors on the remote client host.
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+nodes:
+- role: control-plane
+ kubeadmConfigPatches:
+ - |
+ kind: ClusterConfiguration
+ apiServer:
+ certSANs:
+ - localhost
+ - 127.0.0.1
+ # replace the following line with the IP address
+ # of the external interface on this system
+ - 192.168.12.135
+ - 0.0.0.0
+ extraPortMappings:
+ # Lookout UI
+ - containerPort: 30000
+ hostPort: 30000
+ protocol: TCP
+ # Armada Server REST API
+ - containerPort: 30001
+ hostPort: 30001
+ protocol: TCP
+ # Armada Server gRPC API
+ - containerPort: 30002
+ hostPort: 30002
+ protocol: TCP
+ # Kubernetes API
+ - containerPort: 6443
+ hostPort: 6443
+ protocol: TCP
+- role: worker
+ labels:
+    armada-spark: "true"
diff --git a/example/jupyter/notebooks/jupyter_armada_spark.ipynb b/example/jupyter/notebooks/jupyter_armada_spark.ipynb
index 3db16d77..544ad7fe 100644
--- a/example/jupyter/notebooks/jupyter_armada_spark.ipynb
+++ b/example/jupyter/notebooks/jupyter_armada_spark.ipynb
@@ -82,7 +82,7 @@
"driver_host = os.environ.get('SPARK_DRIVER_HOST')\n",
"driver_port = os.environ.get('SPARK_DRIVER_PORT', '7078')\n",
"block_manager_port = os.environ.get('SPARK_BLOCK_MANAGER_PORT', '10061')\n",
- "armada_master = os.environ.get('ARMADA_MASTER', 'local://armada://host.docker.internal:30002')\n",
+ "armada_master = os.environ.get('ARMADA_MASTER', 'armada://host.docker.internal:30002')\n",
"armada_queue = os.environ.get('ARMADA_QUEUE', 'default')\n",
"armada_namespace = os.environ.get('ARMADA_NAMESPACE', 'default')\n",
"image_name = os.environ.get('IMAGE_NAME', 'spark:armada')\n",
diff --git a/scripts/dev-e2e.sh b/scripts/dev-e2e.sh
index 9bead731..99463161 100755
--- a/scripts/dev-e2e.sh
+++ b/scripts/dev-e2e.sh
@@ -7,7 +7,7 @@ source "$scripts"/init.sh
STATUSFILE="$(mktemp)"
AOREPO='https://github.com/armadaproject/armada-operator.git'
-AOHOME="$scripts/../../armada-operator"
+AOHOME=$(realpath "$scripts/../../armada-operator")
ARMADACTL_VERSION='0.20.23'
GREEN='\033[0;32m'
@@ -87,12 +87,37 @@ start-armada() {
fi
fi
- echo "Running 'make kind-all' to install and start Armada; this may take up to 6 minutes"
+ kind_extern_cfg='e2e/kind-config-external-access.yaml'
+ if ! cp "$kind_extern_cfg" "$AOHOME/hack/kind-config.yaml"; then
+ err "There was an error copying $kind_extern_cfg to $AOHOME/hack/kind-config.yaml"
+ exit 1
+ fi
+
+ # Get IP address of first network interface that is not loopback or a K8S internal network interface
+ external_ip=$(ifconfig -a| grep -w 'inet' | grep -v 'inet 127\.0\.0' | grep -v 'inet 172\.' | awk '{print $2}' | sed -ne '1p')
+  if [ "$(uname -s)" = 'Darwin' ]; then
+    sed_opt=(-I '.bak')
+  else
+    sed_opt=(-i.bak)
+  fi
+
+  if ! sed "${sed_opt[@]}" -e "s/192.168.12.135/$external_ip/" "$AOHOME/hack/kind-config.yaml"; then
+ err "There was an error modifying $AOHOME/hack/kind-config.yaml"
+ exit 1
+ fi
+
+ echo "Running 'make kind-all' to install and start Armada; this may take up to 6 minutes"
if ! (cd "$AOHOME"; make kind-all 2>&1) | tee armada-start.txt; then
echo ""
err "There was a problem starting Armada; exiting now"
exit 1
fi
+
+ echo "Extracting TLS client certificate files from Kind cluster"
+ if ! e2e/extract-kind-cert.sh; then
+ err "There was a problem extracting the certificates"
+ exit 1
+ fi
}
init-cluster() {
@@ -102,6 +127,12 @@ init-cluster() {
exit 1
fi
+  if ! (echo "$INIT_CONTAINER_IMAGE" | grep -Eq '^[[:alnum:]._/-]+:[[:alnum:]._-]+$'); then
+    err "INIT_CONTAINER_IMAGE is not set to a valid image reference (name:tag). Please set it in $scripts/config.sh, for example:"
+    err "INIT_CONTAINER_IMAGE=busybox:latest"
+ exit 1
+ fi
+
if [ -z "$ARMADA_QUEUE" ]; then
err "ARMADA_QUEUE is not defined. Please set it in $scripts/config.sh, for example:"
err "ARMADA_QUEUE=spark-test"
@@ -120,6 +151,17 @@ init-cluster() {
exit 1
fi
+ echo "Checking if image $INIT_CONTAINER_IMAGE is available"
+ if ! docker image inspect "$INIT_CONTAINER_IMAGE" > /dev/null 2>&1; then
+ echo "Image $INIT_CONTAINER_IMAGE not found in local Docker instance; pulling it from Docker Hub."
+ if ! docker pull "$INIT_CONTAINER_IMAGE"; then
+ err "Could not pull $INIT_CONTAINER_IMAGE; please try running"
+ err " docker pull $INIT_CONTAINER_IMAGE"
+ err "then run this script again"
+ exit 1
+ fi
+ fi
+
echo "Checking to see if Armada cluster is available ..."
if ! "$scripts"/armadactl get queues > "$STATUSFILE" 2>&1 ; then
@@ -140,19 +182,50 @@ init-cluster() {
mkdir -p "$scripts/.tmp"
- TMPDIR="$scripts/.tmp" "$AOHOME/bin/tooling/kind" load docker-image "$IMAGE_NAME" --name armada 2>&1 \
- | log_group "Loading Docker image $IMAGE_NAME into Armada cluster";
+ if [[ "$ARMADA_MASTER" == *"//localhost"* ]] ; then
+ for IMG in "$IMAGE_NAME" "$INIT_CONTAINER_IMAGE"; do
+ TMPDIR="$scripts/.tmp" "$AOHOME/bin/tooling/kind" load docker-image "$IMG" --name armada 2>&1 \
+ | log_group "Loading Docker image $IMG into Armada (Kind) cluster";
+ done
+ fi
# configure the defaults for the e2e test
- cp $scripts/../e2e/spark-defaults.conf $scripts/../conf/spark-defaults.conf
+ cp "$scripts/../e2e/spark-defaults.conf" "$scripts/../conf/spark-defaults.conf"
- log "Waiting 60 seconds for Armada to stabilize ..."
- sleep 60
+ # If using a remote Armada server, assume it is already running and ready
+ if [[ "$ARMADA_MASTER" == *"//localhost"* ]] ; then
+ log "Waiting 60 seconds for Armada to stabilize ..."
+ sleep 60
+ fi
}
run-test() {
echo "Running Scala E2E test suite..."
+ if [[ ! -d ".spark-$SPARK_VERSION" ]]; then
+ echo "Checking out Spark sources for tag v$SPARK_VERSION."
+ git clone https://github.com/apache/spark --branch v$SPARK_VERSION --depth 1 --no-tags ".spark-$SPARK_VERSION"
+ fi
+
+ cd ".spark-$SPARK_VERSION"
+ # Spark 3.3.4 does not compile without this fix
+ if [[ "$SPARK_VERSION" == "3.3.4" ]]; then
+ sed -i -e "s%2.13.8%2.13.6%" pom.xml
+ # Fix deprecated openjdk base image - use eclipse-temurin:11-jammy instead.
+ spark_dockerfile="resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile"
+ if [ -f "$spark_dockerfile" ]; then
+ sed -i -e 's|FROM openjdk:|FROM eclipse-temurin:|g' "$spark_dockerfile"
+ sed -i -E 's/^ARG java_image_tag=11-jre-slim$/ARG java_image_tag=11-jammy/' "$spark_dockerfile"
+ fi
+ fi
+ ./dev/change-scala-version.sh $SCALA_BIN_VERSION
+ # by packaging the assembly project specifically, jars of all depending Spark projects are fetch from Maven
+ # spark-examples jars are not released, so we need to build these from sources
+ ./build/mvn --batch-mode clean
+ ./build/mvn --batch-mode package -pl examples
+ ./build/mvn --batch-mode package -Pkubernetes -Pscala-$SCALA_BIN_VERSION -pl assembly
+ cd ..
+
# Add armadactl to PATH so the e2e framework can access it
PATH="$scripts:$AOHOME/bin/tooling/:$PATH"
export PATH
@@ -160,23 +233,29 @@ run-test() {
# Change to armada-spark directory
cd "$scripts/.."
+ tls_args=()
+ test -n "${CLIENT_CERT_FILE:-}" && tls_args+=( -Dclient_cert_file="$CLIENT_CERT_FILE" )
+ test -n "${CLIENT_KEY_FILE:-}" && tls_args+=( -Dclient_key_file="$CLIENT_KEY_FILE" )
+ test -n "${CLUSTER_CA_FILE:-}" && tls_args+=( -Dcluster_ca_file="$CLUSTER_CA_FILE" )
+
# Run the Scala E2E test suite
- mvn scalatest:test -Dsuites="org.apache.spark.deploy.armada.e2e.ArmadaSparkE2E" \
+  # Trust the Kind cluster's self-signed certificates when contacting the K8s API
+  env KUBERNETES_TRUST_CERTIFICATES=true \
+ mvn -e scalatest:test -Dsuites="org.apache.spark.deploy.armada.e2e.ArmadaSparkE2E" \
-Dcontainer.image="$IMAGE_NAME" \
-Dscala.version="$SCALA_VERSION" \
-Dscala.binary.version="$SCALA_BIN_VERSION" \
-Dspark.version="$SPARK_VERSION" \
-Darmada.queue="$ARMADA_QUEUE" \
- -Darmada.master="armada://localhost:30002" \
- -Darmada.lookout.url="http://localhost:30000" \
- -Darmadactl.path="$scripts/armadactl" 2>&1 | \
- tee e2e-test.log
-
+    -Darmada.master="$ARMADA_MASTER" \
+ -Darmada.lookout.url="$ARMADA_LOOKOUT_URL" \
+ -Darmadactl.path="$scripts/armadactl" \
+ ${tls_args[@]:-} 2>&1 | tee e2e-test.log
TEST_EXIT_CODE=${PIPESTATUS[0]}
if [ "$TEST_EXIT_CODE" -ne 0 ]; then
err "E2E tests failed with exit code $TEST_EXIT_CODE"
- exit $TEST_EXIT_CODE
+ exit "$TEST_EXIT_CODE"
fi
log "E2E tests completed successfully"
@@ -187,4 +266,4 @@ main() {
run-test
}
-main
\ No newline at end of file
+main
diff --git a/scripts/init.sh b/scripts/init.sh
index 7c021799..a5e22c81 100644
--- a/scripts/init.sh
+++ b/scripts/init.sh
@@ -104,6 +104,7 @@ export INCLUDE_PYTHON="${INCLUDE_PYTHON:-false}"
export USE_KIND="${USE_KIND:-false}"
export IMAGE_NAME="${IMAGE_NAME:-spark:armada}"
export ARMADA_MASTER="${ARMADA_MASTER:-armada://localhost:30002}"
+export ARMADA_LOOKOUT_URL="${ARMADA_LOOKOUT_URL:-http://localhost:30000}"
export ARMADA_INTERNAL_URL="${ARMADA_INTERNAL_URL:-armada://armada-server.armada:50051}"
export ARMADA_QUEUE="${ARMADA_QUEUE:-test}"
export ARMADA_AUTH_TOKEN=${ARMADA_AUTH_TOKEN:-}
@@ -112,9 +113,20 @@ export ARMADA_EVENT_WATCHER_USE_TLS=${ARMADA_EVENT_WATCHER_USE_TLS:-false}
export SPARK_BLOCK_MANAGER_PORT=${SPARK_BLOCK_MANAGER_PORT:-}
export SCALA_CLASS="${SCALA_CLASS:-org.apache.spark.examples.SparkPi}"
export RUNNING_E2E_TESTS="${RUNNING_E2E_TESTS:-false}"
+export INIT_CONTAINER_IMAGE="${INIT_CONTAINER_IMAGE:-busybox:latest}"
export USE_FALLBACK_STORAGE="${USE_FALLBACK_STORAGE:-false}"
export SPARK_SECRET_KEY="${SPARK_SECRET_KEY:-armada-secret}"
+if [ -n "${CLIENT_CERT_FILE:-}" ]; then
+ export CLIENT_CERT_FILE="${CLIENT_CERT_FILE}"
+fi
+if [ -n "${CLIENT_KEY_FILE:-}" ]; then
+  export CLIENT_KEY_FILE="${CLIENT_KEY_FILE}"
+fi
+if [ -n "${CLUSTER_CA_FILE:-}" ]; then
+ export CLUSTER_CA_FILE="${CLUSTER_CA_FILE}"
+fi
+
ARMADA_AUTH_ARGS=()
# Add auth script path if configured
if [ "$ARMADA_AUTH_SCRIPT_PATH" != "" ]; then
diff --git a/scripts/set-version.sh b/scripts/set-version.sh
index 283ea44a..916fa608 100755
--- a/scripts/set-version.sh
+++ b/scripts/set-version.sh
@@ -1,6 +1,24 @@
#!/bin/bash
-root="$(cd "$(dirname "$0")/.."; pwd)"
+root="$(cd "$(dirname "$0")/.." || exit; pwd)"
+SED="sed"
+OS=$(uname -s)
+
+# The sed that macOS ships does not understand all the regex patterns (which
+# we use) that GNU sed does, so look for 'gsed' and use that, if available.
+if [ "$OS" = 'Darwin' ]; then
+ sed_location=$(type -p $SED)
+ if [ "$sed_location" = '/usr/bin/sed' ]; then
+ type -p gsed > /dev/null
+ if [ $? -eq 0 ]; then
+ SED=gsed
+ else
+ echo "$0: the version of sed on this system ($sed_location) does not handle" > /dev/stderr
+ echo "all the GNU sed extensions needed. Please install 'gsed' and re-run this script" > /dev/stderr
+ exit 1
+ fi
+ fi
+fi
if [ $# -eq 2 ]
then
@@ -33,7 +51,7 @@ then
fi
echo "setting spark=$spark and scala=$scala"
- sed -i -E \
+ $SED -i -E \
-e "s%^( )([^_]+)[_0-9.]+()$%\1\2_${scala_compat}\3%" \
-e "s%^( ).+()$%\1${scala_major}\2%" \
-e "s%^( ).+()$%\1${scala_minor}\2%" \
@@ -45,7 +63,8 @@ then
-e "s%^( ).+()$%\1${jackson_version}\2%" \
"$root/pom.xml"
else
- echo "Provide the Spark and Scala version to set"
+ echo "Provide the Spark and Scala version to set; for example:"
+  echo "  $0 3.5.5 2.13.8"
exit 1
fi
diff --git a/src/test/scala/org/apache/spark/deploy/armada/e2e/K8sClient.scala b/src/main/scala/org/apache/spark/deploy/armada/K8sClient.scala
similarity index 71%
rename from src/test/scala/org/apache/spark/deploy/armada/e2e/K8sClient.scala
rename to src/main/scala/org/apache/spark/deploy/armada/K8sClient.scala
index d8c92e75..3cd739b8 100644
--- a/src/test/scala/org/apache/spark/deploy/armada/e2e/K8sClient.scala
+++ b/src/main/scala/org/apache/spark/deploy/armada/K8sClient.scala
@@ -15,21 +15,78 @@
* limitations under the License.
*/
-package org.apache.spark.deploy.armada.e2e
+package org.apache.spark.deploy.armada
import org.apache.spark.deploy.armada.Config
-import io.fabric8.kubernetes.client.{DefaultKubernetesClient, KubernetesClient}
+import org.apache.spark.deploy.armada.submit
+import io.fabric8.kubernetes.client.{
+ ConfigBuilder,
+ DefaultKubernetesClient,
+ KubernetesClient,
+ KubernetesClientBuilder
+}
import io.fabric8.kubernetes.api.model.{NamespaceBuilder, Pod}
import io.fabric8.kubernetes.api.model.networking.v1.Ingress
+import org.yaml.snakeyaml.Yaml
+
+import java.io.FileReader
import java.util.concurrent.TimeoutException
+import java.util.Properties
import scala.jdk.CollectionConverters._
import scala.concurrent.duration._
import scala.concurrent.{ExecutionContext, Future}
/** Kubernetes client implementation using fabric8 Kubernetes client library. */
-class K8sClient {
- private val client: KubernetesClient = new DefaultKubernetesClient()
+class K8sClient(props: Properties) {
+ val armadaMaster: String = props.getProperty("armada.master")
+ val pattern = """armada://([^:]+):.*""".r
+
+  // Derive k8sApiURL from ~/.kube/config (which `kind` creates/updates).
+  // NOTE(review): this runs unconditionally, not only when armadaMaster is local.
+ val yaml = new Yaml()
+ val home = System.getProperty("user.home")
+ val data = yaml.load[java.util.Map[String, Object]](new FileReader(s"$home/.kube/config"))
+ var k8sApiURL = "no-K8S-server-found"
+
+ val clusters =
+ data.get("clusters").asInstanceOf[java.util.List[java.util.Map[String, Object]]].asScala
+ clusters.foreach { entry =>
+ val cluster = entry.get("cluster").asInstanceOf[java.util.Map[String, Object]]
+ k8sApiURL = cluster.get("server").toString()
+ }
+
+  // NOTE(review): k8sApiURL is taken from the *last* cluster entry in
+  // ~/.kube/config above; confirm kind's cluster is the intended entry.
+
+ val clientCertFile: String = props.getProperty("client_cert_file", "")
+ val clientKeyFile: String = props.getProperty("client_key_file", "")
+ val clusterCaFile: String = props.getProperty("cluster_ca_file", "")
+
+ var cb: ConfigBuilder = new ConfigBuilder()
+ .withMasterUrl(k8sApiURL)
+ // .withOauthToken("sha256~secret")
+ .withNamespace("default")
+
+ if (clusterCaFile.nonEmpty) {
+ cb = cb.withCaCertFile(clusterCaFile)
+ }
+
+ if (clientCertFile.nonEmpty) {
+ cb = cb.withClientCertFile(clientCertFile)
+ }
+
+ if (clientKeyFile.nonEmpty) {
+ cb = cb.withClientKeyFile(clientKeyFile)
+ }
+
+ val cfg = cb
+ .withClientKeyAlgo("RSA")
+ .build()
+
+ private val client: KubernetesClient = new KubernetesClientBuilder()
+ .withConfig(cfg)
+ .build()
def createNamespace(name: String)(implicit ec: ExecutionContext): Future[Unit] = Future {
val namespace = new NamespaceBuilder()
diff --git a/src/main/scala/org/apache/spark/deploy/armada/submit/ArmadaUtils.scala b/src/main/scala/org/apache/spark/deploy/armada/submit/ArmadaUtils.scala
index 4f09d748..61f8f289 100644
--- a/src/main/scala/org/apache/spark/deploy/armada/submit/ArmadaUtils.scala
+++ b/src/main/scala/org/apache/spark/deploy/armada/submit/ArmadaUtils.scala
@@ -28,11 +28,7 @@ object ArmadaUtils {
import ArmadaUtilsExceptions._
def parseMasterUrl(masterUrl: String): (String, Int) = {
- val startString = if (masterUrl.startsWith("local")) {
- "local://armada://"
- } else {
- "armada://"
- }
+ val startString = "armada://"
Some(masterUrl)
.map(_.substring(startString.length).split(":").toSeq)
.filter(_.length == 2)
diff --git a/src/test/scala/org/apache/spark/deploy/armada/e2e/ArmadaOperations.scala b/src/test/scala/org/apache/spark/deploy/armada/e2e/ArmadaOperations.scala
index 5d0c1e0e..3e40b611 100644
--- a/src/test/scala/org/apache/spark/deploy/armada/e2e/ArmadaOperations.scala
+++ b/src/test/scala/org/apache/spark/deploy/armada/e2e/ArmadaOperations.scala
@@ -213,7 +213,23 @@ class ArmadaClient(armadaUrl: String = "localhost:30002") {
val armadactlCmd = resolveArmadactlPath.getOrElse {
throw new RuntimeException("armadactl not found in system properties or PATH")
}
- Seq(armadactlCmd) ++ subCommand.split(" ") ++ Seq("--armadaUrl", armadaUrl)
+    // armadactl expects the server address to be of the form
+    // host:port, with no pseudo-protocol ("armada://") prefix
+    val pattern = """.*armada://(.+)""".r
+
+    // A val + exhaustive match fails fast on a malformed URL instead of
+    // falling through with a placeholder value.
+    val armadactlUrl = armadaUrl match {
+      case pattern(hostPort) => hostPort // e.g. "localhost:30002"
+      case _ =>
+        throw new RuntimeException(
+          s"could not extract valid armadactl URL from armada URL ${armadaUrl}"
+        )
+    }
+
+    println(s"buildCommand(): armadaUrl=${armadaUrl} armadactlUrl=${armadactlUrl}")
+
+    Seq(armadactlCmd) ++ subCommand.split(" ") ++ Seq("--armadaUrl", armadactlUrl)
}
/** Resolves the path to `armadactl`:
diff --git a/src/test/scala/org/apache/spark/deploy/armada/e2e/ArmadaSparkE2E.scala b/src/test/scala/org/apache/spark/deploy/armada/e2e/ArmadaSparkE2E.scala
index db092bd9..f7b6b592 100644
--- a/src/test/scala/org/apache/spark/deploy/armada/e2e/ArmadaSparkE2E.scala
+++ b/src/test/scala/org/apache/spark/deploy/armada/e2e/ArmadaSparkE2E.scala
@@ -17,6 +17,8 @@
package org.apache.spark.deploy.armada.e2e
+import org.apache.spark.deploy.armada.K8sClient
+
import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
@@ -42,9 +44,9 @@ class ArmadaSparkE2E
private val baseQueueName = "e2e-template"
- private lazy val armadaClient = new ArmadaClient()
- private lazy val k8sClient = new K8sClient()
- private lazy val orchestrator = new TestOrchestrator(armadaClient, k8sClient)
+ private var armadaClient: ArmadaClient = _
+ private var k8sClient: K8sClient = _
+ implicit private var orch: TestOrchestrator = _
private var baseConfig: TestConfig = _
@@ -59,6 +61,11 @@ class ArmadaSparkE2E
val props = loadProperties()
+ val armadaApiUrl = props.getProperty("armada.master", "localhost:30002")
+ armadaClient = new ArmadaClient(armadaApiUrl)
+ k8sClient = new K8sClient(props)
+ orch = new TestOrchestrator(armadaClient, k8sClient)
+
// Get Scala binary version - either from system property or derive from full version
// This should be "2.12" or "2.13", not the full version like "2.12.15"
val scalaBinaryVersion = props.getProperty("scala.binary.version") match {
@@ -148,8 +155,6 @@ class ArmadaSparkE2E
super.afterAll()
}
- implicit val orch: TestOrchestrator = orchestrator
-
private def loadProperties(): Properties = {
val props = new Properties()
diff --git a/src/test/scala/org/apache/spark/deploy/armada/e2e/TestAssertions.scala b/src/test/scala/org/apache/spark/deploy/armada/e2e/TestAssertions.scala
index f4a5cea0..9b5e9c39 100644
--- a/src/test/scala/org/apache/spark/deploy/armada/e2e/TestAssertions.scala
+++ b/src/test/scala/org/apache/spark/deploy/armada/e2e/TestAssertions.scala
@@ -20,6 +20,7 @@ package org.apache.spark.deploy.armada.e2e
import java.util.concurrent.atomic.AtomicInteger
import io.fabric8.kubernetes.api.model.Pod
+import org.apache.spark.deploy.armada.K8sClient
import org.apache.spark.deploy.armada.Config
import scala.jdk.CollectionConverters._
import scala.concurrent.{ExecutionContext, Future}
diff --git a/src/test/scala/org/apache/spark/deploy/armada/e2e/TestOrchestrator.scala b/src/test/scala/org/apache/spark/deploy/armada/e2e/TestOrchestrator.scala
index ca74df48..28584bb5 100644
--- a/src/test/scala/org/apache/spark/deploy/armada/e2e/TestOrchestrator.scala
+++ b/src/test/scala/org/apache/spark/deploy/armada/e2e/TestOrchestrator.scala
@@ -20,6 +20,7 @@ package org.apache.spark.deploy.armada.e2e
import java.io.File
import java.util.UUID
import java.util.concurrent.TimeoutException
+import org.apache.spark.deploy.armada.K8sClient
import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration._
import TestConstants._
@@ -74,6 +75,7 @@ class TestOrchestrator(
armadaClient: ArmadaClient,
k8sClient: K8sClient
)(implicit ec: ExecutionContext) {
+ val sparkRepoCopy = ".spark-3.5.5"
private val jobSubmitTimeout = JobSubmitTimeout
private val jobWatchTimeout = JobWatchTimeout
@@ -253,6 +255,7 @@ class TestOrchestrator(
println(s"Test ID: ${context.testId}")
println(s"Namespace: ${context.namespace}")
println(s"Queue: $queueName")
+ println(s"MasterURL: ${config.masterUrl}")
println(s"Deployment Mode: $modeType")
val resultFuture = for {
@@ -356,7 +359,7 @@ class TestOrchestrator(
"spark.submit.deployMode" -> deployMode
)
- val dockerCommand = buildDockerCommand(
+ val runTestCommand = buildRunTestCommand(
config.imageName,
volumeMounts,
config.masterUrl,
@@ -384,7 +387,7 @@ class TestOrchestrator(
println(s"[SUBMIT] $key = $displayValue")
}
// Properly escape command for shell reproduction
- val escapedCommand = dockerCommand.map { arg =>
+ val escapedCommand = runTestCommand.map { arg =>
if (arg.contains(" ") || arg.contains("'") || arg.contains("\"")) {
"'" + arg.replace("'", "'\\''") + "'"
} else arg
@@ -395,7 +398,7 @@ class TestOrchestrator(
def attemptSubmit(attempt: Int = 1): ProcessResult = {
// In client mode, spark-submit runs until application completes, so use longer timeout
val timeout = if (!modeHelper.isDriverInCluster) jobWatchTimeout else jobSubmitTimeout
- val result = ProcessExecutor.executeWithResult(dockerCommand, timeout)
+ val result = ProcessExecutor.executeWithResult(runTestCommand, timeout)
if (result.exitCode != 0) {
val allOutput = result.stdout + "\n" + result.stderr
@@ -588,7 +591,7 @@ class TestOrchestrator(
TestResult(jobSetId, queueName, finalStatus, assertionResults)
}
- private def buildDockerCommand(
+ private def buildRunTestCommand(
imageName: String,
volumeMounts: Seq[String],
masterUrl: String,
@@ -605,6 +608,11 @@ class TestOrchestrator(
val deployMode = if (modeHelper.isDriverInCluster) "cluster" else "client"
val isClientMode = !modeHelper.isDriverInCluster
+    // TODO(review): Scala binary version and artifact version are hard-coded
+    // in this jar name; derive them from the build so Scala 2.12 runs work.
+    val driverClassPath =
+      Seq(".", "./target/armada-cluster-manager_2.13-1.0.0-SNAPSHOT-all.jar").mkString(":")
+
val baseCommand = Seq(
"docker",
"run",
diff --git a/src/test/scala/org/apache/spark/deploy/armada/submit/ArmadaClientApplicationSuite.scala b/src/test/scala/org/apache/spark/deploy/armada/submit/ArmadaClientApplicationSuite.scala
index 6142a7e0..e8c7ec61 100644
--- a/src/test/scala/org/apache/spark/deploy/armada/submit/ArmadaClientApplicationSuite.scala
+++ b/src/test/scala/org/apache/spark/deploy/armada/submit/ArmadaClientApplicationSuite.scala
@@ -61,7 +61,6 @@ class ArmadaClientApplicationSuite extends AnyFunSuite with BeforeAndAfter with
private val SPARK_DRIVER_URL = "SPARK_DRIVER_URL"
// Constants for paths
- private val PYTHON_EXAMPLE_PATH = "/opt/spark/examples/src/main/python/pi.py"
private val clientArguments = ClientArguments(
mainAppResource = JavaMainAppResource(Some("app.jar")),
mainClass = "org.example.SparkApp",