From be7f954c91413ead6acb0471839a43969d6c7b39 Mon Sep 17 00:00:00 2001 From: Mladjan Gadzic Date: Tue, 7 Oct 2025 13:02:05 +0100 Subject: [PATCH] patch internal changes Signed-off-by: Mladjan Gadzic --- pom.xml | 17 +- .../java/io/armadaproject/ArmadaMapper.java | 2 +- .../io/armadaproject/ClusterConfigParser.java | 41 -- .../jenkins/plugin/ArmadaCloud.java | 442 ++---------------- .../plugin/ArmadaClusterInfoProvider.java | 131 ++++++ .../jenkins/plugin/ArmadaComputer.java | 118 +++++ .../jenkins/plugin/ArmadaComputerFactory.java | 37 ++ .../jenkins/plugin/ArmadaLauncher.java | 304 ++++++++++++ .../jenkins/plugin/DefaultInProvisioning.java | 2 +- .../jenkins/plugin/GarbageCollection.java | 219 --------- .../plugin/KubernetesClientProvider.java | 99 ++-- .../jenkins/plugin/KubernetesComputer.java | 221 --------- .../plugin/KubernetesComputerFactory.java | 37 -- .../plugin/KubernetesFactoryAdapter.java | 2 +- .../plugin/KubernetesFolderProperty.java | 20 +- .../jenkins/plugin/KubernetesLauncher.java | 370 --------------- .../plugin/KubernetesProvisioningLimits.java | 10 +- .../plugin/KubernetesQueueTaskDispatcher.java | 11 +- .../jenkins/plugin/KubernetesSlave.java | 365 +++++++-------- .../jenkins/plugin/MetricNames.java | 16 +- .../OpenShiftBearerTokenCredentialImpl.java | 22 - .../plugin/OpenShiftTokenCredentialImpl.java | 52 --- .../jenkins/plugin/PodTemplate.java | 41 +- .../jenkins/plugin/PodTemplateBuilder.java | 34 +- .../jenkins/plugin/PodTemplateUtils.java | 2 - .../jenkins/plugin/PodUtils.java | 70 +-- .../plugin/StandardPlannedNodeBuilder.java | 2 +- .../plugin/job/ArmadaClientParameters.java | 33 ++ .../plugin/job/ArmadaClientProvider.java | 8 + .../jenkins/plugin/job/ArmadaClientUtil.java | 185 ++++++++ .../plugin/job/ArmadaEventWatcher.java | 9 + .../plugin/job/ArmadaGarbageCollection.java | 29 ++ .../jenkins/plugin/job/ArmadaJobManager.java | 239 ++++++++++ .../jenkins/plugin/job/ArmadaJobMetadata.java | 67 +++ 
.../jenkins/plugin/job/ArmadaJobNotifier.java | 78 ++++ .../plugin/job/ArmadaJobSetEventWatcher.java | 182 ++++++++ .../plugin/job/ArmadaJobSetManager.java | 388 +++++++++++++++ .../plugin/job/ArmadaJobSetStrategy.java | 5 + .../job/ArmadaLaunchFailedOfflineCause.java | 17 + .../jenkins/plugin/job/ArmadaState.java | 208 +++++++++ .../plugin/job/DailyArmadaJobSetStrategy.java | 30 ++ .../pipeline/ArmadaDeclarativeAgent.java | 24 - .../pipeline/ArmadaPodTemplateStep.java | 18 - .../pipeline/ContainerExecDecorator.java | 7 +- .../KubernetesAgentErrorCondition.java | 6 +- .../pipeline/KubernetesNodeContext.java | 55 +-- .../plugin/pipeline/PodTemplateContext.java | 8 +- .../pipeline/PodTemplateStepExecution.java | 23 +- .../plugin/pipeline/SecretsMasker.java | 17 +- .../jenkins/plugin/pod/retention/Always.java | 57 --- .../jenkins/plugin/pod/retention/Default.java | 75 --- .../jenkins/plugin/pod/retention/Never.java | 56 --- .../plugin/pod/retention/OnFailure.java | 72 --- .../plugin/pod/retention/PodRetention.java | 50 -- .../pod/retention/PodRetentionDescriptor.java | 8 - .../jenkins/plugin/pod/retention/Reaper.java | 439 +++++------------ .../jenkins/plugin/ArmadaCloud/config.jelly | 26 +- .../ArmadaCloud/help-garbageCollection.html | 5 - .../plugin/ArmadaCloud/help-podRetention.html | 15 - .../ArmadaCloud/help-serverCertificate.html | 3 - .../plugin/GarbageCollection/config.jelly | 30 -- .../GarbageCollection/help-namespaces.html | 2 - .../jenkins/plugin/Messages.properties | 4 +- .../jenkins/plugin/PodTemplate/config.jelly | 6 - .../plugin/PodTemplate/help-namespace.html | 3 - .../plugin/PodTemplate/help-podRetention.html | 17 - .../KubernetesDeclarativeAgent/config.jelly | 6 - .../pipeline/PodTemplateStep/config.jelly | 7 - .../io/armadaproject/ArmadaMapperTest.java | 1 + .../jenkins/plugin/ArmadaCloudFIPSTest.java | 54 +-- .../jenkins/plugin/ArmadaCloudTest.java | 2 +- .../plugin/KubernetesClientProviderTest.java | 178 +++---- 
.../plugin/KubernetesFolderPropertyTest.java | 14 +- .../KubernetesQueueTaskDispatcherTest.java | 38 +- .../jenkins/plugin/KubernetesSlaveTest.java | 16 +- .../plugin/PodTemplateBuilderTest.java | 12 +- .../AbstractKubernetesPipelineTest.java | 8 +- .../pipeline/ContainerExecDecoratorTest.java | 8 +- .../ContainerExecDecoratorWindowsTest.java | 8 +- ...ernetesPipelineOverridenNamespaceTest.java | 6 +- .../pipeline/KubernetesPipelineTest.java | 55 +-- .../plugin/pipeline/RestartPipelineTest.java | 12 +- .../pod/decorator/PodDecoratorTest.java | 6 +- .../plugin/pod/retention/ReaperTest.java | 44 +- .../plugin/pipeline/cascadingDelete.groovy | 1 - .../plugin/pipeline/declarative.groovy | 1 - .../jenkins/plugin/pipeline/runIn2Pods.groovy | 4 +- .../jenkins/plugin/pipeline/runInPod.groovy | 2 +- .../plugin/pipeline/runInPodFromYaml.groovy | 2 +- .../pipeline/runInPodWithRetention.groovy | 2 +- 90 files changed, 2754 insertions(+), 2924 deletions(-) create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/ArmadaClusterInfoProvider.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/ArmadaComputer.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/ArmadaComputerFactory.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/ArmadaLauncher.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientParameters.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientProvider.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientUtil.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaEventWatcher.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaGarbageCollection.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobManager.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobMetadata.java create mode 100644 
src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobNotifier.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetEventWatcher.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetManager.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetStrategy.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaLaunchFailedOfflineCause.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaState.java create mode 100644 src/main/java/io/armadaproject/jenkins/plugin/job/DailyArmadaJobSetStrategy.java diff --git a/pom.xml b/pom.xml index 6fcbf9d16..01428759a 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ org.jenkins-ci.plugins plugin - 4.88 + 5.18 @@ -43,21 +43,20 @@ 999999-SNAPSHOT - 2.462.1 - 2.462.x - 3387.v0f2773fa_3200 + 2.504 + ${jenkins.baseline}.1 false true - armadaproject/${artifactId} - 0.0.2 + armadaproject/${project.artifactId} + 0.0.3 io.jenkins.tools.bom - bom-${bom} - ${bom.version} + bom-${jenkins.baseline}.x + 5015.vb_52d36583443 pom import @@ -156,7 +155,7 @@ io.fabric8 kubernetes-server-mock - 6.10.0 + 7.3.1 test diff --git a/src/main/java/io/armadaproject/ArmadaMapper.java b/src/main/java/io/armadaproject/ArmadaMapper.java index 73602c2e6..cce5cd5d6 100644 --- a/src/main/java/io/armadaproject/ArmadaMapper.java +++ b/src/main/java/io/armadaproject/ArmadaMapper.java @@ -1,4 +1,4 @@ -package io.armadaproject; +package io.armadaproject.jenkins.plugin.job; import api.SubmitOuterClass.JobSubmitRequest; import api.SubmitOuterClass.JobSubmitRequest.Builder; diff --git a/src/main/java/io/armadaproject/ClusterConfigParser.java b/src/main/java/io/armadaproject/ClusterConfigParser.java index 01a0c0d14..e69de29bb 100644 --- a/src/main/java/io/armadaproject/ClusterConfigParser.java +++ b/src/main/java/io/armadaproject/ClusterConfigParser.java @@ -1,41 +0,0 @@ -package io.armadaproject; - -import 
java.util.HashMap; -import java.util.Map; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; - -public class ClusterConfigParser { - - public static Map parse(String configPath) throws Exception { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - DocumentBuilder builder = factory.newDocumentBuilder(); - Document doc = builder.parse(configPath); - - doc.getDocumentElement().normalize(); - - Map clusterMap = new HashMap<>(); - - NodeList clusterList = doc.getElementsByTagName("cluster"); - - for (int i = 0; i < clusterList.getLength(); i++) { - Node clusterNode = clusterList.item(i); - - if (clusterNode.getNodeType() == Node.ELEMENT_NODE) { - Element clusterElement = (Element) clusterNode; - - String name = clusterElement.getElementsByTagName("name").item(0).getTextContent(); - String url = clusterElement.getElementsByTagName("url").item(0).getTextContent(); - - clusterMap.put(name, url); - } - } - - return clusterMap; - } - -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/ArmadaCloud.java b/src/main/java/io/armadaproject/jenkins/plugin/ArmadaCloud.java index 2c4c848a7..3505ead9f 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/ArmadaCloud.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/ArmadaCloud.java @@ -1,6 +1,5 @@ package io.armadaproject.jenkins.plugin; -import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.commons.lang.StringUtils.isEmpty; import static io.armadaproject.jenkins.plugin.KubernetesFactoryAdapter.resolveCredentials; @@ -19,7 +18,6 @@ import hudson.init.InitMilestone; import hudson.init.Initializer; import hudson.model.Descriptor; -import hudson.model.DescriptorVisibilityFilter; import hudson.model.Item; import hudson.model.ItemGroup; import hudson.model.Label; @@ -32,26 +30,12 @@ import hudson.util.ListBoxModel; 
import hudson.util.XStream2; import io.armadaproject.ArmadaClient; -import io.armadaproject.jenkins.plugin.pod.retention.Default; -import io.armadaproject.jenkins.plugin.pod.retention.PodRetention; -import io.fabric8.kubernetes.client.KubernetesClient; -import io.fabric8.kubernetes.client.KubernetesClientException; -import io.fabric8.kubernetes.client.VersionInfo; + import java.io.IOException; import java.io.StringReader; -import java.net.ConnectException; import java.net.MalformedURLException; -import java.net.SocketTimeoutException; import java.net.URL; -import java.net.UnknownHostException; -import java.security.PublicKey; -import java.security.UnrecoverableKeyException; -import java.security.cert.Certificate; -import java.security.interfaces.DSAPublicKey; -import java.security.interfaces.ECPublicKey; -import java.security.interfaces.RSAPublicKey; import java.util.ArrayList; -import java.util.Base64; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -61,8 +45,12 @@ import java.util.logging.Level; import java.util.logging.Logger; import javax.servlet.ServletException; + +import io.armadaproject.jenkins.plugin.job.ArmadaClientParameters; +import io.armadaproject.jenkins.plugin.job.ArmadaJobSetStrategy; +import io.armadaproject.jenkins.plugin.job.ArmadaState; +import io.armadaproject.jenkins.plugin.job.DailyArmadaJobSetStrategy; import jenkins.authentication.tokens.api.AuthenticationTokens; -import jenkins.bouncycastle.api.PEMEncodable; import jenkins.metrics.api.Metrics; import jenkins.model.Jenkins; import jenkins.model.JenkinsLocationConfiguration; @@ -73,8 +61,6 @@ import org.apache.commons.lang.StringUtils; import io.armadaproject.jenkins.plugin.pipeline.PodTemplateMap; import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuth; -import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException; -import org.jenkinsci.plugins.plaincredentials.StringCredentials; import 
org.jenkinsci.plugins.plaincredentials.impl.StringCredentialsImpl; import org.kohsuke.accmod.Restricted; import org.kohsuke.accmod.restrictions.NoExternalUse; @@ -127,21 +113,16 @@ public class ArmadaCloud extends Cloud implements PodTemplateGroup { private String armadaLookoutUrl; private String armadaLookoutPort; private String armadaJobSetPrefix; - private String armadaJobSetId; private String armadaClusterConfigPath; + private ArmadaJobSetStrategy armadaJobSetStrategy = new DailyArmadaJobSetStrategy(""); - private String serverUrl; private boolean useJenkinsProxy; - @CheckForNull - private String serverCertificate; - private boolean skipTlsVerify; private boolean addMasterProxyEnvVars; private boolean capOnlyOnAlivePods; - private String namespace; private String jnlpregistry; private boolean restrictedPssSecurityContext = false; private boolean webSocket; @@ -170,18 +151,16 @@ public class ArmadaCloud extends Cloud implements PodTemplateGroup { // Integer to differentiate null from 0 private Integer waitForPodSec = DEFAULT_WAIT_FOR_POD_SEC; - @CheckForNull - private PodRetention podRetention = PodRetention.getKubernetesCloudDefault(); - - @CheckForNull - private GarbageCollection garbageCollection; - @DataBoundConstructor public ArmadaCloud(String name) { super(name); setMaxRequestsPerHost(DEFAULT_MAX_REQUESTS_PER_HOST); } + public ArmadaJobSetStrategy getJobSetStrategy() { + return armadaJobSetStrategy; + } + /** * Copy constructor. * Allows to create copies of the original kubernetes cloud. 
Since it's a singleton @@ -208,8 +187,6 @@ public ArmadaCloud(@NonNull String name, @NonNull ArmadaCloud source) { public ArmadaCloud( String name, List templates, - String serverUrl, - String namespace, String jenkinsUrl, String containerCapStr, int connectTimeout, @@ -217,8 +194,6 @@ public ArmadaCloud( int retentionTimeout) { this(name); - setServerUrl(serverUrl); - setNamespace(namespace); setJenkinsUrl(jenkinsUrl); if (templates != null) { this.templates.addAll(templates); @@ -348,21 +323,13 @@ public void setArmadaLookoutPort(String armadaLookoutPort) { } public String getArmadaJobSetPrefix() { - return StringUtils.isBlank(armadaJobSetPrefix) ? "" : armadaJobSetPrefix + "-"; + return armadaJobSetPrefix; } @DataBoundSetter public void setArmadaJobSetPrefix(String armadaJobSetPrefix) { this.armadaJobSetPrefix = armadaJobSetPrefix; - } - - public String getArmadaJobSetId() { - return armadaJobSetId; - } - - @DataBoundSetter - public void setArmadaJobSetId(String armadaJobSetId) { - this.armadaJobSetId = armadaJobSetId; + this.armadaJobSetStrategy = new DailyArmadaJobSetStrategy(getArmadaJobSetPrefix() + "-" + getDisplayName()); } public String getArmadaClusterConfigPath() { @@ -374,26 +341,6 @@ public void setArmadaClusterConfigPath(String armadaClusterConfigPath) { this.armadaClusterConfigPath = armadaClusterConfigPath; } - public String getServerUrl() { - return serverUrl; - } - - @DataBoundSetter - public void setServerUrl(@NonNull String serverUrl) { - ensureKubernetesUrlInFipsMode(serverUrl); - this.serverUrl = Util.fixEmpty(serverUrl); - } - - public String getServerCertificate() { - return serverCertificate; - } - - @DataBoundSetter - public void setServerCertificate(String serverCertificate) { - ensureServerCertificateInFipsMode(serverCertificate); - this.serverCertificate = Util.fixEmpty(serverCertificate); - } - public boolean isSkipTlsVerify() { return skipTlsVerify; } @@ -413,15 +360,6 @@ public void setAddMasterProxyEnvVars(boolean 
addMasterProxyEnvVars) { this.addMasterProxyEnvVars = addMasterProxyEnvVars; } - public String getNamespace() { - return namespace; - } - - @DataBoundSetter - public void setNamespace(String namespace) { - this.namespace = Util.fixEmpty(namespace); - } - public String getJnlpregistry() { return jnlpregistry; } @@ -456,15 +394,6 @@ public boolean isCapOnlyOnAlivePods() { return capOnlyOnAlivePods; } - public GarbageCollection getGarbageCollection() { - return garbageCollection; - } - - @DataBoundSetter - public void setGarbageCollection(GarbageCollection garbageCollection) { - this.garbageCollection = garbageCollection; - } - /** * @return same as {@link #getJenkinsUrlOrNull}, if set * @throws IllegalStateException if no Jenkins URL could be computed. @@ -659,64 +588,6 @@ public void setConnectTimeout(int connectTimeout) { this.connectTimeout = Math.max(DEFAULT_CONNECT_TIMEOUT_SECONDS, connectTimeout); } - /** - * Gets the global pod retention policy for the plugin. - */ - public PodRetention getPodRetention() { - return this.podRetention; - } - - /** - * Set the global pod retention policy for the plugin. - * - * @param podRetention the pod retention policy for the plugin. - */ - @DataBoundSetter - public void setPodRetention(PodRetention podRetention) { - if (podRetention == null || podRetention instanceof Default) { - podRetention = PodRetention.getKubernetesCloudDefault(); - } - this.podRetention = podRetention; - } - - /** - * Connects to Kubernetes. - * - * @return Kubernetes client. 
- */ - @SuppressFBWarnings({"IS2_INCONSISTENT_SYNC", "DC_DOUBLECHECK"}) - public KubernetesClient connect() throws KubernetesAuthException, IOException { - - LOGGER.log(Level.FINEST, "Building connection to Kubernetes {0} URL {1} namespace {2}", new String[] { - getDisplayName(), serverUrl, namespace - }); - KubernetesClient client = KubernetesClientProvider.createClient(this); - - LOGGER.log(Level.FINE, "Connected to Kubernetes {0} URL {1} namespace {2}", new String[] { - getDisplayName(), client.getMasterUrl().toString(), namespace - }); - return client; - } - - /** - * Connects to Kubernetes. - * - * @return Kubernetes client. - */ - @SuppressFBWarnings({"IS2_INCONSISTENT_SYNC", "DC_DOUBLECHECK"}) - public KubernetesClient connect(String serverUrl, String namespace) throws KubernetesAuthException, IOException { - - LOGGER.log(Level.FINEST, "Building connection to Kubernetes {0} URL {1} namespace {2}", new String[] { - getDisplayName(), serverUrl, namespace - }); - KubernetesClient client = KubernetesClientProvider.createClient(this, serverUrl, namespace); - - LOGGER.log(Level.FINE, "Connected to Kubernetes {0} URL {1} namespace {2}", new String[] { - getDisplayName(), client.getMasterUrl().toString(), namespace - }); - return client; - } - @Override public Collection provision( @NonNull final Cloud.CloudState state, final int excessWorkload) { @@ -760,22 +631,6 @@ public Collection provision( } Metrics.metricRegistry().counter(MetricNames.PROVISION_NODES).inc(plannedNodes.size()); return plannedNodes; - } catch (KubernetesClientException e) { - Metrics.metricRegistry().counter(MetricNames.PROVISION_FAILED).inc(); - Throwable cause = e.getCause(); - if (cause instanceof SocketTimeoutException - || cause instanceof ConnectException - || cause instanceof UnknownHostException) { - LOGGER.log(Level.WARNING, "Failed to connect to Kubernetes at {0}: {1}", new String[] { - serverUrl, cause.getMessage() - }); - } else { - LOGGER.log( - Level.WARNING, - "Failed to count 
the # of live instances on Kubernetes", - cause != null ? cause : e); - } - limitRegistrationResults.unregister(); } catch (Exception e) { Metrics.metricRegistry().counter(MetricNames.PROVISION_FAILED).inc(); LOGGER.log(Level.WARNING, "Failed to count the # of live instances on Kubernetes", e); @@ -809,50 +664,6 @@ private static void ensureSkipTlsVerifyInFipsMode(boolean skipTlsVerify) { } } - /** - * Checks if server certificate is allowed if FIPS mode. - * Allowed certificates use a public key with the following algorithms and sizes: - *
    - *
  • DSA with key size >= 2048
  • - *
  • RSA with key size >= 2048
  • - *
  • Elliptic curve (ED25519) with field size >= 224
  • - *
- * If certificate is valid and allowed or not in FIPS mode method will just exit. - * If not it will throw an {@link IllegalArgumentException}. - * @param serverCertificate String containing the certificate PEM. - */ - private static void ensureServerCertificateInFipsMode(String serverCertificate) { - if (!FIPS140.useCompliantAlgorithms()) { - return; - } - if (StringUtils.isBlank(serverCertificate)) { - return; // JENKINS-73789, no certificate is accepted - } - try { - PEMEncodable pem = PEMEncodable.decode(serverCertificate); - Certificate cert = pem.toCertificate(); - if (cert == null) { - throw new IllegalArgumentException(Messages.KubernetesCloud_serverCertificateNotACertificate()); - } - PublicKey publicKey = cert.getPublicKey(); - if (publicKey instanceof RSAPublicKey) { - if (((RSAPublicKey) publicKey).getModulus().bitLength() < 2048) { - throw new IllegalArgumentException(Messages.KubernetesCloud_serverCertificateKeySize()); - } - } else if (publicKey instanceof DSAPublicKey) { - if (((DSAPublicKey) publicKey).getParams().getP().bitLength() < 2048) { - throw new IllegalArgumentException(Messages.KubernetesCloud_serverCertificateKeySize()); - } - } else if (publicKey instanceof ECPublicKey) { - if (((ECPublicKey) publicKey).getParams().getCurve().getField().getFieldSize() < 224) { - throw new IllegalArgumentException(Messages.KubernetesCloud_serverCertificateKeySizeEC()); - } - } - } catch (RuntimeException | UnrecoverableKeyException | IOException e) { - throw new IllegalArgumentException(e.getMessage(), e); - } - } - @Override public void replaceTemplate(PodTemplate oldTemplate, PodTemplate newTemplate) { this.removeTemplate(oldTemplate); @@ -975,17 +786,12 @@ public boolean equals(Object o) { && maxRequestsPerHost == that.maxRequestsPerHost && Objects.equals(defaultsProviderTemplate, that.defaultsProviderTemplate) && templates.equals(that.templates) - && Objects.equals(serverUrl, that.serverUrl) - && Objects.equals(serverCertificate, 
that.serverCertificate) - && Objects.equals(namespace, that.namespace) && Objects.equals(jnlpregistry, that.jnlpregistry) && Objects.equals(jenkinsUrl, that.jenkinsUrl) && Objects.equals(jenkinsTunnel, that.jenkinsTunnel) && Objects.equals(credentialsId, that.credentialsId) && Objects.equals(getPodLabels(), that.getPodLabels()) - && Objects.equals(podRetention, that.podRetention) && Objects.equals(waitForPodSec, that.waitForPodSec) - && Objects.equals(garbageCollection, that.garbageCollection) && useJenkinsProxy == that.useJenkinsProxy; } @@ -995,12 +801,9 @@ public int hashCode() { name, defaultsProviderTemplate, templates, - serverUrl, - serverCertificate, skipTlsVerify, addMasterProxyEnvVars, capOnlyOnAlivePods, - namespace, jnlpregistry, jenkinsUrl, jenkinsTunnel, @@ -1012,9 +815,7 @@ public int hashCode() { podLabels, usageRestricted, maxRequestsPerHost, - podRetention, - useJenkinsProxy, - garbageCollection); + useJenkinsProxy); } public Integer getWaitForPodSec() { @@ -1026,30 +827,6 @@ public void setWaitForPodSec(Integer waitForPodSec) { this.waitForPodSec = waitForPodSec; } - public ArmadaClient connectToArmada() throws KubernetesAuthException { - if (StringUtils.isNotBlank(armadaCredentialsId)) { - return secureArmadaConnection(armadaCredentialsId); - } - - return unsecureArmadaConnection(); - } - - public ArmadaClient secureArmadaConnection(String armadaCredentialsId) - throws KubernetesAuthException { - StandardCredentials standardCredentials = resolveCredentials(armadaCredentialsId); - if (!(standardCredentials instanceof StringCredentials)) { - throw new KubernetesAuthException("credentials not a string credentials"); - } - - String secret = ((StringCredentials) standardCredentials).getSecret().getPlainText(); - - return new ArmadaClient(armadaUrl, Integer.parseInt(armadaPort), secret); - } - - public ArmadaClient unsecureArmadaConnection() { - return new ArmadaClient(armadaUrl, Integer.parseInt(armadaPort)); - } - @Restricted(NoExternalUse.class) // 
jelly public PodTemplate.DescriptorImpl getTemplateDescriptor() { return (PodTemplate.DescriptorImpl) Jenkins.get().getDescriptorOrDie(PodTemplate.class); @@ -1090,55 +867,6 @@ public static void addAliases() { org.jenkinsci.plugins.kubernetes.credentials.FileSystemServiceAccountCredential.class); } - @RequirePOST - @SuppressWarnings("unused") // used by jelly - public FormValidation doTestConnection( - @QueryParameter String name, - @QueryParameter String serverUrl, - @QueryParameter String credentialsId, - @QueryParameter String serverCertificate, - @QueryParameter boolean skipTlsVerify, - @QueryParameter String namespace, - @QueryParameter int connectionTimeout, - @QueryParameter int readTimeout, - @QueryParameter boolean useJenkinsProxy) - throws Exception { - Jenkins.get().checkPermission(Jenkins.MANAGE); - - if (StringUtils.isBlank(name)) return FormValidation.error("name is required"); - - try (KubernetesClient client = new KubernetesFactoryAdapter( - serverUrl, - namespace, - Util.fixEmpty(serverCertificate), - Util.fixEmpty(credentialsId), - skipTlsVerify, - connectionTimeout, - readTimeout, - DEFAULT_MAX_REQUESTS_PER_HOST, - useJenkinsProxy) - .createClient()) { - // test listing pods - client.pods().list(); - VersionInfo version = client.getVersion(); - return FormValidation.ok("Connected to Kubernetes " + version.getGitVersion()); - } catch (KubernetesClientException e) { - LOGGER.log(Level.FINE, String.format("Error testing connection %s", serverUrl), e); - return FormValidation.error( - "Error testing connection %s: %s", - serverUrl, - e.getCause() == null - ? 
e.getMessage() - : String.format( - "%s: %s", - e.getCause().getClass().getName(), - e.getCause().getMessage())); - } catch (Exception e) { - LOGGER.log(Level.FINE, String.format("Error testing connection %s", serverUrl), e); - return FormValidation.error("Error testing connection %s: %s", serverUrl, e.getMessage()); - } - } - @RequirePOST @SuppressWarnings("unused") // used by jelly @SuppressFBWarnings("REC_CATCH_EXCEPTION") @@ -1154,52 +882,22 @@ public FormValidation doTestArmadaConnection(@QueryParameter String armadaUrl, return FormValidation.error("armadaPort is required"); } - ArmadaClient armadaClient = null; - try { - if (StringUtils.isBlank(armadaCredentialsId)) { - armadaClient = new ArmadaClient(armadaUrl, Integer.parseInt(armadaPort)); - } else { - StandardCredentials standardCredentials = resolveCredentials( - Util.fixEmpty(armadaCredentialsId)); - if (Objects.nonNull(standardCredentials) - && !(standardCredentials instanceof StringCredentials)) { - String message = String.format( - "Error testing Armada connection url:%s, port:%s, cause: credentials not a string credentials", - armadaUrl, armadaPort); - LOGGER.log(Level.FINE, message); - return FormValidation.error(message); - } - - StringCredentials stringCredentials = (StringCredentials) standardCredentials; - if (Objects.isNull(stringCredentials)) { - String message = String.format( - "Error testing Armada connection url:%s, port:%s, cause: string credentials null", - armadaUrl, armadaPort); - LOGGER.log(Level.FINE, message); - return FormValidation.error(message); - } - String secret = stringCredentials.getSecret().getPlainText(); - - armadaClient = new ArmadaClient(armadaUrl, Integer.parseInt(armadaPort), - secret); - } - - if (ServingStatus.SERVING == armadaClient.checkHealth()) { + var parameters = new ArmadaClientParameters(armadaUrl, Integer.parseInt(armadaPort), null, null, armadaCredentialsId, null); + try(var client = ArmadaState.createClient(parameters)) { + if (ServingStatus.SERVING == 
client.checkHealth()) { return FormValidation.ok("Connected to Armada"); } return FormValidation.error("Connection to Armada failed %s:%s", armadaUrl, - armadaPort); - } catch (Exception e) { + armadaPort); + } catch(Throwable t) { LOGGER.log(Level.FINE, - String.format("Error testing Armada connection %s:%s", armadaUrl, armadaPort), - e); + String.format("Error testing Armada connection %s:%s", armadaUrl, armadaPort), + t); + var cause = t.getCause(); return FormValidation.error( - "Error testing Armada connection url:%s, port:%s, cause:%s", armadaUrl, - armadaPort, e.getCause().toString()); - } finally { - assert armadaClient != null; - armadaClient.close(); + "Error testing Armada connection url:%s, port:%s, cause:%s", armadaUrl, + armadaPort, cause == null ? t.toString() : cause.toString()); } } @@ -1216,31 +914,6 @@ public FormValidation doCheckSkipTlsVerify(@QueryParameter boolean skipTlsVerify return FormValidation.ok(); } - @RequirePOST - @SuppressWarnings({"unused", "lgtm[jenkins/csrf]" - }) // used by jelly and already fixed jenkins security scan warning - public FormValidation doCheckServerCertificate(@QueryParameter String serverCertificate) { - Jenkins.get().checkPermission(Jenkins.MANAGE); - try { - ensureServerCertificateInFipsMode(serverCertificate); - } catch (IllegalArgumentException ex) { - return FormValidation.error(ex, ex.getLocalizedMessage()); - } - return FormValidation.ok(); - } - - @RequirePOST - @SuppressWarnings("unused") // used by jelly - public FormValidation doCheckServerUrl(@QueryParameter String serverUrl) { - Jenkins.get().checkPermission(Jenkins.MANAGE); - try { - ensureKubernetesUrlInFipsMode(serverUrl); - } catch (IllegalArgumentException ex) { - return FormValidation.error(ex.getLocalizedMessage()); - } - return FormValidation.ok(); - } - @RequirePOST @SuppressWarnings("unused") // used by jelly public ListBoxModel doFillCredentialsIdItems( @@ -1394,25 +1067,6 @@ public FormValidation doCheckWebSocket( return 
FormValidation.ok(); } - @SuppressWarnings("unused") // used by jelly - public List> getAllowedPodRetentions() { - Jenkins jenkins = Jenkins.getInstanceOrNull(); - if (jenkins == null) { - return new ArrayList<>(0); - } - return DescriptorVisibilityFilter.apply(this, jenkins.getDescriptorList(PodRetention.class)); - } - - @SuppressWarnings({"rawtypes", "unused"}) // used by jelly - public Descriptor getDefaultPodRetention() { - Jenkins jenkins = Jenkins.getInstanceOrNull(); - if (jenkins == null) { - return null; - } - return jenkins.getDescriptor( - PodRetention.getKubernetesCloudDefault().getClass()); - } - @SuppressWarnings("unused") // used by jelly public int getDefaultReadTimeout() { return DEFAULT_READ_TIMEOUT_SECONDS; @@ -1435,14 +1089,11 @@ public int getDefaultWaitForPod() { @Override public String toString() { - return "KubernetesCloud{name=" + name + ", defaultsProviderTemplate='" + return "ArmadaCloud{name=" + name + ", defaultsProviderTemplate='" + defaultsProviderTemplate + '\'' + ", serverUrl='" - + serverUrl + '\'' + ", serverCertificate='" - + serverCertificate + '\'' + ", skipTlsVerify=" + skipTlsVerify + ", addMasterProxyEnvVars=" + addMasterProxyEnvVars + ", capOnlyOnAlivePods=" - + capOnlyOnAlivePods + ", namespace='" - + namespace + '\'' + ", jnlpregistry='" + + capOnlyOnAlivePods + ", jnlpregistry='" + jnlpregistry + '\'' + ", jenkinsUrl='" + jenkinsUrl + '\'' + ", jenkinsTunnel='" + jenkinsTunnel + '\'' + ", credentialsId='" @@ -1456,44 +1107,9 @@ public String toString() { + podLabels + ", usageRestricted=" + usageRestricted + ", maxRequestsPerHost=" + maxRequestsPerHost + ", waitForPodSec=" - + waitForPodSec + ", podRetention=" - + podRetention + ", useJenkinsProxy=" + + waitForPodSec + ", useJenkinsProxy=" + useJenkinsProxy + ", templates=" - + templates + ", garbageCollection=" - + garbageCollection + '}'; - } - - private Object readResolve() { - if ((serverCertificate != null) && !serverCertificate.trim().startsWith("-----BEGIN 
CERTIFICATE-----")) { - serverCertificate = new String(Base64.getDecoder().decode(serverCertificate.getBytes(UTF_8)), UTF_8); - LOGGER.log( - Level.INFO, "Upgraded Kubernetes server certificate key: {0}", serverCertificate.substring(0, 80)); - } - - // FIPS checks if in FIPS mode - ensureServerCertificateInFipsMode(serverCertificate); - ensureKubernetesUrlInFipsMode(serverUrl); - ensureSkipTlsVerifyInFipsMode(skipTlsVerify); - - if (maxRequestsPerHost == 0) { - maxRequestsPerHost = DEFAULT_MAX_REQUESTS_PER_HOST; - } - if (podRetention == null) { - podRetention = PodRetention.getKubernetesCloudDefault(); - } - setConnectTimeout(connectTimeout); - setReadTimeout(readTimeout); - setRetentionTimeout(retentionTimeout); - if (waitForPodSec == null) { - waitForPodSec = DEFAULT_WAIT_FOR_POD_SEC; - } - if (podLabels == null && labels != null) { - setPodLabels(PodLabel.fromMap(labels)); - } - if (containerCap != null && containerCap == 0) { - containerCap = null; - } - return this; + + templates + '}'; } @Override @@ -1520,7 +1136,7 @@ public static void hpiRunInit() { String hostAddress = System.getProperty("jenkins.host.address"); if (hostAddress != null && jenkins.clouds.getAll(ArmadaCloud.class).isEmpty()) { - ArmadaCloud cloud = new ArmadaCloud("kubernetes"); + ArmadaCloud cloud = new ArmadaCloud("armada"); cloud.setJenkinsUrl( "http://" + hostAddress + ":" + SystemProperties.getInteger("port", 8080) + "/jenkins/"); jenkins.clouds.add(cloud); diff --git a/src/main/java/io/armadaproject/jenkins/plugin/ArmadaClusterInfoProvider.java b/src/main/java/io/armadaproject/jenkins/plugin/ArmadaClusterInfoProvider.java new file mode 100644 index 000000000..460d18953 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/ArmadaClusterInfoProvider.java @@ -0,0 +1,131 @@ +package io.armadaproject.jenkins.plugin; + +import hudson.Extension; +import hudson.XmlFile; +import hudson.model.Saveable; +import hudson.model.listeners.SaveableListener; +import jenkins.model.Jenkins; 
+import org.apache.commons.lang.StringUtils; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.logging.Level; +import java.util.logging.Logger; + +public final class ArmadaClusterInfoProvider { + @Extension(ordinal = 2) + public static class SaveableListenerImpl extends SaveableListener { + @Override + public void onChange(Saveable o, XmlFile file) { + if (o instanceof Jenkins) { + Jenkins jenkins = (Jenkins) o; + ArmadaClusterInfoProvider.reconfigure(jenkins.clouds.getAll(ArmadaCloud.class)); + } + super.onChange(o, file); + } + } + + public static class ClusterInfo { + private final String name; + private final String apiUrl; + private final String serverCertificate; + + public ClusterInfo(String name, String apiUrl, String serverCertificate) { + this.name = name; + this.apiUrl = apiUrl; + this.serverCertificate = serverCertificate; + } + + public String getName() { + return name; + } + + public String getApiUrl() { + return apiUrl; + } + + public String getServerCertificate() { + return serverCertificate; + } + } + + private static final Logger LOGGER = Logger.getLogger(ArmadaClusterInfoProvider.class.getName()); + private static final ConcurrentHashMap lastConfigPaths = new ConcurrentHashMap<>(); + private static final ConcurrentHashMap> clusterInfoMap = new ConcurrentHashMap<>(); + + private ArmadaClusterInfoProvider() {} + + public static ClusterInfo resolveClusterInfo(ArmadaCloud cloud, String clusterName) { + var clusterInfos = clusterInfoMap.getOrDefault(cloud.getDisplayName(), Collections.emptyMap()); + var clusterInfo = clusterInfos.get(clusterName); + if (clusterInfo == null) { + throw new IllegalStateException("Unable to resolve cluster info for " + clusterName); + 
} + return clusterInfo; + } + + private static void reconfigure(List clouds) { + var keys = new HashSet<>(clusterInfoMap.keySet()); + for (var cloud : clouds) { + var displayName = cloud.getDisplayName(); + try { + if(!StringUtils.equals(cloud.getArmadaClusterConfigPath(), lastConfigPaths.get(displayName))) { + LOGGER.info("Updating cluster info for " + displayName); + clusterInfoMap.put(displayName, parse(cloud.getArmadaClusterConfigPath())); + lastConfigPaths.put(displayName, cloud.getArmadaClusterConfigPath()); + LOGGER.info("Updated cluster info for cloud " + cloud.getDisplayName()); + } + } catch(Throwable e) { + LOGGER.log(Level.SEVERE, "Failed to update cluster info for cloud " + displayName, e); + } + keys.remove(displayName); + } + for(var cloudName : keys) { + clusterInfoMap.remove(cloudName); + lastConfigPaths.remove(cloudName); + } + } + + private static Map parse(String configPath) { + var factory = DocumentBuilderFactory.newInstance(); + try { + var builder = factory.newDocumentBuilder(); + + Document doc = builder.parse(configPath); + doc.getDocumentElement().normalize(); + + Map clusterMap = new HashMap<>(); + + var clusterList = doc.getElementsByTagName("cluster"); + for (int i = 0; i < clusterList.getLength(); i++) { + var clusterNode = clusterList.item(i); + + if (clusterNode.getNodeType() == Node.ELEMENT_NODE) { + var clusterElement = (Element) clusterNode; + + var name = clusterElement.getElementsByTagName("name").item(0).getTextContent(); + var url = clusterElement.getElementsByTagName("url").item(0).getTextContent(); + String certData = null; + if (clusterElement.getElementsByTagName("cert_data").getLength() == 1) { + certData = clusterElement.getElementsByTagName("cert_data").item(0).getTextContent(); + } + + clusterMap.put(name, new ClusterInfo(name, url, certData)); + } + } + + LOGGER.info("Loaded "+ clusterMap.size() + " clusters from " + configPath); + + return clusterMap; + } catch (ParserConfigurationException | SAXException | 
IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/ArmadaComputer.java b/src/main/java/io/armadaproject/jenkins/plugin/ArmadaComputer.java new file mode 100644 index 000000000..bf7e9b782 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/ArmadaComputer.java @@ -0,0 +1,118 @@ +package io.armadaproject.jenkins.plugin; + +import edu.umd.cs.findbugs.annotations.NonNull; +import hudson.model.Computer; +import hudson.model.Executor; +import hudson.model.Queue; +import hudson.security.ACL; +import hudson.security.Permission; +import hudson.slaves.AbstractCloudComputer; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.acegisecurity.Authentication; + +/** + * @author Carlos Sanchez carlos@apache.org + */ +public class ArmadaComputer extends AbstractCloudComputer { + private static final Logger LOGGER = Logger.getLogger(ArmadaComputer.class.getName()); + private static final ConcurrentMap retries = new ConcurrentHashMap<>(); + + private boolean launching; + + public ArmadaComputer(ArmadaSlave slave) { + super(slave); + } + + @Override + public void taskAccepted(Executor executor, Queue.Task task) { + super.taskAccepted(executor, task); + Queue.Executable exec = executor.getCurrentExecutable(); + LOGGER.log(Level.FINE, " Computer {0} accepted task {1}", new Object[] {this, exec}); + } + + @Override + public void taskCompleted(Executor executor, Queue.Task task, long durationMS) { + Queue.Executable exec = executor.getCurrentExecutable(); + LOGGER.log(Level.FINE, " Computer {0} completed task {1}", new Object[] {this, exec}); + + // May take the agent offline and remove it, in which case getNode() + // above would return null and we'd not find our DockerSlave anymore. 
+ super.taskCompleted(executor, task, durationMS); + } + + @Override + public void taskCompletedWithProblems(Executor executor, Queue.Task task, long durationMS, Throwable problems) { + super.taskCompletedWithProblems(executor, task, durationMS, problems); + Queue.Executable exec = executor.getCurrentExecutable(); + LOGGER.log(Level.FINE, " Computer {0} completed task {1} with problems", new Object[] {this, exec}); + } + + @Override + public String toString() { + return String.format("KubernetesComputer name: %s agent: %s", getName(), getNode()); + } + + @Override + @NonNull + public ACL getACL() { + final ACL base = super.getACL(); + return new KubernetesComputerACL(base); + } + + /** + * Simple static inner class to be used by {@link #getACL()}. + * It replaces an anonymous inner class in order to fix + * SIC_INNER_SHOULD_BE_STATIC_ANON. + */ + private static final class KubernetesComputerACL extends ACL { + + private final ACL base; + + public KubernetesComputerACL(final ACL base) { + this.base = base; + } + + @Override + public boolean hasPermission(Authentication a, Permission permission) { + return permission == Computer.CONFIGURE ? false : base.hasPermission(a, permission); + } + } + + public void setLaunching(boolean launching) { + this.launching = launching; + } + + /** + * + * @return true if the Pod has been created in Kubernetes and the current instance is waiting for the pod to be usable. 
+ */ + public boolean isLaunching() { + return launching; + } + + @Override + public void setAcceptingTasks(boolean acceptingTasks) { + super.setAcceptingTasks(acceptingTasks); + if (acceptingTasks) { + launching = false; + } + } + + public int getRetryCount() { + return retries.computeIfAbsent(getNode().getLabelString(), k -> new AtomicInteger(0)).get(); + } + + public void incrementRetry() { + retries.computeIfAbsent(getNode().getLabelString(), k -> new AtomicInteger(0)).incrementAndGet(); + } + + public void clearRetryCount() { + retries.remove(getNode().getLabelString()); + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/ArmadaComputerFactory.java b/src/main/java/io/armadaproject/jenkins/plugin/ArmadaComputerFactory.java new file mode 100644 index 000000000..8ad204870 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/ArmadaComputerFactory.java @@ -0,0 +1,37 @@ +package io.armadaproject.jenkins.plugin; + +import hudson.ExtensionList; +import hudson.ExtensionPoint; + +/** + * A factory of {@link ArmadaComputer} instances. + */ +public abstract class ArmadaComputerFactory implements ExtensionPoint { + /** + * Returns all registered implementations of {@link ArmadaComputerFactory}. + * @return all registered implementations of {@link ArmadaComputerFactory}. + */ + public static ExtensionList all() { + return ExtensionList.lookup(ArmadaComputerFactory.class); + } + + /** + * Returns a new instance of {@link ArmadaComputer}. + * @return a new instance of {@link ArmadaComputer}. + */ + public static ArmadaComputer createInstance(ArmadaSlave slave) { + for (ArmadaComputerFactory factory : all()) { + ArmadaComputer kubernetesComputer = factory.newInstance(slave); + if (kubernetesComputer != null) { + return kubernetesComputer; + } + } + return new ArmadaComputer(slave); + } + + /** + * Creates a new instance of {@link ArmadaComputer}. + * @return a new instance of {@link ArmadaComputer}. 
+ */ + public abstract ArmadaComputer newInstance(ArmadaSlave slave); +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/ArmadaLauncher.java b/src/main/java/io/armadaproject/jenkins/plugin/ArmadaLauncher.java new file mode 100644 index 000000000..0fa58d7e0 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/ArmadaLauncher.java @@ -0,0 +1,304 @@ +/* + * The MIT License + * + * Copyright (c) 2017, CloudBees, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +package io.armadaproject.jenkins.plugin; + +import static io.armadaproject.jenkins.plugin.job.ArmadaClientUtil.lookoutUrlForJob; +import static java.util.logging.Level.INFO; +import static java.util.logging.Level.WARNING; + +import api.SubmitOuterClass.JobState; +import edu.umd.cs.findbugs.annotations.CheckForNull; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import hudson.Functions; +import hudson.model.Descriptor; +import hudson.model.Run; +import hudson.model.TaskListener; +import hudson.slaves.ComputerLauncher; +import hudson.slaves.JNLPLauncher; +import hudson.slaves.SlaveComputer; +import io.armadaproject.jenkins.plugin.job.ArmadaLaunchFailedOfflineCause; +import io.armadaproject.jenkins.plugin.job.ArmadaState; +import io.armadaproject.jenkins.plugin.pod.decorator.PodDecoratorException; +import io.armadaproject.jenkins.plugin.pod.retention.Reaper; +import io.fabric8.kubernetes.api.model.Pod; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; + +import io.fabric8.kubernetes.client.KubernetesClientException; +import io.grpc.StatusRuntimeException; +import jenkins.metrics.api.Metrics; +import jenkins.model.Jenkins; +import org.kohsuke.stapler.DataBoundConstructor; + +/** + * Launches on Kubernetes the specified {@link ArmadaComputer} instance. 
+ */ +public class ArmadaLauncher extends JNLPLauncher { + // Report progress every 30 seconds + private static final long REPORT_INTERVAL = TimeUnit.SECONDS.toMillis(30L); + + private static final Collection JOB_TERMINATED_STATES = + Collections.unmodifiableCollection(Arrays.asList(JobState.FAILED, JobState.SUCCEEDED, + JobState.REJECTED)); + + private static final Logger LOGGER = Logger.getLogger(ArmadaLauncher.class.getName()); + private static final int MAX_RETRIES = 5; + + private volatile boolean launched = false; + + /** + * Provisioning exception if any. + */ + @CheckForNull + private transient Throwable problem; + + @DataBoundConstructor + public ArmadaLauncher(String tunnel, String vmargs) { + super(tunnel, vmargs); + } + + public ArmadaLauncher() { + super(); + } + + @Override + public boolean isLaunchSupported() { + return !launched; + } + + @Override + @SuppressFBWarnings(value = {"SWL_SLEEP_WITH_LOCK_HELD", "REC_CATCH_EXCEPTION", + "NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE"}, + justification = "This is fine") + public synchronized void launch(SlaveComputer computer, TaskListener listener) { + if (!(computer instanceof ArmadaComputer)) { + throw new IllegalArgumentException("This Launcher can be used only with KubernetesComputer"); + } + // Activate reaper if it never got activated. 
+ Reaper.getInstance().maybeActivate(); + ArmadaComputer kubernetesComputer = (ArmadaComputer) computer; + computer.setAcceptingTasks(false); + ArmadaSlave agent = kubernetesComputer.getNode(); + if (agent == null) { + throw new IllegalStateException("Node has been removed, cannot launch " + computer.getName()); + } + if (launched) { + LOGGER.log(INFO, "Agent has already been launched, activating: {0}", agent.getNodeName()); + computer.setAcceptingTasks(true); + return; + } + + var jobManager = ArmadaState.getJobManager(agent.getArmadaCloud()); + try { + var retryCount = kubernetesComputer.getRetryCount(); + if(retryCount >= MAX_RETRIES) { + PodUtils.cancelQueueItemFor(agent, "RetriesExceeded"); + computer.disconnect(new ArmadaLaunchFailedOfflineCause("Provisioning failed")); + throw new RuntimeException("Agent failed to launch after " + MAX_RETRIES + " retries"); + } + + PodTemplate template = agent.getTemplate(); + ArmadaCloud cloud = agent.getArmadaCloud(); + Pod pod; + try { + pod = template.build(agent); + } catch (PodDecoratorException e) { + Run run = template.getRun(); + if (run != null) { + template.getListener().getLogger().println("Failed to build pod definition : " + e.getMessage()); + PodUtils.cancelQueueItemFor(run.getUrl(), template.getLabel(), e.getMessage(), null); + } + e.printStackTrace(listener.fatalError("Failed to build pod definition")); + setProblem(e); + terminateOrLog(agent); + return; + } + + agent.setNamespace(cloud.getArmadaNamespace()); + agent.setPodSpec(pod.getSpec()); + var existingJobId = agent.getArmadaJobId(); + var jobMetadata = jobManager.ensurePod(agent.getArmadaJobSetId(), existingJobId, pod); + agent.setArmadaJobId(jobMetadata.getJobId()); + agent.setArmadaJobSetId(jobMetadata.getJobSetId()); + agent.save(); + + String armadaLookoutJobUrl = lookoutUrlForJob( + cloud.getArmadaLookoutUrl(), + Integer.parseInt(cloud.getArmadaLookoutPort()), + cloud.getArmadaQueue(), + agent.getArmadaJobSetId(), + agent.getArmadaJobId()); + 
LOGGER.log(INFO, () -> "Submitted job: " + armadaLookoutJobUrl); + listener.getLogger().printf("Submitted job: %s %n", armadaLookoutJobUrl); + Metrics.metricRegistry().counter(MetricNames.JOBS_SUBMITTED).inc(); + + agent.getRunListener().getLogger().printf("Submitted job: %s %n", + armadaLookoutJobUrl); + + kubernetesComputer.setLaunching(true); + + var metadata = jobManager.waitUntilRunning(jobMetadata.getJobSetId(), jobMetadata.getJobId(), template.getSlaveConnectTimeout(), TimeUnit.SECONDS); + LOGGER.log(INFO, () -> "Job is running: " + agent.getArmadaJobId()); + agent.getRunListener().getLogger().printf("Job is running: %s %n", agent.getArmadaJobId()); + agent.setClusterId(metadata.getClusterId()); + agent.assignPod(metadata.getPodName()); + agent.save(); + + LOGGER.log(INFO, () -> "Agent metadata updated: " + agent.getArmadaJobId()); + + kubernetesComputer.clearRetryCount(); + + // We need the pod to be running and connected before returning + // otherwise this method keeps being called multiple times + // so wait for agent to be online + int waitForSlaveToConnect = template.getSlaveConnectTimeout(); + int waitedForSlave; + + SlaveComputer slaveComputer = null; + String status = null; + long lastReportTimestamp = System.currentTimeMillis(); + for (waitedForSlave = 0; waitedForSlave < waitForSlaveToConnect; waitedForSlave++) { + slaveComputer = agent.getComputer(); + if (slaveComputer == null) { + Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); + throw new IllegalStateException("Node was deleted, computer is null"); + } + if (slaveComputer.isOnline()) { + break; + } + + // Check that the job hasn't failed already + if (jobManager.hasFailed(jobMetadata.getJobSetId(), jobMetadata.getJobId())) { + Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); + throw new IllegalStateException("Job failed: " + + agent.getArmadaJobId()); + } + + var terminated = jobManager.hasTerminated(jobMetadata.getJobSetId(), jobMetadata.getJobId()); + var 
launchFailed = terminated; + if(!terminated) { + var kc = agent.connect(); + try { + var podStatus = kc.pods() + .inNamespace(agent.getNamespace()) + .withName(agent.getPodName()) + .get() + .getStatus(); + launchFailed = podStatus.getContainerStatuses().stream().anyMatch(s -> s.getState().getTerminated() != null); + } catch(KubernetesClientException e) { + LOGGER.log(WARNING, "Failed to verify pod " + agent.getPodName() + " status after agent connected...assuming failed.", e); + launchFailed = true; + } + } + if (launchFailed) { + Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); + Metrics.metricRegistry() + .counter(MetricNames.metricNameForPodStatus("TERMINATED")) + .inc(); + throw new IllegalStateException("Job '" + agent.getArmadaJobId() + + "' or one of the containers in it are in terminated state"); + } + + if (lastReportTimestamp + REPORT_INTERVAL < System.currentTimeMillis()) { + LOGGER.log(INFO, "Waiting for agent to connect ({1}/{2}): {0}", new Object[] { + agent.getArmadaJobId(), waitedForSlave, waitForSlaveToConnect + }); + agent.getRunListener().getLogger() + .printf( + "Waiting for agent to connect (%2$s/%3$s): %1$s%n", + agent.getArmadaJobId(), waitedForSlave, waitForSlaveToConnect); + lastReportTimestamp = System.currentTimeMillis(); + } + Thread.sleep(1000); + } + if (slaveComputer == null || slaveComputer.isOffline()) { + Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); + Metrics.metricRegistry().counter(MetricNames.FAILED_TIMEOUT).inc(); + + throw new IllegalStateException( + "Agent is not connected after " + waitedForSlave + " seconds, status: " + status); + } + + computer.setAcceptingTasks(true); + launched = true; + try { + // We need to persist the "launched" setting... 
+ agent.save(); + } catch (IOException e) { + LOGGER.log(Level.WARNING, "Could not save() agent: " + e.getMessage(), e); + } + Metrics.metricRegistry().counter(MetricNames.JOBS_LAUNCHED).inc(); + } catch (Throwable ex) { + setProblem(ex); + Functions.printStackTrace(ex, agent.getRunListener().error("Failed to launch " + agent.getArmadaJobId())); + LOGGER.log( + Level.WARNING, + String.format("Error in provisioning; agent=%s, template=%s", agent, agent.getTemplateId()), + ex); + LOGGER.log(Level.FINER, "Removing Jenkins node: {0}", agent.getNodeName()); + if(ex instanceof StatusRuntimeException) { + Metrics.metricRegistry().counter(MetricNames.CREATION_FAILED).inc(); + } + kubernetesComputer.incrementRetry(); + terminateOrLog(agent); + throw new RuntimeException(ex); + } + } + + private static void terminateOrLog(ArmadaSlave node) { + try { + node.terminate(); + } catch (IOException | InterruptedException e) { + LOGGER.log(Level.WARNING, "Unable to remove Jenkins node", e); + } + } + + /** + * The last problem that occurred, if any. + * @return + */ + @CheckForNull + public Throwable getProblem() { + return problem; + } + + public void setProblem(@CheckForNull Throwable problem) { + this.problem = problem; + } + + @Override + public Descriptor getDescriptor() { + return new DescriptorImpl(); + } + + // Only there to avoid throwing unnecessary exceptions. KubernetesLauncher is never instantiated via UI. 
+ private static class DescriptorImpl extends Descriptor {} +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/DefaultInProvisioning.java b/src/main/java/io/armadaproject/jenkins/plugin/DefaultInProvisioning.java index 68eac0114..a6fcaec22 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/DefaultInProvisioning.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/DefaultInProvisioning.java @@ -26,7 +26,7 @@ private static boolean isNotAcceptingTasks(Node n) { public Set getInProvisioning(@CheckForNull Label label) { if (label != null) { return label.getNodes().stream() - .filter(KubernetesSlave.class::isInstance) + .filter(ArmadaSlave.class::isInstance) .filter(DefaultInProvisioning::isNotAcceptingTasks) .map(Node::getNodeName) .collect(Collectors.toSet()); diff --git a/src/main/java/io/armadaproject/jenkins/plugin/GarbageCollection.java b/src/main/java/io/armadaproject/jenkins/plugin/GarbageCollection.java index d9a12ef50..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/GarbageCollection.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/GarbageCollection.java @@ -1,219 +0,0 @@ -package io.armadaproject.jenkins.plugin; - -import static io.armadaproject.jenkins.plugin.PodTemplateUtils.sanitizeLabel; - -import edu.umd.cs.findbugs.annotations.NonNull; -import hudson.Extension; -import hudson.Main; -import hudson.Util; -import hudson.model.AbstractDescribableImpl; -import hudson.model.AsyncPeriodicWork; -import hudson.model.Descriptor; -import hudson.model.TaskListener; -import hudson.util.FormValidation; -import io.fabric8.kubernetes.api.model.Pod; -import io.fabric8.kubernetes.client.KubernetesClientException; -import java.io.IOException; -import java.time.Duration; -import java.time.Instant; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.logging.Level; -import 
java.util.logging.Logger; -import jenkins.model.Jenkins; -import jenkins.util.SystemProperties; -import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException; -import org.kohsuke.stapler.DataBoundConstructor; -import org.kohsuke.stapler.DataBoundSetter; -import org.kohsuke.stapler.QueryParameter; - -/** - * Manages garbage collection of orphaned pods. - */ -public class GarbageCollection extends AbstractDescribableImpl { - public static final String ANNOTATION_LAST_REFRESH = "kubernetes.jenkins.io/last-refresh"; - private static final Logger LOGGER = Logger.getLogger(GarbageCollection.class.getName()); - - public static final int MINIMUM_GC_TIMEOUT = 120; - - private String namespaces; - private transient Set namespaceSet; - private int timeout; - - private static Long RECURRENCE_PERIOD = SystemProperties.getLong( - GarbageCollection.class.getName() + ".recurrencePeriod", - Main.isUnitTest ? 5 : TimeUnit.MINUTES.toSeconds(1)); - - @DataBoundConstructor - public GarbageCollection() {} - - public String getNamespaces() { - return namespaces; - } - - @DataBoundSetter - public void setNamespaces(String namespaces) { - this.namespaces = Util.fixEmptyAndTrim(namespaces); - if (this.namespaces == null) { - this.namespaceSet = Set.of(); - } else { - this.namespaceSet = Set.of(this.namespaces.split("\n")); - } - } - - public int getTimeout() { - return timeout; - } - - protected Object readResolve() { - if (namespaces != null) { - setNamespaces(namespaces); - } - return this; - } - - @DataBoundSetter - public void setTimeout(int timeout) { - if (Main.isUnitTest) { - this.timeout = timeout; - } else { - this.timeout = Math.max(timeout, MINIMUM_GC_TIMEOUT); - } - } - - public Duration getDurationTimeout() { - return Duration.ofSeconds(timeout); - } - - @NonNull - public Set getNamespaceSet() { - return namespaceSet == null ? 
Set.of() : namespaceSet; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - GarbageCollection that = (GarbageCollection) o; - return timeout == that.timeout && Objects.equals(namespaces, that.namespaces); - } - - @Override - public int hashCode() { - return Objects.hash(namespaces, timeout); - } - - @Override - public String toString() { - return "GarbageCollection{" + "namespaces='" + namespaces + '\'' + ", timeout=" + timeout + '}'; - } - - @Extension - public static class DescriptorImpl extends Descriptor { - @SuppressWarnings("unused") // stapler - public FormValidation doCheckTimeout(@QueryParameter String value) { - return FormValidation.validateIntegerInRange(value, MINIMUM_GC_TIMEOUT, Integer.MAX_VALUE); - } - } - - /** - * Annotate pods owned by live Kubernetes agents to help with garbage collection. - */ - @Extension - public static final class PeriodicGarbageCollection extends AsyncPeriodicWork { - public PeriodicGarbageCollection() { - super("Garbage collection of orphaned Kubernetes pods"); - } - - @Override - protected void execute(TaskListener listener) throws IOException, InterruptedException { - annotateLiveAgents(listener); - garbageCollect(); - } - - private static void annotateLiveAgents(TaskListener listener) { - Arrays.stream(Jenkins.get().getComputers()) - .filter(KubernetesComputer.class::isInstance) - .map(KubernetesComputer.class::cast) - .forEach(kc -> kc.annotateTtl(listener)); - } - - private static void garbageCollect() { - for (var cloud : Jenkins.get().clouds.getAll(ArmadaCloud.class)) { - Optional.ofNullable(cloud.getGarbageCollection()).ifPresent(gc -> { - try { - var client = cloud.connect(); - var namespaces = new HashSet(); - namespaces.add(client.getNamespace()); - namespaces.addAll(gc.getNamespaceSet()); - for (var ns : namespaces) { - client - .pods() - .inNamespace(ns) - // Only look at pods created by this controller - 
.withLabel(PodTemplateBuilder.LABEL_KUBERNETES_CONTROLLER, sanitizeLabel(cloud.getJenkinsUrlOrNull())) - .list() - .getItems() - .stream() - .filter(pod -> { - var lastRefresh = pod.getMetadata() - .getAnnotations() - .get(ANNOTATION_LAST_REFRESH); - if (lastRefresh != null) { - try { - var refreshTime = Long.parseLong(lastRefresh); - var now = Instant.now(); - LOGGER.log( - Level.FINE, - () -> getQualifiedName(pod) + " refresh diff = " - + (now.toEpochMilli() - refreshTime) + ", timeout is " - + gc.getDurationTimeout() - .toMillis()); - return Duration.between(Instant.ofEpochMilli(refreshTime), now) - .compareTo(gc.getDurationTimeout()) - > 0; - } catch (NumberFormatException e) { - LOGGER.log( - Level.WARNING, - e, - () -> "Unable to parse last refresh for pod " - + getQualifiedName(pod) + ", ignoring"); - return false; - } - } else { - LOGGER.log( - Level.FINE, () -> "Ignoring legacy pod " + getQualifiedName(pod)); - return false; - } - }) - .forEach(pod -> { - LOGGER.log(Level.INFO, () -> "Deleting orphan pod " + getQualifiedName(pod)); - client.resource(pod).delete(); - }); - } - } catch (KubernetesClientException e) { - LOGGER.log(Level.WARNING, "Unexpected error while calling Kubernetes API", e); - } catch (KubernetesAuthException e) { - LOGGER.log(Level.WARNING, "Error authenticating to Kubernetes", e); - } catch (IOException e) { - LOGGER.log(Level.WARNING, "Error while getting Kubernetes client", e); - } - }); - } - } - - private static String getQualifiedName(@NonNull Pod pod) { - var metadata = pod.getMetadata(); - return metadata.getNamespace() + "/" + metadata.getName(); - } - - @Override - public long getRecurrencePeriod() { - return TimeUnit.SECONDS.toMillis(RECURRENCE_PERIOD); - } - } -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesClientProvider.java b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesClientProvider.java index 9713390fc..e01d77bbe 100644 --- 
a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesClientProvider.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesClientProvider.java @@ -8,13 +8,16 @@ import hudson.model.Saveable; import hudson.model.listeners.SaveableListener; import io.fabric8.kubernetes.client.KubernetesClient; -import java.io.IOException; + import java.util.Arrays; import java.util.HashSet; +import java.util.Objects; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; + import jenkins.model.Jenkins; import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException; import org.kohsuke.accmod.Restricted; @@ -36,7 +39,7 @@ public class KubernetesClientProvider { KubernetesClientProvider.class.getPackage().getName() + ".clients.cacheExpiration", TimeUnit.MINUTES.toSeconds(10)); - private static final Cache clients = Caffeine.newBuilder() + private static final Cache clients = Caffeine.newBuilder() .expireAfterWrite(CACHE_EXPIRATION, TimeUnit.SECONDS) .removalListener((key, value, cause) -> { Client client = (Client) value; @@ -49,37 +52,15 @@ public class KubernetesClientProvider { private KubernetesClientProvider() {} - static KubernetesClient createClient(ArmadaCloud cloud) throws KubernetesAuthException, IOException { - String displayName = cloud.getDisplayName(); - final Client c = clients.getIfPresent(displayName); - if (c == null) { - KubernetesClient client = new KubernetesFactoryAdapter( - cloud.getServerUrl(), - cloud.getNamespace(), - cloud.getServerCertificate(), - cloud.getCredentialsId(), - cloud.isSkipTlsVerify(), - cloud.getConnectTimeout(), - cloud.getReadTimeout(), - cloud.getMaxRequestsPerHost(), - cloud.isUseJenkinsProxy()) - .createClient(); - clients.put(displayName, new Client(getValidity(cloud), client)); - LOGGER.log(Level.FINE, "Created new Kubernetes client: {0} {1}", new Object[] {displayName, client}); - return client; - } - return 
c.getClient(); - } - - static KubernetesClient createClient(ArmadaCloud cloud, String serverUrl, String namespace) + static KubernetesClient createClient(ArmadaCloud cloud, String serverUrl, String caCertData) throws KubernetesAuthException { - String displayName = cloud.getDisplayName(); - final Client c = clients.getIfPresent(displayName); + CacheKey cacheKey = new CacheKey(cloud.getDisplayName(), serverUrl, caCertData); + final Client c = clients.getIfPresent(cacheKey); if (c == null) { KubernetesClient client = new KubernetesFactoryAdapter( serverUrl, - namespace, - cloud.getServerCertificate(), + cloud.getArmadaNamespace(), + caCertData, cloud.getCredentialsId(), cloud.isSkipTlsVerify(), cloud.getConnectTimeout(), @@ -87,8 +68,8 @@ static KubernetesClient createClient(ArmadaCloud cloud, String serverUrl, String cloud.getMaxRequestsPerHost(), cloud.isUseJenkinsProxy()) .createClient(); - clients.put(displayName, new Client(getValidity(cloud), client)); - LOGGER.log(Level.FINE, "Created new Kubernetes client: {0} {1}", new Object[] {displayName, client}); + clients.put(cacheKey, new Client(getValidity(cloud, serverUrl, caCertData), client)); + LOGGER.log(Level.FINE, "Created new Kubernetes client: {0} {1}", new Object[] {cacheKey, client}); return client; } return c.getClient(); @@ -101,11 +82,11 @@ static KubernetesClient createClient(ArmadaCloud cloud, String serverUrl, String * @return client validity hash code */ @Restricted(NoExternalUse.class) - public static int getValidity(@NonNull ArmadaCloud cloud) { + public static int getValidity(@NonNull ArmadaCloud cloud, String serverUrl, String caCertData) { Object[] cloudObjects = { - cloud.getServerUrl(), - cloud.getNamespace(), - cloud.getServerCertificate(), + serverUrl, + cloud.getArmadaNamespace(), + caCertData, cloud.getCredentialsId(), cloud.isSkipTlsVerify(), cloud.getConnectTimeout(), @@ -134,9 +115,33 @@ public int getValidity() { } } + public static class CacheKey { + private final String 
cloudDisplayName; + private final String serverUrl; + private final String caCertData; + + public CacheKey(String cloudDisplayName, String serverUrl, String caCertData) { + this.cloudDisplayName = cloudDisplayName; + this.serverUrl = serverUrl; + this.caCertData = caCertData; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof CacheKey)) return false; + CacheKey cacheKey = (CacheKey) o; + return Objects.equals(cloudDisplayName, cacheKey.cloudDisplayName) && Objects.equals(serverUrl, cacheKey.serverUrl) && Objects.equals(caCertData, cacheKey.caCertData); + } + + @Override + public int hashCode() { + return Objects.hash(cloudDisplayName, serverUrl, caCertData); + } + } + @Restricted(NoExternalUse.class) // testing only - public static void invalidate(String displayName) { - clients.invalidate(displayName); + public static void invalidate(CacheKey cacheKey) { + clients.invalidate(cacheKey); } @Restricted(NoExternalUse.class) // testing only @@ -151,20 +156,26 @@ public static class SaveableListenerImpl extends SaveableListener { public void onChange(Saveable o, XmlFile file) { if (o instanceof Jenkins) { Jenkins jenkins = (Jenkins) o; - Set cloudDisplayNames = new HashSet<>(clients.asMap().keySet()); + Set cacheKeys = new HashSet<>(clients.asMap().keySet()); for (ArmadaCloud cloud : jenkins.clouds.getAll(ArmadaCloud.class)) { String displayName = cloud.getDisplayName(); - Client client = clients.getIfPresent(displayName); - if (client == null || client.getValidity() == getValidity(cloud)) { - cloudDisplayNames.remove(displayName); + Set cloudCacheKeys = cacheKeys.stream() + .filter(c -> displayName.equals(c.cloudDisplayName)) + .collect(Collectors.toSet()); + + for(CacheKey cacheKey : cloudCacheKeys) { + Client client = clients.getIfPresent(cacheKey); + if (client == null || client.getValidity() == getValidity(cloud, cacheKey.serverUrl, cacheKey.caCertData)) { + cacheKeys.remove(cacheKey); + } } } // Remove missing / invalid clients - for 
(String displayName : cloudDisplayNames) { + for (CacheKey cacheKey : cacheKeys) { LOGGER.log( Level.INFO, - () -> "Invalidating Kubernetes client: " + displayName + clients.getIfPresent(displayName)); - invalidate(displayName); + () -> "Invalidating Kubernetes client: " + cacheKey.cloudDisplayName + "-" + cacheKey.serverUrl + clients.getIfPresent(cacheKey)); + invalidate(cacheKey); } } super.onChange(o, file); diff --git a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesComputer.java b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesComputer.java index 1ca147e2d..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesComputer.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesComputer.java @@ -1,221 +0,0 @@ -package io.armadaproject.jenkins.plugin; - -import edu.umd.cs.findbugs.annotations.NonNull; -import hudson.model.Computer; -import hudson.model.Executor; -import hudson.model.Queue; -import hudson.model.TaskListener; -import hudson.security.ACL; -import hudson.security.Permission; -import hudson.slaves.AbstractCloudComputer; -import io.fabric8.kubernetes.api.model.Container; -import io.fabric8.kubernetes.api.model.Event; -import io.fabric8.kubernetes.api.model.EventList; -import io.fabric8.kubernetes.api.model.ObjectMeta; -import io.fabric8.kubernetes.api.model.Pod; -import io.fabric8.kubernetes.client.KubernetesClient; -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.logging.Level; -import java.util.logging.Logger; -import jenkins.model.Jenkins; -import org.acegisecurity.Authentication; -import org.apache.commons.lang.StringUtils; -import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException; -import org.kohsuke.stapler.QueryParameter; -import org.kohsuke.stapler.StaplerRequest; -import org.kohsuke.stapler.StaplerResponse; -import org.kohsuke.stapler.export.Exported; 
-import org.kohsuke.stapler.framework.io.ByteBuffer; -import org.kohsuke.stapler.framework.io.LargeText; - -/** - * @author Carlos Sanchez carlos@apache.org - */ -public class KubernetesComputer extends AbstractCloudComputer { - private static final Logger LOGGER = Logger.getLogger(KubernetesComputer.class.getName()); - - private boolean launching; - private String armadaJobId = ""; - - public KubernetesComputer(KubernetesSlave slave) { - super(slave); - } - - @Override - public void taskAccepted(Executor executor, Queue.Task task) { - super.taskAccepted(executor, task); - Queue.Executable exec = executor.getCurrentExecutable(); - LOGGER.log(Level.FINE, " Computer {0} accepted task {1}", new Object[] {this, exec}); - } - - @Override - public void taskCompleted(Executor executor, Queue.Task task, long durationMS) { - Queue.Executable exec = executor.getCurrentExecutable(); - LOGGER.log(Level.FINE, " Computer {0} completed task {1}", new Object[] {this, exec}); - - // May take the agent offline and remove it, in which case getNode() - // above would return null and we'd not find our DockerSlave anymore. 
- super.taskCompleted(executor, task, durationMS); - } - - @Override - public void taskCompletedWithProblems(Executor executor, Queue.Task task, long durationMS, Throwable problems) { - super.taskCompletedWithProblems(executor, task, durationMS, problems); - Queue.Executable exec = executor.getCurrentExecutable(); - LOGGER.log(Level.FINE, " Computer {0} completed task {1} with problems", new Object[] {this, exec}); - } - - @Exported - public List getContainers() throws KubernetesAuthException, IOException { - if (!Jenkins.get().hasPermission(Computer.EXTENDED_READ)) { - LOGGER.log(Level.FINE, " Computer {0} getContainers, lack of admin permission, returning empty list", this); - return Collections.emptyList(); - } - - KubernetesSlave slave = getNode(); - if (slave == null) { - return Collections.emptyList(); - } - - ArmadaCloud cloud = slave.getKubernetesCloud(); - KubernetesClient client = cloud.connect(); - - String namespace = StringUtils.defaultIfBlank(slave.getNamespace(), client.getNamespace()); - Pod pod = client.pods().inNamespace(namespace).withName(getName()).get(); - - if (pod == null) { - return Collections.emptyList(); - } - - return pod.getSpec().getContainers(); - } - - @Exported - public List getPodEvents() throws KubernetesAuthException, IOException { - if (!Jenkins.get().hasPermission(Computer.EXTENDED_READ)) { - LOGGER.log(Level.FINE, " Computer {0} getPodEvents, lack of admin permission, returning empty list", this); - return Collections.emptyList(); - } - - KubernetesSlave slave = getNode(); - if (slave != null) { - ArmadaCloud cloud = slave.getKubernetesCloud(); - KubernetesClient client = cloud.connect(); - - String namespace = StringUtils.defaultIfBlank(slave.getNamespace(), client.getNamespace()); - - Pod pod = client.pods().inNamespace(namespace).withName(getName()).get(); - if (pod != null) { - ObjectMeta podMeta = pod.getMetadata(); - String podNamespace = podMeta.getNamespace(); - - Map fields = new HashMap<>(); - 
fields.put("involvedObject.uid", podMeta.getUid()); - fields.put("involvedObject.name", podMeta.getName()); - fields.put("involvedObject.namespace", podNamespace); - - EventList eventList = client.v1() - .events() - .inNamespace(podNamespace) - .withFields(fields) - .list(); - if (eventList != null) { - return eventList.getItems(); - } - } - } - - return Collections.emptyList(); - } - - public void doContainerLog(@QueryParameter String containerId, StaplerRequest req, StaplerResponse rsp) - throws KubernetesAuthException, IOException { - Jenkins.get().checkPermission(Computer.EXTENDED_READ); - - ByteBuffer outputStream = new ByteBuffer(); - KubernetesSlave slave = getNode(); - if (slave != null) { - ArmadaCloud cloud = slave.getKubernetesCloud(); - KubernetesClient client = cloud.connect(); - - String namespace = StringUtils.defaultIfBlank(slave.getNamespace(), client.getNamespace()); - - client.pods() - .inNamespace(namespace) - .withName(getName()) - .inContainer(containerId) - .tailingLines(20) - .watchLog(outputStream); - } - - new LargeText(outputStream, false).doProgressText(req, rsp); - } - - @Override - public String toString() { - return String.format("KubernetesComputer name: %s agent: %s", getName(), getNode()); - } - - @Override - @NonNull - public ACL getACL() { - final ACL base = super.getACL(); - return new KubernetesComputerACL(base); - } - - public void annotateTtl(TaskListener listener) { - Optional.ofNullable(getNode()).ifPresent(ks -> ks.annotateTtl(listener)); - } - - /** - * Simple static inner class to be used by {@link #getACL()}. - * It replaces an anonymous inner class in order to fix - * SIC_INNER_SHOULD_BE_STATIC_ANON. - */ - private static final class KubernetesComputerACL extends ACL { - - private final ACL base; - - public KubernetesComputerACL(final ACL base) { - this.base = base; - } - - @Override - public boolean hasPermission(Authentication a, Permission permission) { - return permission == Computer.CONFIGURE ? 
false : base.hasPermission(a, permission); - } - } - - public void setLaunching(boolean launching) { - this.launching = launching; - } - - /** - * - * @return true if the Pod has been created in Kubernetes and the current instance is waiting for the pod to be usable. - */ - public boolean isLaunching() { - return launching; - } - - public String getArmadaJobId() { - return armadaJobId; - } - - public void setArmadaJobId(String armadaJobId) { - this.armadaJobId = armadaJobId; - } - - @Override - public void setAcceptingTasks(boolean acceptingTasks) { - super.setAcceptingTasks(acceptingTasks); - if (acceptingTasks) { - launching = false; - } - } -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesComputerFactory.java b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesComputerFactory.java index 52f805fda..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesComputerFactory.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesComputerFactory.java @@ -1,37 +0,0 @@ -package io.armadaproject.jenkins.plugin; - -import hudson.ExtensionList; -import hudson.ExtensionPoint; - -/** - * A factory of {@link KubernetesComputer} instances. - */ -public abstract class KubernetesComputerFactory implements ExtensionPoint { - /** - * Returns all registered implementations of {@link KubernetesComputerFactory}. - * @return all registered implementations of {@link KubernetesComputerFactory}. - */ - public static ExtensionList all() { - return ExtensionList.lookup(KubernetesComputerFactory.class); - } - - /** - * Returns a new instance of {@link KubernetesComputer}. - * @return a new instance of {@link KubernetesComputer}. 
- */ - public static KubernetesComputer createInstance(KubernetesSlave slave) { - for (KubernetesComputerFactory factory : all()) { - KubernetesComputer kubernetesComputer = factory.newInstance(slave); - if (kubernetesComputer != null) { - return kubernetesComputer; - } - } - return new KubernetesComputer(slave); - } - - /** - * Creates a new instance of {@link KubernetesComputer}. - * @return a new instance of {@link KubernetesComputer}. - */ - public abstract KubernetesComputer newInstance(KubernetesSlave slave); -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesFactoryAdapter.java b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesFactoryAdapter.java index 0ca7d538f..eb8938b86 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesFactoryAdapter.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesFactoryAdapter.java @@ -202,7 +202,7 @@ public KubernetesClient createClient() throws KubernetesAuthException { } } } - return new KubernetesClientBuilder().withConfig(builder.build()).build(); + return new KubernetesClientBuilder().withConfig(builder.withHttp2Disable().build()).build(); } /** diff --git a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesFolderProperty.java b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesFolderProperty.java index fd2c47725..a7c6081df 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesFolderProperty.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesFolderProperty.java @@ -32,7 +32,7 @@ /** * Provides folder level Kubernetes configuration. 
*/ -public class KubernetesFolderProperty extends AbstractFolderProperty> { +public class ArmadaFolderProperty extends AbstractFolderProperty> { private static final String PREFIX_USAGE_PERMISSION = "usage-permission-"; @@ -42,7 +42,7 @@ public class KubernetesFolderProperty extends AbstractFolderProperty permittedClouds) { @@ -60,13 +60,13 @@ private static Set getInheritedClouds(ItemGroup parent) { } @SuppressWarnings({"rawtypes"}) - public static boolean isAllowed(KubernetesSlave agent, Job job) { + public static boolean isAllowed(ArmadaSlave agent, Job job) { ItemGroup parent = job.getParent(); Set allowedClouds = new HashSet<>(); - ArmadaCloud targetCloud = agent.getKubernetesCloud(); + ArmadaCloud targetCloud = agent.getArmadaCloud(); if (targetCloud.isUsageRestricted()) { - KubernetesFolderProperty.collectAllowedClouds(allowedClouds, parent); + ArmadaFolderProperty.collectAllowedClouds(allowedClouds, parent); return allowedClouds.contains(targetCloud.name); } return true; @@ -115,11 +115,11 @@ public AbstractFolderProperty reconfigure(StaplerRequest req, JSONObject form public static void collectAllowedClouds(Set allowedClouds, ItemGroup itemGroup) { if (itemGroup instanceof AbstractFolder) { AbstractFolder folder = (AbstractFolder) itemGroup; - KubernetesFolderProperty kubernetesFolderProperty = - folder.getProperties().get(KubernetesFolderProperty.class); + ArmadaFolderProperty armadaFolderProperty = + folder.getProperties().get(ArmadaFolderProperty.class); - if (kubernetesFolderProperty != null) { - allowedClouds.addAll(kubernetesFolderProperty.getPermittedClouds()); + if (armadaFolderProperty != null) { + allowedClouds.addAll(armadaFolderProperty.getPermittedClouds()); } collectAllowedClouds(allowedClouds, folder.getParent()); @@ -194,7 +194,7 @@ public static class DescriptorImpl extends AbstractFolderPropertyDescriptor { @NonNull @Override public String getDisplayName() { - return Messages.KubernetesFolderProperty_displayName(); + return 
Messages.ArmadaFolderProperty_displayName(); } @SuppressWarnings("unused") // Used by jelly diff --git a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesLauncher.java b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesLauncher.java index 486476a09..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesLauncher.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesLauncher.java @@ -1,370 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2017, CloudBees, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -package io.armadaproject.jenkins.plugin; - -import static java.util.logging.Level.FINE; -import static java.util.logging.Level.INFO; -import static java.util.logging.Level.WARNING; - -import api.Job.JobStatusRequest; -import api.Job.JobStatusResponse; -import api.SubmitOuterClass.JobState; -import api.SubmitOuterClass.JobSubmitResponse; -import edu.umd.cs.findbugs.annotations.CheckForNull; -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import hudson.Functions; -import hudson.model.Descriptor; -import hudson.model.Run; -import hudson.model.TaskListener; -import hudson.slaves.ComputerLauncher; -import hudson.slaves.JNLPLauncher; -import hudson.slaves.SlaveComputer; -import io.armadaproject.ArmadaClient; -import io.armadaproject.ArmadaMapper; -import io.armadaproject.jenkins.plugin.pod.decorator.PodDecoratorException; -import io.armadaproject.jenkins.plugin.pod.retention.Reaper; -import io.fabric8.kubernetes.api.model.ContainerStatus; -import io.fabric8.kubernetes.api.model.Pod; -import io.fabric8.kubernetes.client.KubernetesClientException; -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Date; -import java.util.List; -import java.util.concurrent.TimeUnit; -import java.util.logging.Level; -import java.util.logging.Logger; -import jenkins.metrics.api.Metrics; -import org.apache.commons.lang.StringUtils; -import org.awaitility.Awaitility; -import org.kohsuke.stapler.DataBoundConstructor; - -/** - * Launches on Kubernetes the specified {@link KubernetesComputer} instance. 
- */ -public class KubernetesLauncher extends JNLPLauncher { - // Report progress every 30 seconds - private static final long REPORT_INTERVAL = TimeUnit.SECONDS.toMillis(30L); - - private static final Collection JOB_TERMINATED_STATES = - Collections.unmodifiableCollection(Arrays.asList(JobState.FAILED, JobState.SUCCEEDED, - JobState.REJECTED)); - - private static final Logger LOGGER = Logger.getLogger(KubernetesLauncher.class.getName()); - - private volatile boolean launched = false; - - /** - * Provisioning exception if any. - */ - @CheckForNull - private transient Throwable problem; - - @DataBoundConstructor - public KubernetesLauncher(String tunnel, String vmargs) { - super(tunnel, vmargs); - } - - public KubernetesLauncher() { - super(); - } - - @Override - public boolean isLaunchSupported() { - return !launched; - } - - @Override - @SuppressFBWarnings(value = {"SWL_SLEEP_WITH_LOCK_HELD", "REC_CATCH_EXCEPTION", - "NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE"}, - justification = "This is fine") - public synchronized void launch(SlaveComputer computer, TaskListener listener) { - if (!(computer instanceof KubernetesComputer)) { - throw new IllegalArgumentException("This Launcher can be used only with KubernetesComputer"); - } - // Activate reaper if it never got activated. 
- Reaper.getInstance().maybeActivate(); - KubernetesComputer kubernetesComputer = (KubernetesComputer) computer; - computer.setAcceptingTasks(false); - KubernetesSlave node = kubernetesComputer.getNode(); - if (node == null) { - throw new IllegalStateException("Node has been removed, cannot launch " + computer.getName()); - } - if (launched) { - LOGGER.log(INFO, "Agent has already been launched, activating: {0}", node.getNodeName()); - computer.setAcceptingTasks(true); - return; - } - - String cloudName = node.getCloudName(); - - try(ArmadaClient armadaClient = node.getKubernetesCloud().connectToArmada()) { - PodTemplate template = node.getTemplate(); - ArmadaCloud cloud = node.getKubernetesCloud(); - Pod pod; - try { - pod = template.build(node); - } catch (PodDecoratorException e) { - Run run = template.getRun(); - if (run != null) { - template.getListener().getLogger().println("Failed to build pod definition : " + e.getMessage()); - PodUtils.cancelQueueItemFor(run.getUrl(), template.getLabel(), e.getMessage(), null); - } - e.printStackTrace(listener.fatalError("Failed to build pod definition")); - setProblem(e); - terminateOrLog(node); - return; - } - node.assignPod(pod); - - String podName = pod.getMetadata().getName(); - - String namespace = Arrays.asList( - pod.getMetadata().getNamespace(), template.getNamespace()) - .stream() - .filter(s -> StringUtils.isNotBlank(s)) - .findFirst() - .orElse(null); - node.setNamespace(namespace); - - // if the controller was interrupted after creating the pod but before it connected back, then - // the pod might already exist and the creating logic must be skipped. 
- JobStatusResponse jobStatusResponse = armadaClient.getJobStatus( - JobStatusRequest.newBuilder() - .addJobIds(kubernetesComputer.getArmadaJobId()) - .build()); - JobState existingJobState = jobStatusResponse.getJobStatesMap() - .get(kubernetesComputer.getArmadaJobId()); - - LOGGER.info("Job with id:" + kubernetesComputer.getArmadaJobId() + " in state: " - + existingJobState); - - if (existingJobState == JobState.UNKNOWN) { - LOGGER.log(FINE, () -> "Creating job: " + cloudName + "/" + podName); - try { - // FIXME possible clash when pipeline starts before 00:00 and ends after 00:00 - String newArmadaJobSetId = cloud.getDisplayName() - + new SimpleDateFormat("-ddMMyyyy").format(new Date()); - cloud.setArmadaJobSetId(newArmadaJobSetId); - - String completeArmadaJobSetId = - cloud.getArmadaJobSetPrefix() + newArmadaJobSetId; - - ArmadaMapper armadaMapper = new ArmadaMapper(cloud.getArmadaQueue(), - cloud.getArmadaNamespace(), completeArmadaJobSetId, pod); - - JobSubmitResponse jobSubmitResponse = armadaClient.submitJob( - armadaMapper.createJobSubmitRequest()); - String jobId = jobSubmitResponse.getJobResponseItems(0).getJobId(); - kubernetesComputer.setArmadaJobId(jobId); - ((KubernetesSlave) computer.getNode()).setArmadaJobId(jobId); - } catch (KubernetesClientException e) { - Metrics.metricRegistry() - .counter(MetricNames.CREATION_FAILED) - .inc(); - int httpCode = e.getCode(); - if (400 <= httpCode && httpCode < 500) { // 4xx - if (httpCode == 403 && e.getMessage().contains("is forbidden: exceeded quota")) { - node.getRunListener() - .getLogger() - .printf( - "WARNING: Unable to create pod: %s %s/%s because kubernetes resource quota exceeded. %n%s%nRetrying...%n%n", - cloudName, - namespace, - pod.getMetadata().getName(), - e.getMessage()); - } else if (httpCode == 409 - && e.getMessage().contains("Operation cannot be fulfilled on resourcequotas")) { - // See: https://github.com/kubernetes/kubernetes/issues/67761 ; A retry usually works. 
- node.getRunListener() - .getLogger() - .printf( - "WARNING: Unable to create pod: %s %s/%s because kubernetes resource quota update conflict. %n%s%nRetrying...%n%n", - cloudName, - namespace, - pod.getMetadata().getName(), - e.getMessage()); - } else { - node.getRunListener() - .getLogger() - .printf( - "ERROR: Unable to create pod %s %s/%s.%n%s%n", - cloudName, - namespace, - pod.getMetadata().getName(), - e.getMessage()); - PodUtils.cancelQueueItemFor(pod, e.getMessage()); - } - } else if (500 <= httpCode && httpCode < 600) { // 5xx - LOGGER.log(FINE, "Kubernetes returned HTTP code {0} {1}. Retrying...", new Object[] { - e.getCode(), e.getStatus() - }); - } else { - LOGGER.log(WARNING, "Kubernetes returned unhandled HTTP code {0} {1}", new Object[] { - e.getCode(), e.getStatus() - }); - } - throw e; - } - String armadaLookoutJobUrl = cloud.getArmadaLookoutUrl() + ":" - + cloud.getArmadaLookoutPort() + "/?sb=" + kubernetesComputer.getArmadaJobId(); - LOGGER.log(INFO, () -> "Submitted job: " + armadaLookoutJobUrl); - listener.getLogger().printf("Submitted job: %s %n", armadaLookoutJobUrl); - Metrics.metricRegistry().counter(MetricNames.PODS_CREATED).inc(); - - node.getRunListener().getLogger().printf("Submitted job: %s %n", - armadaLookoutJobUrl); - } else { - LOGGER.log(INFO, () -> "Job already exists: " + - kubernetesComputer.getArmadaJobId()); - listener.getLogger().printf("Job already exists: %s %n", - kubernetesComputer.getArmadaJobId()); - } - kubernetesComputer.setLaunching(true); - - Awaitility.await().atMost(template.getSlaveConnectTimeout(), - TimeUnit.SECONDS).until(() -> { - JobStatusResponse jobStatus = armadaClient.getJobStatus( - JobStatusRequest.newBuilder() - .addJobIds(kubernetesComputer.getArmadaJobId()) - .build()); - return jobStatus.getJobStatesMap().get(kubernetesComputer.getArmadaJobId()) - == JobState.RUNNING; - }); - LOGGER.log(INFO, () -> "Job is running: " + kubernetesComputer.getArmadaJobId()); - - // We need the pod to be running and 
connected before returning - // otherwise this method keeps being called multiple times - // so wait for agent to be online - int waitForSlaveToConnect = template.getSlaveConnectTimeout(); - int waitedForSlave; - - SlaveComputer slaveComputer = null; - String status = null; - List containerStatuses = null; - long lastReportTimestamp = System.currentTimeMillis(); - for (waitedForSlave = 0; waitedForSlave < waitForSlaveToConnect; waitedForSlave++) { - slaveComputer = node.getComputer(); - if (slaveComputer == null) { - Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); - throw new IllegalStateException("Node was deleted, computer is null"); - } - if (slaveComputer.isOnline()) { - break; - } - - // Check that the job hasn't failed already - JobState jobState = armadaClient.getJobStatus( - JobStatusRequest.newBuilder() - .addJobIds(kubernetesComputer.getArmadaJobId()) - .build()).getJobStatesMap().get(kubernetesComputer.getArmadaJobId()); - if (jobState == JobState.FAILED || jobState == JobState.REJECTED) { - Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); - throw new IllegalStateException("Job failed: " - + kubernetesComputer.getArmadaJobId()); - } - - if (JOB_TERMINATED_STATES.contains(jobState)) { - Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); - Metrics.metricRegistry() - .counter(MetricNames.metricNameForPodStatus(status)) - .inc(); - throw new IllegalStateException("Job '" + kubernetesComputer.getArmadaJobId() - + "' is in terminated state. 
State: " + jobState); - } - - if (lastReportTimestamp + REPORT_INTERVAL < System.currentTimeMillis()) { - LOGGER.log(INFO, "Waiting for agent to connect ({1}/{2}): {0}", new Object[] { - podName, waitedForSlave, waitForSlaveToConnect - }); - listener.getLogger() - .printf( - "Waiting for agent to connect (%2$s/%3$s): %1$s%n", - podName, waitedForSlave, waitForSlaveToConnect); - lastReportTimestamp = System.currentTimeMillis(); - } - Thread.sleep(1000); - } - if (slaveComputer == null || slaveComputer.isOffline()) { - Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); - Metrics.metricRegistry().counter(MetricNames.FAILED_TIMEOUT).inc(); - - throw new IllegalStateException( - "Agent is not connected after " + waitedForSlave + " seconds, status: " + status); - } - - computer.setAcceptingTasks(true); - launched = true; - try { - // We need to persist the "launched" setting... - node.save(); - } catch (IOException e) { - LOGGER.log(Level.WARNING, "Could not save() agent: " + e.getMessage(), e); - } - Metrics.metricRegistry().counter(MetricNames.PODS_LAUNCHED).inc(); - } catch (Throwable ex) { - setProblem(ex); - Functions.printStackTrace(ex, node.getRunListener().error("Failed to launch " + node.getPodName())); - LOGGER.log( - Level.WARNING, - String.format("Error in provisioning; agent=%s, template=%s", node, node.getTemplateId()), - ex); - LOGGER.log(Level.FINER, "Removing Jenkins node: {0}", node.getNodeName()); - terminateOrLog(node); - throw new RuntimeException(ex); - } - } - - private static void terminateOrLog(KubernetesSlave node) { - try { - node.terminate(); - } catch (IOException | InterruptedException e) { - LOGGER.log(Level.WARNING, "Unable to remove Jenkins node", e); - } - } - - /** - * The last problem that occurred, if any. 
- * @return - */ - @CheckForNull - public Throwable getProblem() { - return problem; - } - - public void setProblem(@CheckForNull Throwable problem) { - this.problem = problem; - } - - @Override - public Descriptor getDescriptor() { - return new DescriptorImpl(); - } - - // Only there to avoid throwing unnecessary exceptions. KubernetesLauncher is never instantiated via UI. - private static class DescriptorImpl extends Descriptor {} -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesProvisioningLimits.java b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesProvisioningLimits.java index aa461279d..37ccea6c0 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesProvisioningLimits.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesProvisioningLimits.java @@ -45,8 +45,8 @@ private synchronized boolean initInstance() { if (!init) { Queue.withLock(() -> { Jenkins.get().getNodes().stream() - .filter(KubernetesSlave.class::isInstance) - .map(KubernetesSlave.class::cast) + .filter(ArmadaSlave.class::isInstance) + .map(ArmadaSlave.class::cast) .forEach(node -> { cloudCounts.put( node.getCloudName(), getGlobalCount(node.getCloudName()) + node.getNumExecutors()); @@ -161,12 +161,12 @@ int getPodTemplateCount(String podTemplate) { public static class NodeListenerImpl extends NodeListener { @Override protected void onDeleted(@NonNull Node node) { - if (node instanceof KubernetesSlave) { + if (node instanceof ArmadaSlave) { KubernetesProvisioningLimits instance = KubernetesProvisioningLimits.get(); - KubernetesSlave kubernetesNode = (KubernetesSlave) node; + ArmadaSlave kubernetesNode = (ArmadaSlave) node; PodTemplate template = kubernetesNode.getTemplateOrNull(); if (template != null) { - instance.unregister(kubernetesNode.getKubernetesCloud(), template, node.getNumExecutors()); + instance.unregister(kubernetesNode.getArmadaCloud(), template, node.getNumExecutors()); } } } diff --git 
a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesQueueTaskDispatcher.java b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesQueueTaskDispatcher.java index 824f44a5a..1187fb0e2 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesQueueTaskDispatcher.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesQueueTaskDispatcher.java @@ -10,15 +10,16 @@ @Extension @SuppressWarnings({"rawtypes"}) -public class KubernetesQueueTaskDispatcher extends QueueTaskDispatcher { +public class ArmadaQueueTaskDispatcher extends QueueTaskDispatcher { @Override public CauseOfBlockage canTake(Node node, Queue.BuildableItem item) { - if (node instanceof KubernetesSlave) { - KubernetesSlave slave = (KubernetesSlave) node; + if (node instanceof ArmadaSlave) { + ArmadaSlave slave = (ArmadaSlave) node; Task ownerTask = item.task.getOwnerTask(); - if (!KubernetesFolderProperty.isAllowed(slave, (Job) ownerTask)) { - return new KubernetesCloudNotAllowed(slave.getKubernetesCloud(), (Job) ownerTask); + slave.assignTask(item); + if (!ArmadaFolderProperty.isAllowed(slave, (Job) ownerTask)) { + return new KubernetesCloudNotAllowed(slave.getArmadaCloud(), (Job) ownerTask); } } return null; diff --git a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesSlave.java b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesSlave.java index 37b646fe3..69df822d0 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/KubernetesSlave.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/KubernetesSlave.java @@ -1,9 +1,5 @@ package io.armadaproject.jenkins.plugin; -import api.Job.JobStatusRequest; -import api.Job.JobStatusResponse; -import api.SubmitOuterClass.JobCancelRequest; -import api.SubmitOuterClass.JobState; import edu.umd.cs.findbugs.annotations.CheckForNull; import edu.umd.cs.findbugs.annotations.NonNull; import edu.umd.cs.findbugs.annotations.Nullable; @@ -16,7 +12,6 @@ import hudson.model.Computer; import hudson.model.Descriptor; import 
hudson.model.Executor; -import hudson.model.Label; import hudson.model.Node; import hudson.model.Queue; import hudson.model.TaskListener; @@ -28,24 +23,22 @@ import hudson.slaves.ComputerLauncher; import hudson.slaves.RetentionStrategy; import hudson.slaves.SlaveComputer; -import io.armadaproject.ArmadaClient; -import io.armadaproject.jenkins.plugin.pod.retention.PodRetention; +import io.armadaproject.jenkins.plugin.job.ArmadaState; import io.fabric8.kubernetes.api.model.Container; import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.api.model.PodSpec; +import io.fabric8.kubernetes.client.KubernetesClient; import io.fabric8.kubernetes.client.utils.Serialization; import java.io.IOException; -import java.time.Instant; -import java.util.HashSet; -import java.util.Locale; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; + import jenkins.metrics.api.Metrics; import jenkins.model.Jenkins; import jenkins.security.MasterToSlaveCallable; @@ -61,12 +54,12 @@ /** * @author Carlos Sanchez carlos@apache.org */ -public class KubernetesSlave extends AbstractCloudSlave { +public class ArmadaSlave extends AbstractCloudSlave { - private static final Logger LOGGER = Logger.getLogger(KubernetesSlave.class.getName()); + private static final Logger LOGGER = Logger.getLogger(ArmadaSlave.class.getName()); private static final Integer DISCONNECTION_TIMEOUT = - Integer.getInteger(KubernetesSlave.class.getName() + ".disconnectionTimeout", 5); + Integer.getInteger(ArmadaSlave.class.getName() + ".disconnectionTimeout", 5); private static final long serialVersionUID = -8642936855413034232L; private static final String DEFAULT_AGENT_PREFIX = "jenkins-agent"; @@ 
-89,6 +82,14 @@ public class KubernetesSlave extends AbstractCloudSlave { private transient Pod pod; private String armadaJobId = ""; + private String armadaJobSetId = ""; + private transient String serverUrl; + private transient String serverCertificate; + private String clusterId; + private String podName; + + private transient Queue.Item item; + private transient PodSpec podSpec; @NonNull public PodTemplate getTemplate() throws IllegalStateException { @@ -108,7 +109,7 @@ public String getTemplateId() { @CheckForNull public PodTemplate getTemplateOrNull() { if (template == null) { - template = getKubernetesCloud().getTemplateById(podTemplateId); + template = getArmadaCloud().getTemplateById(podTemplateId); } return template; } @@ -151,59 +152,35 @@ public TaskListener getRunListener() { return TaskListener.NULL; } - /** - * @deprecated Use {@link Builder} instead. - */ - @Deprecated - public KubernetesSlave(PodTemplate template, String nodeDescription, ArmadaCloud cloud, String labelStr) - throws Descriptor.FormException, IOException { - - this(template, nodeDescription, cloud.name, labelStr, new OnceRetentionStrategy(cloud.getRetentionTimeout())); - } - - /** - * @deprecated Use {@link Builder} instead. - */ - @Deprecated - public KubernetesSlave(PodTemplate template, String nodeDescription, ArmadaCloud cloud, Label label) - throws Descriptor.FormException, IOException { - this( - template, - nodeDescription, - cloud.name, - label.toString(), - new OnceRetentionStrategy(cloud.getRetentionTimeout())); - } - - /** - * @deprecated Use {@link Builder} instead. - */ - @Deprecated - public KubernetesSlave( - PodTemplate template, String nodeDescription, ArmadaCloud cloud, String labelStr, RetentionStrategy rs) - throws Descriptor.FormException, IOException { - this(template, nodeDescription, cloud.name, labelStr, rs); - } - /** * @deprecated Use {@link Builder} instead. */ @Deprecated @DataBoundConstructor // make stapler happy. Not actually used. 
- public KubernetesSlave( - PodTemplate template, String nodeDescription, String cloudName, String labelStr, RetentionStrategy rs) + public ArmadaSlave( + PodTemplate template, String nodeDescription, String cloudName, String labelStr, RetentionStrategy rs, + String armadaJobSetId, + String armadaJobId, + String serverUrl, + String clusterId, + String podName) throws Descriptor.FormException, IOException { - this(getSlaveName(template), template, nodeDescription, cloudName, labelStr, new KubernetesLauncher(), rs); + this(getSlaveName(template), template, nodeDescription, cloudName, labelStr, new ArmadaLauncher(), rs, armadaJobSetId, armadaJobId, serverUrl, clusterId, podName); } - protected KubernetesSlave( + protected ArmadaSlave( String name, @NonNull PodTemplate template, String nodeDescription, String cloudName, String labelStr, ComputerLauncher computerLauncher, - RetentionStrategy rs) + RetentionStrategy rs, + String armadaJobSetId, + String armadaJobId, + String serverUrl, + String clusterId, + String podName) throws Descriptor.FormException, IOException { super(name, null, computerLauncher); setNodeDescription(nodeDescription); @@ -215,6 +192,11 @@ protected KubernetesSlave( this.cloudName = cloudName; this.template = template; this.podTemplateId = template.getId(); + this.armadaJobSetId = armadaJobSetId; + this.armadaJobId = armadaJobId; + this.serverUrl = serverUrl; + this.clusterId = clusterId; + this.podName = podName; } public String getCloudName() { @@ -231,28 +213,36 @@ public String getNamespace() { } public String getPodName() { + return podName; + } + + public String getAgentName() { return PodTemplateUtils.substituteEnv(getNodeName()); } private String remoteFS; + public void setPodSpec(PodSpec podSpec) { + Optional optionalJnlp = podSpec.getContainers().stream() + .filter(c -> ArmadaCloud.JNLP_NAME.equals(c.getName())) + .findFirst(); + if (optionalJnlp.isPresent()) { + remoteFS = StringUtils.defaultIfBlank( + optionalJnlp.get().getWorkingDir(), 
ContainerTemplate.DEFAULT_WORKING_DIR); + } + + this.podSpec = podSpec; + } + + public PodSpec getPodSpec() { + return this.podSpec; + } + @SuppressFBWarnings( value = "NM_CONFUSING", justification = "Naming confusion with a getRemoteFs method, but the latter is deprecated.") @Override public String getRemoteFS() { - if (remoteFS == null) { - Optional optionalPod = getPod(); - if (optionalPod.isPresent()) { - Optional optionalJnlp = optionalPod.get().getSpec().getContainers().stream() - .filter(c -> ArmadaCloud.JNLP_NAME.equals(c.getName())) - .findFirst(); - if (optionalJnlp.isPresent()) { - remoteFS = StringUtils.defaultIfBlank( - optionalJnlp.get().getWorkingDir(), ContainerTemplate.DEFAULT_WORKING_DIR); - } - } - } return Util.fixNull(remoteFS); } @@ -269,15 +259,15 @@ public FilePath getRootPath() { } } - /** - * @deprecated Please use the strongly typed getKubernetesCloud() instead. - */ - @Deprecated - public Cloud getCloud() { - return Jenkins.getInstance().getCloud(getCloudName()); - } - public Optional getPod() { + if(pod == null) { + if (podName == null) { + return Optional.empty(); + } + + tryLoadPod(); + } + return Optional.ofNullable(pod); } @@ -289,24 +279,46 @@ public void setArmadaJobId(String armadaJobId) { this.armadaJobId = armadaJobId; } + public String getArmadaJobSetId() { + return armadaJobSetId; + } + + public void setArmadaJobSetId(String armadaJobSetId) { + this.armadaJobSetId = armadaJobSetId; + } + /** * Returns the cloud instance which created this agent. * @return the cloud instance which created this agent. * @throws IllegalStateException if the cloud doesn't exist anymore, or is not a {@link ArmadaCloud}. 
*/ @NonNull - public ArmadaCloud getKubernetesCloud() { - return getKubernetesCloud(getCloudName()); + public ArmadaCloud getArmadaCloud() { + return getArmadaCloud(getCloudName()); } - private static ArmadaCloud getKubernetesCloud(String cloudName) { + public KubernetesClient connect() throws KubernetesAuthException, IOException { + var serverUrl = getServerUrl(); + var serverCertificate = getCaCertData(); + LOGGER.log(Level.FINEST, "Building connection to Kubernetes {0} URL {1}", new String[] { + getDisplayName(), serverUrl + }); + KubernetesClient client = KubernetesClientProvider.createClient(getArmadaCloud(), serverUrl, serverCertificate); + + LOGGER.log(Level.FINE, "Connected to Kubernetes {0} URL {1} namespace {2}", new String[] { + getDisplayName(), client.getMasterUrl().toString(), client.getNamespace() + }); + return client; + } + + private static ArmadaCloud getArmadaCloud(String cloudName) { Cloud cloud = Jenkins.get().getCloud(cloudName); if (cloud instanceof ArmadaCloud) { return (ArmadaCloud) cloud; } else if (cloud == null) { throw new IllegalStateException("No such cloud " + cloudName); } else { - throw new IllegalStateException(KubernetesSlave.class.getName() + " can be launched only by instances of " + throw new IllegalStateException(ArmadaSlave.class.getName() + " can be launched only by instances of " + ArmadaCloud.class.getName() + ". 
Cloud is " + cloud.getClass().getName()); } @@ -330,28 +342,8 @@ static String getSlaveName(PodTemplate template) { } @Override - public KubernetesComputer createComputer() { - return KubernetesComputerFactory.createInstance(this); - } - - public PodRetention getPodRetention(ArmadaCloud cloud) { - PodRetention retentionPolicy = cloud.getPodRetention(); - PodTemplate template = getTemplateOrNull(); - if (template != null) { - PodRetention pr = template.getPodRetention(); - // https://issues.jenkins-ci.org/browse/JENKINS-53260 - // even though we default the pod template's retention - // strategy, there are various legacy paths for injecting - // pod templates where the - // value can still be null, so check for it here so - // as to not blow up termination path - // if (pr != null) { - retentionPolicy = pr; - // } else { - // LOGGER.fine("Template pod retention policy was null"); - // } - } - return retentionPolicy; + public ArmadaComputer createComputer() { + return ArmadaComputerFactory.createInstance(this); } @Override @@ -360,7 +352,7 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted ArmadaCloud cloud; try { - cloud = getKubernetesCloud(); + cloud = getArmadaCloud(); } catch (IllegalStateException e) { e.printStackTrace( listener.fatalError( @@ -387,6 +379,9 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted Future disconnectorFuture = ch.callAsync(new SlaveDisconnector()); try { disconnectorFuture.get(DISCONNECTION_TIMEOUT, TimeUnit.SECONDS); + + //this will shut down the jnlp container + ch.close(); } catch (InterruptedException | ExecutionException | TimeoutException e) { String msg = String.format( "Ignoring error sending order to not reconnect agent %s: %s", name, e.getMessage()); @@ -401,40 +396,21 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted return; } - try { - ArmadaClient armadaClient = cloud.connectToArmada(); - deleteSlavePod(listener, 
armadaClient); - Metrics.metricRegistry().counter(MetricNames.PODS_TERMINATED).inc(); - - String msg = String.format("Disconnected computer %s", name); - LOGGER.log(Level.INFO, msg); - listener.getLogger().println(msg); - } catch (KubernetesAuthException e) { - LOGGER.warning("Failed to connect to Armada. There might be leftover jobs running."); - } - } - - private void deleteSlavePod(TaskListener listener, ArmadaClient armadaClient) { - JobStatusResponse jobStatusResponse = armadaClient.getJobStatus(JobStatusRequest.newBuilder() - .addJobIds(armadaJobId) - .build()); - - if (jobStatusResponse.getJobStatesMap().get(armadaJobId) == JobState.RUNNING) { - ArmadaCloud armadaCloud = getKubernetesCloud(); - armadaClient.cancelJob(JobCancelRequest.newBuilder() - .setQueue(armadaCloud.getArmadaQueue()) - .setJobSetId(armadaCloud.getArmadaJobSetPrefix() - + armadaCloud.getArmadaJobSetId()) - .setJobId(armadaJobId) - .build()); - - String msg = ("Cancelled job: " + armadaJobId); - LOGGER.info(msg); - listener.getLogger().println(msg); - } else { - String msg = ("No jobs in running state for id: " + armadaJobId); - LOGGER.log(Level.WARNING, msg); - listener.error(msg); + var jobManager = ArmadaState.getJobManager(cloud); + try { + + if(armadaJobSetId != null && armadaJobId != null) { + // give the job a chance to exit cleanly + try { + jobManager.waitUntilTerminated(armadaJobSetId, armadaJobId, DISCONNECTION_TIMEOUT * 2L, TimeUnit.SECONDS); + } catch(Throwable e) { + LOGGER.log(Level.INFO, "Error while waiting for job " + armadaJobId + " to terminate cleanly.", e); + } + jobManager.cancelJob(armadaJobSetId, armadaJobId); + Metrics.metricRegistry().counter(MetricNames.JOBS_CANCELLED).inc(); + } + } catch (Throwable e) { + LOGGER.warning("Failed to connect to Armada. 
There might be leftover jobs running."); } } @@ -448,7 +424,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; if (!super.equals(o)) return false; - KubernetesSlave that = (KubernetesSlave) o; + ArmadaSlave that = (ArmadaSlave) o; return cloudName.equals(that.cloudName); } @@ -477,8 +453,17 @@ public Launcher createLauncher(TaskListener listener) { return launcher; } - void assignPod(@CheckForNull Pod pod) { - this.pod = pod; + public void assignPod(String podName) { + this.podName = podName; + tryLoadPod(); + } + + private void tryLoadPod() { + try { + this.pod = connect().pods().inNamespace(getNamespace()).withName(podName).item(); + } catch (Throwable e) { + throw new RuntimeException(e); + } } private void printAgentDescription(TaskListener listener) { @@ -513,7 +498,7 @@ private void checkHomeAndWarnIfNeeded(TaskListener listener) { @Override protected Object readResolve() { - KubernetesSlave ks = (KubernetesSlave) super.readResolve(); + ArmadaSlave ks = (ArmadaSlave) super.readResolve(); ks.executables = new HashSet<>(); return ks; } @@ -526,41 +511,42 @@ public static Builder builder() { return new Builder(); } - public void annotateTtl(TaskListener listener) { - try { - var kubernetesCloud = getKubernetesCloud(); - Optional.ofNullable(kubernetesCloud.getGarbageCollection()).ifPresent(gc -> { - var ns = getNamespace(); - var name = getPodName(); - var l = Instant.now(); - try { - kubernetesCloud - .connect() - .pods() - .inNamespace(ns) - .withName(name) - .patch("{\"metadata\":{\"annotations\":{\"" + GarbageCollection.ANNOTATION_LAST_REFRESH - + "\":\"" + l.toEpochMilli() + "\"}}}"); - } catch (KubernetesAuthException e) { - e.printStackTrace(listener.error("Failed to authenticate to Kubernetes cluster")); - } catch (IOException e) { - e.printStackTrace(listener.error("Failed to connect to Kubernetes cluster")); - } - listener.getLogger().println("Annotated agent pod " + ns + "/" + 
name + " with TTL"); - LOGGER.log(Level.FINE, () -> "Annotated agent pod " + ns + "/" + name + " with TTL"); - try { - save(); - } catch (IOException e) { - LOGGER.log(Level.WARNING, e, () -> "Failed to save"); - } - }); - } catch (RuntimeException e) { - e.printStackTrace(listener.error("Failed to annotate agent pod with TTL")); + public void setClusterId(String clusterId) { + this.clusterId = clusterId; + } + + public String getCaCertData() { + ensureServerInfo(); + return serverCertificate; + } + + public String getServerUrl() { + ensureServerInfo(); + return this.serverUrl; + } + + private void ensureServerInfo() { + if(this.serverUrl == null) { + if (clusterId == null) { + throw new IllegalStateException("ClusterId is not set"); + } + + var clusterData = ArmadaClusterInfoProvider.resolveClusterInfo(getArmadaCloud(), clusterId); + this.serverUrl = clusterData.getApiUrl(); + this.serverCertificate = clusterData.getServerCertificate(); } } + public Queue.Item getItem() { + return item; + } + + public void assignTask(Queue.Item task) { + this.item = task; + } + /** - * Builds a {@link KubernetesSlave} instance. + * Builds a {@link ArmadaSlave} instance. 
*/ public static class Builder { private String name; @@ -572,7 +558,7 @@ public static class Builder { private RetentionStrategy retentionStrategy; /** - * @param name The name of the future {@link KubernetesSlave} + * @param name The name of the future {@link ArmadaSlave} * @return the current instance for method chaining */ public Builder name(String name) { @@ -581,7 +567,7 @@ public Builder name(String name) { } /** - * @param nodeDescription The node description of the future {@link KubernetesSlave} + * @param nodeDescription The node description of the future {@link ArmadaSlave} * @return the current instance for method chaining */ public Builder nodeDescription(String nodeDescription) { @@ -590,7 +576,7 @@ public Builder nodeDescription(String nodeDescription) { } /** - * @param podTemplate The pod template the future {@link KubernetesSlave} has been created from + * @param podTemplate The pod template the future {@link ArmadaSlave} has been created from * @return the current instance for method chaining */ public Builder podTemplate(PodTemplate podTemplate) { @@ -599,7 +585,7 @@ public Builder podTemplate(PodTemplate podTemplate) { } /** - * @param cloud The cloud that is provisioning the {@link KubernetesSlave} instance. + * @param cloud The cloud that is provisioning the {@link ArmadaSlave} instance. * @return the current instance for method chaining */ public Builder cloud(ArmadaCloud cloud) { @@ -608,7 +594,7 @@ public Builder cloud(ArmadaCloud cloud) { } /** - * @param label The label the {@link KubernetesSlave} has. + * @param label The label the {@link ArmadaSlave} has. * @return the current instance for method chaining */ public Builder label(String label) { @@ -617,7 +603,7 @@ public Builder label(String label) { } /** - * @param computerLauncher The computer launcher to use to launch the {@link KubernetesSlave} instance. + * @param computerLauncher The computer launcher to use to launch the {@link ArmadaSlave} instance. 
* @return the current instance for method chaining */ public Builder computerLauncher(ComputerLauncher computerLauncher) { @@ -626,7 +612,7 @@ public Builder computerLauncher(ComputerLauncher computerLauncher) { } /** - * @param retentionStrategy The retention strategy to use for the {@link KubernetesSlave} instance. + * @param retentionStrategy The retention strategy to use for the {@link ArmadaSlave} instance. * @return the current instance for method chaining */ public Builder retentionStrategy(RetentionStrategy retentionStrategy) { @@ -644,18 +630,18 @@ private static RetentionStrategy determineRetentionStrategy( } /** - * Builds the resulting {@link KubernetesSlave} instance. - * @return an initialized {@link KubernetesSlave} instance. + * Builds the resulting {@link ArmadaSlave} instance. + * @return an initialized {@link ArmadaSlave} instance. * @throws IOException * @throws Descriptor.FormException */ @SuppressFBWarnings( value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR", justification = "False positive. https://github.com/spotbugs/spotbugs/issues/567") - public KubernetesSlave build() throws IOException, Descriptor.FormException { + public ArmadaSlave build() throws IOException, Descriptor.FormException { Validate.notNull(podTemplate); Validate.notNull(cloud); - return new KubernetesSlave( + return new ArmadaSlave( name == null ? getSlaveName(podTemplate) : name, podTemplate, nodeDescription == null ? podTemplate.getName() : nodeDescription, @@ -664,14 +650,19 @@ public KubernetesSlave build() throws IOException, Descriptor.FormException { decorateLauncher( cloud, computerLauncher == null - ? new KubernetesLauncher(cloud.getJenkinsTunnel(), null) + ? new ArmadaLauncher(cloud.getJenkinsTunnel(), null) : computerLauncher), - retentionStrategy == null ? determineRetentionStrategy(cloud, podTemplate) : retentionStrategy); + retentionStrategy == null ? 
determineRetentionStrategy(cloud, podTemplate) : retentionStrategy, + null, + null, + null, + null, + null); } private ComputerLauncher decorateLauncher(@NonNull ArmadaCloud cloud, @NonNull ComputerLauncher launcher) { - if (launcher instanceof KubernetesLauncher) { - ((KubernetesLauncher) launcher).setWebSocket(cloud.isWebSocket()); + if (launcher instanceof ArmadaLauncher) { + ((ArmadaLauncher) launcher).setWebSocket(cloud.isWebSocket()); } return launcher; } diff --git a/src/main/java/io/armadaproject/jenkins/plugin/MetricNames.java b/src/main/java/io/armadaproject/jenkins/plugin/MetricNames.java index 72caa8c65..ad312c661 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/MetricNames.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/MetricNames.java @@ -3,21 +3,21 @@ import java.util.Locale; public class MetricNames { - private static final String PREFIX = "kubernetes.cloud"; + private static final String PREFIX = "armada.cloud"; - public static final String CREATION_FAILED = PREFIX + ".pods.creation.failed"; - public static final String PODS_CREATED = PREFIX + ".pods.created"; - public static final String LAUNCH_FAILED = PREFIX + ".pods.launch.failed"; - public static final String PODS_TERMINATED = PREFIX + ".pods.terminated"; + public static final String CREATION_FAILED = PREFIX + ".jobs.creation.failed"; + public static final String JOBS_SUBMITTED = PREFIX + ".jobs.submitted"; + public static final String LAUNCH_FAILED = PREFIX + ".jobs.launch.failed"; + public static final String JOBS_CANCELLED = PREFIX + ".jobs.cancelled"; public static final String REACHED_POD_CAP = PREFIX + ".provision.reached.pod.cap"; public static final String REACHED_GLOBAL_CAP = PREFIX + ".provision.reached.global.cap"; - public static final String FAILED_TIMEOUT = PREFIX + ".pods.launch.failed.timeout"; + public static final String FAILED_TIMEOUT = PREFIX + ".jobs.launch.failed.timeout"; public static final String PROVISION_NODES = PREFIX + ".provision.nodes"; public 
static final String PROVISION_FAILED = PREFIX + ".provision.failed"; - public static final String PODS_LAUNCHED = PREFIX + ".pods.launched"; + public static final String JOBS_LAUNCHED = PREFIX + ".jobs.launched"; public static String metricNameForPodStatus(String status) { String formattedStatus = status == null ? "null" : status.toLowerCase(Locale.getDefault()); - return PREFIX + ".pods.launch.status." + formattedStatus; + return PREFIX + ".jobs.launch.status." + formattedStatus; } } diff --git a/src/main/java/io/armadaproject/jenkins/plugin/OpenShiftBearerTokenCredentialImpl.java b/src/main/java/io/armadaproject/jenkins/plugin/OpenShiftBearerTokenCredentialImpl.java index cf6e43f8f..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/OpenShiftBearerTokenCredentialImpl.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/OpenShiftBearerTokenCredentialImpl.java @@ -1,22 +0,0 @@ -package io.armadaproject.jenkins.plugin; - -import com.cloudbees.plugins.credentials.CredentialsScope; -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import org.kohsuke.stapler.DataBoundConstructor; - -/** - * @author Nicolas De Loof - */ -@Deprecated -@SuppressFBWarnings("NM_SAME_SIMPLE_NAME_AS_SUPERCLASS") -public class OpenShiftBearerTokenCredentialImpl - extends org.jenkinsci.plugins.kubernetes.credentials.OpenShiftBearerTokenCredentialImpl { - - private static final long serialVersionUID = -3725963485838773012L; - - @DataBoundConstructor - public OpenShiftBearerTokenCredentialImpl( - CredentialsScope scope, String id, String description, String username, String password) { - super(scope, id, description, username, password); - } -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/OpenShiftTokenCredentialImpl.java b/src/main/java/io/armadaproject/jenkins/plugin/OpenShiftTokenCredentialImpl.java index b0a27a31f..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/OpenShiftTokenCredentialImpl.java +++ 
b/src/main/java/io/armadaproject/jenkins/plugin/OpenShiftTokenCredentialImpl.java @@ -1,52 +0,0 @@ -package io.armadaproject.jenkins.plugin; - -import com.cloudbees.plugins.credentials.CredentialsProvider; -import com.cloudbees.plugins.credentials.CredentialsScope; -import com.cloudbees.plugins.credentials.impl.BaseStandardCredentials; -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import hudson.Extension; -import hudson.util.Secret; -import org.jenkinsci.plugins.plaincredentials.StringCredentials; -import org.kohsuke.stapler.DataBoundConstructor; - -/** - * @deprecated Use {@link StringCredentials} - * @author Andrew Block - */ -@SuppressFBWarnings( - value = "SE_NO_SERIALVERSIONID", - justification = "Serialization happens exclusively through XStream and not Java Serialization.") -@Deprecated -public class OpenShiftTokenCredentialImpl extends BaseStandardCredentials implements TokenProducer { - - private final Secret secret; - - @DataBoundConstructor - public OpenShiftTokenCredentialImpl(CredentialsScope scope, String id, String description, Secret secret) { - super(scope, id, description); - this.secret = secret; - } - - @Override - public String getToken(String serviceAddress, String caCertData, boolean skipTlsVerify) { - return secret.getPlainText(); - } - - public Secret getSecret() { - return secret; - } - - @Extension - public static class DescriptorImpl extends BaseStandardCredentialsDescriptor { - - @Override - public String getDisplayName() { - return "OpenShift OAuth token (Deprecated)"; - } - - @Override - public boolean isApplicable(CredentialsProvider provider) { - return false; - } - } -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/PodTemplate.java b/src/main/java/io/armadaproject/jenkins/plugin/PodTemplate.java index 77f68c491..deba4aac8 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/PodTemplate.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/PodTemplate.java @@ -42,7 +42,6 @@ import 
net.sf.json.JSONObject; import org.apache.commons.lang.StringUtils; import io.armadaproject.jenkins.plugin.model.TemplateEnvVar; -import io.armadaproject.jenkins.plugin.pod.retention.PodRetention; import io.armadaproject.jenkins.plugin.pod.yaml.YamlMergeStrategy; import io.armadaproject.jenkins.plugin.volumes.PodVolume; import io.armadaproject.jenkins.plugin.volumes.workspace.WorkspaceVolume; @@ -111,8 +110,6 @@ protected static MessageDigest getLabelDigestFunction() { private String name; - private String namespace; - private String image; private boolean privileged; @@ -241,9 +238,6 @@ public Pod getYamlsPod() { @CheckForNull private transient TaskListener listener; - @CheckForNull - private PodRetention podRetention; - public PodTemplate() { this((String) null); } @@ -319,15 +313,6 @@ public String getName() { return name; } - public String getNamespace() { - return namespace; - } - - @DataBoundSetter - public void setNamespace(String namespace) { - this.namespace = Util.fixEmptyAndTrim(namespace); - } - @Deprecated public String getImage() { return getFirstContainer().map(ContainerTemplate::getImage).orElse(null); @@ -902,22 +887,13 @@ public void setYamls(List yamls) { } } - public PodRetention getPodRetention() { - return podRetention == null ? PodRetention.getPodTemplateDefault() : podRetention; - } - - @DataBoundSetter - public void setPodRetention(PodRetention podRetention) { - this.podRetention = PodRetention.getPodTemplateDefault().equals(podRetention) ? null : podRetention; - } - - /** @see KubernetesSlave#getRunListener */ + /** @see ArmadaSlave#getRunListener */ @NonNull public TaskListener getListener() { return listener == null ? 
TaskListener.NULL : listener; } - /** @see KubernetesSlave#getRunListener */ + /** @see ArmadaSlave#getRunListener */ @CheckForNull public TaskListener getListenerOrNull() { return listener; @@ -990,7 +966,7 @@ protected Object readResolve() { } @Deprecated - public Pod build(KubernetesClient client, KubernetesSlave slave) { + public Pod build(KubernetesClient client, ArmadaSlave slave) { return build(slave); } @@ -999,7 +975,7 @@ public Pod build(KubernetesClient client, KubernetesSlave slave) { * * @param slave */ - public Pod build(KubernetesSlave slave) { + public Pod build(ArmadaSlave slave) { return new PodTemplateBuilder(this, slave).build(); } @@ -1134,13 +1110,6 @@ public WorkspaceVolume getDefaultWorkspaceVolume() { return WorkspaceVolume.getDefault(); } - @SuppressWarnings("unused") // Used by jelly - @Restricted(DoNotUse.class) // Used by jelly - public Descriptor getDefaultPodRetention() { - return Jenkins.get() - .getDescriptor(PodRetention.getPodTemplateDefault().getClass()); - } - @SuppressWarnings("unused") // Used by jelly @Restricted(DoNotUse.class) // Used by jelly public YamlMergeStrategy getDefaultYamlMergeStrategy() { @@ -1153,7 +1122,6 @@ public String toString() { return "PodTemplate{" + (id == null ? "" : "id='" + id + '\'') + (inheritFrom == null ? "" : ", inheritFrom='" + inheritFrom + '\'') + (name == null ? "" : ", name='" + name + '\'') - + (namespace == null ? "" : ", namespace='" + namespace + '\'') + (image == null ? "" : ", image='" + image + '\'') + (!privileged ? "" : ", privileged=" + privileged) + (runAsUser == null ? "" : ", runAsUser=" + runAsUser) @@ -1185,7 +1153,6 @@ public String toString() { ? "" : ", resourceLimitEphemeralStorage='" + resourceLimitEphemeralStorage + '\'') + (workspaceVolume == null ? "" : ", workspaceVolume='" + workspaceVolume + '\'') - + (podRetention == null ? "" : ", podRetention='" + podRetention + '\'') + (volumes == null || volumes.isEmpty() ? 
"" : ", volumes=" + volumes) + (containers == null || containers.isEmpty() ? "" : ", containers=" + containers) + (envVars == null || envVars.isEmpty() ? "" : ", envVars=" + envVars) diff --git a/src/main/java/io/armadaproject/jenkins/plugin/PodTemplateBuilder.java b/src/main/java/io/armadaproject/jenkins/plugin/PodTemplateBuilder.java index 10e034841..e0eca467a 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/PodTemplateBuilder.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/PodTemplateBuilder.java @@ -101,13 +101,13 @@ public class PodTemplateBuilder { private static final String WORKSPACE_VOLUME_NAME = "workspace-volume"; public static final Pattern FROM_DIRECTIVE = Pattern.compile("^FROM (.*)$"); - public static final String LABEL_KUBERNETES_CONTROLLER = "kubernetes.jenkins.io/controller"; + public static final String LABEL_KUBERNETES_CONTROLLER = "armada.jenkins.io/controller"; static final String NO_RECONNECT_AFTER_TIMEOUT = SystemProperties.getString(PodTemplateBuilder.class.getName() + ".noReconnectAfter", "1d"); private static final String JENKINS_AGENT_FILE_ENVVAR = "JENKINS_AGENT_FILE"; private static final String JENKINS_AGENT_AGENT_JAR = "/jenkins-agent/agent.jar"; private static final String JENKINS_AGENT_LAUNCHER_SCRIPT_LOCATION = "/jenkins-agent/jenkins-agent"; - public static final String ARMADA_LABEL = "kubernetes.jenkins.io/armada"; + public static final String ARMADA_LABEL = "armada.jenkins.io/agentname"; @SuppressFBWarnings(value = "MS_SHOULD_BE_FINAL", justification = "tests") @Restricted(NoExternalUse.class) @@ -150,7 +150,7 @@ public class PodTemplateBuilder { private PodTemplate template; @CheckForNull - private KubernetesSlave agent; + private ArmadaSlave agent; @CheckForNull private ArmadaCloud cloud; @@ -160,20 +160,20 @@ public PodTemplateBuilder(PodTemplate template) { this.template = template; } - public PodTemplateBuilder(PodTemplate template, KubernetesSlave agent) { + public PodTemplateBuilder(PodTemplate 
template, ArmadaSlave agent) { this.template = template; this.agent = agent; - this.cloud = agent.getKubernetesCloud(); + this.cloud = agent.getArmadaCloud(); } - public PodTemplateBuilder withSlave(@NonNull KubernetesSlave slave) { + public PodTemplateBuilder withSlave(@NonNull ArmadaSlave slave) { this.agent = slave; - this.cloud = slave.getKubernetesCloud(); + this.cloud = slave.getArmadaCloud(); return this; } @Deprecated - public Pod build(KubernetesSlave slave) { + public Pod build(ArmadaSlave slave) { LOGGER.log(Level.WARNING, "This method is deprecated and does nothing"); return this.build(); } @@ -189,7 +189,7 @@ public Pod build() { if (agent == null) { throw new IllegalStateException("No KubernetesSlave is set"); } - String podName = agent.getPodName(); + String podName = agent.getAgentName(); int i = 0; for (final PodVolume volume : template.getVolumes()) { final String volumeName = "volume-" + i; @@ -235,7 +235,7 @@ public Pod build() { Map labels = new HashMap<>(); if (agent != null) { - labels.putAll(agent.getKubernetesCloud().getPodLabelsMap()); + labels.putAll(agent.getArmadaCloud().getPodLabelsMap()); } labels.putAll(template.getLabelsMap()); if (!labels.isEmpty()) { @@ -307,15 +307,6 @@ public Pod build() { // merge with the yaml fragments Pod pod = combine(template.getYamlsPod(), builder.endSpec().build()); - // Apply defaults - if (pod.getMetadata().getNamespace() == null) { - if (template.getNamespace() != null) { - pod.getMetadata().setNamespace(template.getNamespace()); - } else if (cloud != null && cloud.getNamespace() != null) { - pod.getMetadata().setNamespace(cloud.getNamespace()); - } - } - // default agent container String agentContainerName = StringUtils.defaultString(template.getAgentContainer(), JNLP_NAME); Optional agentOpt = pod.getSpec().getContainers().stream() @@ -440,7 +431,7 @@ private String normalizePath(String np) { private Map defaultEnvVars(Collection globalEnvVars) { Map env = new HashMap<>(); if (agent != null) { - 
ArmadaCloud cloud = agent.getKubernetesCloud(); + ArmadaCloud cloud = agent.getArmadaCloud(); if (cloud.isAddMasterProxyEnvVars()) { // see if the env vars for proxy that the remoting.jar looks for // are set on the controller, and if so, propagate them to the agent @@ -489,7 +480,7 @@ private Map agentEnvVars(String workingDir) { env.put("JENKINS_AGENT_WORKDIR", workingDir); - ArmadaCloud cloud = agent.getKubernetesCloud(); + ArmadaCloud cloud = agent.getArmadaCloud(); if (!StringUtils.isBlank(cloud.getJenkinsTunnel())) { env.put("JENKINS_TUNNEL", cloud.getJenkinsTunnel()); @@ -687,7 +678,6 @@ private Map getAnnotationsMap(List annotations) { builder.put(podAnnotation.getKey(), substituteEnv(podAnnotation.getValue())); } } - builder.put(GarbageCollection.ANNOTATION_LAST_REFRESH, String.valueOf(System.currentTimeMillis())); return Collections.unmodifiableMap(builder); } diff --git a/src/main/java/io/armadaproject/jenkins/plugin/PodTemplateUtils.java b/src/main/java/io/armadaproject/jenkins/plugin/PodTemplateUtils.java index eb5439e3e..77405cbee 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/PodTemplateUtils.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/PodTemplateUtils.java @@ -466,7 +466,6 @@ public static PodTemplate combine(PodTemplate parent, PodTemplate template) { PodTemplate podTemplate = new PodTemplate(template.getId()); var h = new HierarchyResolver<>(parent, template); podTemplate.setName(name); - podTemplate.setNamespace(h.resolve(PodTemplate::getNamespace, PodTemplateUtils::isNullOrEmpty)); podTemplate.setLabel(label); podTemplate.setNodeSelector(h.resolve(PodTemplate::getNodeSelector, PodTemplateUtils::isNullOrEmpty)); podTemplate.setServiceAccount(h.resolve(PodTemplate::getServiceAccount, PodTemplateUtils::isNullOrEmpty)); @@ -494,7 +493,6 @@ public static PodTemplate combine(PodTemplate parent, PodTemplate template) { h.resolve(PodTemplate::getActiveDeadlineSeconds, i -> Objects.equals(i, 0))); 
podTemplate.setServiceAccount(h.resolve(PodTemplate::getServiceAccount, PodTemplateUtils::isNullOrEmpty)); podTemplate.setSchedulerName(h.resolve(PodTemplate::getSchedulerName, PodTemplateUtils::isNullOrEmpty)); - podTemplate.setPodRetention(template.getPodRetention()); podTemplate.setShowRawYaml(h.resolve(PodTemplate::isShowRawYaml, v -> v)); podTemplate.setRunAsUser(h.resolve(PodTemplate::getRunAsUser, Objects::isNull)); podTemplate.setRunAsGroup(h.resolve(PodTemplate::getRunAsGroup, Objects::isNull)); diff --git a/src/main/java/io/armadaproject/jenkins/plugin/PodUtils.java b/src/main/java/io/armadaproject/jenkins/plugin/PodUtils.java index 0ecb40586..72975197f 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/PodUtils.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/PodUtils.java @@ -20,10 +20,7 @@ import edu.umd.cs.findbugs.annotations.NonNull; import hudson.Util; import hudson.model.Label; -import io.fabric8.kubernetes.api.model.ContainerStatus; -import io.fabric8.kubernetes.api.model.ObjectMeta; -import io.fabric8.kubernetes.api.model.Pod; -import io.fabric8.kubernetes.api.model.PodStatus; +import io.fabric8.kubernetes.api.model.*; import io.fabric8.kubernetes.client.KubernetesClient; import io.fabric8.kubernetes.client.KubernetesClientException; import java.util.Arrays; @@ -43,66 +40,13 @@ private PodUtils() {} private static final Logger LOGGER = Logger.getLogger(PodUtils.class.getName()); - public static final Predicate CONTAINER_IS_TERMINATED = - cs -> cs.getState().getTerminated() != null; - public static final Predicate CONTAINER_IS_WAITING = - cs -> cs.getState().getWaiting() != null; - - @NonNull - public static List getTerminatedContainers(Pod pod) { - return getContainers(pod, CONTAINER_IS_TERMINATED); - } - - public static List getWaitingContainers(Pod pod) { - return getContainers(pod, CONTAINER_IS_WAITING); - } - - public static List getContainerStatus(Pod pod) { - PodStatus podStatus = pod.getStatus(); - if (podStatus == null) { 
- return Collections.emptyList(); - } - return podStatus.getContainerStatuses(); - } - - public static List getContainers(Pod pod, Predicate predicate) { - return getContainerStatus(pod).stream().filter(predicate).collect(Collectors.toList()); - } - - /** - *

Cancel queue items matching the given pod. - *

The queue item has to have a task url matching the pod "runUrl"-annotation - * and the queue item assigned label needs to match the label jenkins/label of the pod. - *

It uses the current thread context to list item queues, - * so make sure to be in the right context before calling this method. - * - * @param pod The pod to cancel items for. - * @param reason The reason the item are being cancelled. - */ - public static void cancelQueueItemFor(Pod pod, String reason) { - var metadata = pod.getMetadata(); - if (metadata == null) { - return; - } - String podName = metadata.getName(); - String podNamespace = metadata.getNamespace(); - String podDisplayName = podNamespace + "/" + podName; - var annotations = metadata.getAnnotations(); - if (annotations == null) { - LOGGER.log(Level.FINE, () -> "Pod " + podDisplayName + " .metadata.annotations is null"); - return; - } - var runUrl = annotations.get(PodTemplateStepExecution.POD_ANNOTATION_RUN_URL); - if (runUrl == null) { - LOGGER.log(Level.FINE, () -> "Pod " + podDisplayName + " .metadata.annotations.runUrl is null"); - return; - } - var labels = metadata.getLabels(); - if (labels == null) { - LOGGER.log(Level.FINE, () -> "Pod " + podDisplayName + " .metadata.labels is null"); - return; + public static void cancelQueueItemFor(ArmadaSlave node, String reason) { + var queueItem = node.getItem(); + if(queueItem != null) { + LOGGER.log(Level.FINE, "Canceling queue item \"" + queueItem.task.getDisplayName() + "\"\n" + (!StringUtils.isBlank(reason) ? 
"due to " + reason : "")); + var queue = Jenkins.get().getQueue(); + queue.cancel(queueItem); } - cancelQueueItemFor(runUrl, labels.get(PodTemplate.JENKINS_LABEL), reason, podDisplayName); } public static void cancelQueueItemFor( diff --git a/src/main/java/io/armadaproject/jenkins/plugin/StandardPlannedNodeBuilder.java b/src/main/java/io/armadaproject/jenkins/plugin/StandardPlannedNodeBuilder.java index 4590508c3..17d14cc65 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/StandardPlannedNodeBuilder.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/StandardPlannedNodeBuilder.java @@ -17,7 +17,7 @@ public NodeProvisioner.PlannedNode build() { CompletableFuture f; String displayName; try { - KubernetesSlave agent = KubernetesSlave.builder() + ArmadaSlave agent = ArmadaSlave.builder() .podTemplate(t.isUnwrapped() ? t : cloud.getUnwrappedTemplate(t)) .cloud(cloud) .build(); diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientParameters.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientParameters.java new file mode 100644 index 000000000..2493d1368 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientParameters.java @@ -0,0 +1,33 @@ +package io.armadaproject.jenkins.plugin.job; + +import java.util.Objects; + +public class ArmadaClientParameters { + public final String apiUrl; + public final int apiPort; + public final String queue; + public final String namespace; + public final String credentialsId; + public final ArmadaJobSetStrategy jobSetStrategy; + + public ArmadaClientParameters(String apiUrl, int apiPort, String queue, String namespace, String credentialsId, ArmadaJobSetStrategy jobSetStrategy) { + this.apiUrl = apiUrl; + this.apiPort = apiPort; + this.queue = queue; + this.namespace = namespace; + this.credentialsId = credentialsId; + this.jobSetStrategy = jobSetStrategy; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof ArmadaClientParameters)) return 
false; + ArmadaClientParameters that = (ArmadaClientParameters) o; + return apiPort == that.apiPort && Objects.equals(apiUrl, that.apiUrl) && Objects.equals(queue, that.queue) && Objects.equals(namespace, that.namespace) && Objects.equals(credentialsId, that.credentialsId) && Objects.equals(jobSetStrategy, that.jobSetStrategy); + } + + @Override + public int hashCode() { + return Objects.hash(apiUrl, apiPort, queue, namespace, credentialsId, jobSetStrategy); + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientProvider.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientProvider.java new file mode 100644 index 000000000..c006610f2 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientProvider.java @@ -0,0 +1,8 @@ +package io.armadaproject.jenkins.plugin.job; + +import io.armadaproject.ArmadaClient; +import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException; + +public interface ArmadaClientProvider { + ArmadaClient get() throws KubernetesAuthException; +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientUtil.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientUtil.java new file mode 100644 index 000000000..7b0a0ad22 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaClientUtil.java @@ -0,0 +1,185 @@ +package io.armadaproject.jenkins.plugin.job; + +import api.EventOuterClass; +import api.SubmitOuterClass; +import org.apache.commons.lang.StringUtils; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public final class ArmadaClientUtil { + private ArmadaClientUtil() {} + + public static final Set TERMINAL_STATES = new HashSet<>(List.of(SubmitOuterClass.JobState.FAILED, SubmitOuterClass.JobState.CANCELLED, SubmitOuterClass.JobState.SUCCEEDED, SubmitOuterClass.JobState.PREEMPTED)); + + public static String lookoutUrlForJob(String lookoutBaseUrl, int lookoutPort, String queue, String jobSetId, 
String jobId) { + return StringUtils.stripEnd(lookoutBaseUrl, "/") + ":" + lookoutPort + "/?page=0&f[0][id]=queue&f[0][value][0]=" + + queue + "&f[0][match]=anyOf&f[1][id]=jobSet&f[1][value]=" + + jobSetId + "&f[1][match]=exact&f[2][id]=jobId&f[2][value]=" + + jobId + "&f[2][match]=exact"; + } + + public static SubmitOuterClass.JobState toJobState(EventOuterClass.EventMessage.EventsCase event) { + switch(event) { + case RUNNING: + return SubmitOuterClass.JobState.RUNNING; + case PENDING: + return SubmitOuterClass.JobState.PENDING; + case QUEUED: + return SubmitOuterClass.JobState.QUEUED; + case SUCCEEDED: + return SubmitOuterClass.JobState.SUCCEEDED; + case FAILED: + return SubmitOuterClass.JobState.FAILED; + case SUBMITTED: + return SubmitOuterClass.JobState.SUBMITTED; + case LEASED: + return SubmitOuterClass.JobState.LEASED; + case PREEMPTED: + return SubmitOuterClass.JobState.PREEMPTED; + case CANCELLED: + return SubmitOuterClass.JobState.CANCELLED; + default: + return SubmitOuterClass.JobState.UNKNOWN; + } + } + + public static boolean isInFailedState(SubmitOuterClass.JobState jobState) { + switch(jobState) { + case FAILED: + case REJECTED: + return true; + default: + return false; + } + } + + public static boolean isInFailedState(EventOuterClass.EventMessage.EventsCase jobState) { + switch(jobState) { + case FAILED: + case UNABLE_TO_SCHEDULE: + return true; + default: + return false; + } + } + + public static boolean isInTerminalState(SubmitOuterClass.JobState jobState) { + switch(jobState) { + case FAILED: + case CANCELLED: + case SUCCEEDED: + case PREEMPTED: + return true; + default: + return false; + } + } + + public static boolean isInTerminalState(EventOuterClass.EventMessage.EventsCase eventsCase) { + switch(eventsCase) { + case FAILED: + case CANCELLED: + case SUCCEEDED: + case PREEMPTED: + return true; + default: + return false; + } + } + + public static ArmadaJobMetadata extractMetadata(EventOuterClass.EventMessage eventMessage) { + String jobId = 
null; + String jobSetId = null; + String clusterId = null; + String podName = null; + String reason = null; + EventOuterClass.Cause cause = null; + switch(eventMessage.getEventsCase()) { + case SUBMITTED: + jobId = eventMessage.getSubmitted().getJobId(); + jobSetId = eventMessage.getSubmitted().getJobSetId(); + break; + case QUEUED: + jobId = eventMessage.getQueued().getJobId(); + jobSetId = eventMessage.getQueued().getJobSetId(); + break; + case LEASED: + jobId = eventMessage.getLeased().getJobId(); + jobSetId = eventMessage.getLeased().getJobSetId(); + break; + case LEASE_RETURNED: + jobId = eventMessage.getLeaseReturned().getJobId(); + jobSetId = eventMessage.getLeaseReturned().getJobSetId(); + break; + case LEASE_EXPIRED: + jobId = eventMessage.getLeaseExpired().getJobId(); + jobSetId = eventMessage.getLeaseExpired().getJobSetId(); + break; + case PENDING: + jobId = eventMessage.getPending().getJobId(); + jobSetId = eventMessage.getPending().getJobSetId(); + break; + case RUNNING: + var running = eventMessage.getRunning(); + jobSetId = running.getJobSetId(); + jobId = running.getJobId(); + clusterId = running.getClusterId(); + podName = running.getPodName(); + break; + case UNABLE_TO_SCHEDULE: + jobId = eventMessage.getUnableToSchedule().getJobId(); + jobSetId = eventMessage.getUnableToSchedule().getJobSetId(); + reason = eventMessage.getUnableToSchedule().getReason(); + break; + case FAILED: + jobId = eventMessage.getFailed().getJobId(); + jobSetId = eventMessage.getFailed().getJobSetId(); + reason = eventMessage.getFailed().getReason(); + cause = eventMessage.getFailed().getCause(); + break; + case SUCCEEDED: + jobId = eventMessage.getSucceeded().getJobId(); + jobSetId = eventMessage.getSucceeded().getJobSetId(); + break; + case REPRIORITIZED: + jobId = eventMessage.getReprioritized().getJobId(); + jobSetId = eventMessage.getReprioritized().getJobSetId(); + break; + case CANCELLING: + jobId = eventMessage.getCancelling().getJobId(); + jobSetId = 
eventMessage.getCancelling().getJobSetId(); + reason = eventMessage.getCancelling().getReason(); + break; + case CANCELLED: + jobId = eventMessage.getCancelled().getJobId(); + jobSetId = eventMessage.getCancelled().getJobSetId(); + reason = eventMessage.getCancelled().getReason(); + break; + case UTILISATION: + jobId = eventMessage.getUtilisation().getJobId(); + jobSetId = eventMessage.getUtilisation().getJobSetId(); + break; + case INGRESS_INFO: + jobId = eventMessage.getIngressInfo().getJobId(); + jobSetId = eventMessage.getIngressInfo().getJobSetId(); + break; + case REPRIORITIZING: + jobId = eventMessage.getReprioritizing().getJobId(); + jobSetId = eventMessage.getReprioritizing().getJobSetId(); + break; + case PREEMPTED: + jobId = eventMessage.getPreempted().getJobId(); + jobSetId = eventMessage.getPreempted().getJobSetId(); + reason = eventMessage.getPreempted().getReason(); + break; + case PREEMPTING: + jobId = eventMessage.getPreempting().getJobId(); + jobSetId = eventMessage.getPreempting().getJobSetId(); + reason = eventMessage.getPreempted().getReason(); + } + + return new ArmadaJobMetadata(jobSetId, jobId, podName, clusterId, reason, cause); + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaEventWatcher.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaEventWatcher.java new file mode 100644 index 000000000..94d9c7b8c --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaEventWatcher.java @@ -0,0 +1,9 @@ +package io.armadaproject.jenkins.plugin.job; + +import api.EventOuterClass; + +public interface ArmadaEventWatcher { + void onClose(); + + void onEvent(EventOuterClass.EventMessage message); +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaGarbageCollection.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaGarbageCollection.java new file mode 100644 index 000000000..49d20a78b --- /dev/null +++ 
b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaGarbageCollection.java @@ -0,0 +1,29 @@ +package io.armadaproject.jenkins.plugin.job; + +import hudson.Extension; +import hudson.model.AsyncPeriodicWork; +import hudson.model.TaskListener; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +public class ArmadaGarbageCollection { + @Extension + public static final class PeriodicGarbageCollection extends AsyncPeriodicWork { + public PeriodicGarbageCollection() { + super("Periodic cleanup of armada plugin state and jobs"); + } + + @Override + protected void execute(TaskListener listener) throws IOException, InterruptedException { + var state = ArmadaState.getInstance(); + state.runCleanup(); + state.save(); + } + + @Override + public long getRecurrencePeriod() { + return TimeUnit.MINUTES.toMillis(5); + } + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobManager.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobManager.java new file mode 100644 index 000000000..9dc6bb4a7 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobManager.java @@ -0,0 +1,239 @@ +package io.armadaproject.jenkins.plugin.job; + +import api.EventOuterClass; +import hudson.model.Saveable; +import io.armadaproject.ArmadaClient; +import io.fabric8.kubernetes.api.model.Pod; +import io.grpc.Status; +import io.grpc.StatusRuntimeException; +import org.apache.commons.lang.StringUtils; +import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException; + +import java.io.Closeable; +import java.io.IOException; +import java.io.Serializable; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class ArmadaJobManager implements Serializable, ArmadaClientProvider, ArmadaEventWatcher { + 
private static final Logger LOGGER = Logger.getLogger(ArmadaJobManager.class.getName()); + + private transient final Map eventWatchers; + private final Object jobSetIdLock; + private final ConcurrentMap jobSetManagers; + private final Saveable save; + + private volatile String currentJobSetId; + private volatile ArmadaClientParameters parameters; + + public ArmadaJobManager(ArmadaClientParameters parameters, Saveable save) { + this(parameters, save, new Object(), new ConcurrentHashMap<>(), null); + } + + private ArmadaJobManager(ArmadaClientParameters parameters, Saveable save, Object jobSetIdLock, ConcurrentMap jobSetManagers, String currentJobSetId) { + this.parameters = parameters; + this.save = save; + this.jobSetIdLock = jobSetIdLock; + this.jobSetManagers = jobSetManagers; + this.currentJobSetId = currentJobSetId; + this.eventWatchers = new ConcurrentHashMap<>(); + } + + public Closeable watchEvents(ArmadaEventWatcher eventWatcher) { + final var id = UUID.randomUUID().toString(); + eventWatchers.put(id, eventWatcher); + return () -> { + var removed = eventWatchers.remove(id); + if(removed != null) { + removed.onClose(); + } + }; + } + + public int getValidity() { + return parameters.hashCode(); + } + + public boolean reconfigure(ArmadaClientParameters parameters) { + var current = this.parameters; + var changed = false; + + if(!current.jobSetStrategy.equals(parameters.jobSetStrategy)) { + changed = true; + } + + // changed api url, close all jobset managers/kill all jobs + if(!current.apiUrl.equals(parameters.apiUrl)) { + jobSetManagers.forEach((k, jsm) -> jsm.close()); + jobSetManagers.clear(); + changed = true; + } else if(current.apiPort != parameters.apiPort || + !current.queue.equals(parameters.queue) || + !current.namespace.equals(parameters.namespace)|| + !StringUtils.equals(current.credentialsId, parameters.credentialsId)) { + jobSetManagers.forEach((k, jsm) -> jsm.reconfigure(parameters.namespace, parameters.queue)); + changed = true; + } + + 
if(changed) { + this.parameters = parameters; + } + + return changed; + } + + public ArmadaJobMetadata ensurePod(String existingJobSetId, String existingJobId, Pod pod) { + ArmadaJobMetadata result = null; + try { + if (existingJobSetId != null && existingJobId != null) { + result = getJobSetManager(existingJobSetId).ensureJob(pod, existingJobId); + } else { + result = getJobSetManager(computeJobSetId()).ensureJob(pod, null); + } + } catch(StatusRuntimeException e) { + handleGrpcError(parameters.queue, e); + } + + trySave(); + return result; + } + + public void cancelJob(String jobSetId, String jobId) { + getJobSetManager(jobSetId).cancelJob(jobId); + trySave(); + } + + public boolean hasFailed(String jobSetId, String jobId) { + return getJobSetManager(jobSetId).hasFailed(jobId); + } + + public boolean hasTerminated(String jobSetId, String jobId) { + return getJobSetManager(jobSetId).hasTerminated(jobId); + } + + public ArmadaJobMetadata waitUntilRunning(String jobSetId, String jobId, long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { + return getJobSetManager(jobSetId).waitUntilRunning(jobId, timeout, unit); + } + + public void waitUntilTerminated(String jobSetId, String jobId, long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { + getJobSetManager(jobSetId).waitUntilTerminated(jobId, timeout, unit); + } + + public void close() { + jobSetManagers.forEach((k, jsm) -> jsm.close()); + jobSetManagers.clear(); + eventWatchers.forEach((k, v) -> v.onClose()); + eventWatchers.clear(); + } + + @Override + public ArmadaClient get() throws KubernetesAuthException { + return ArmadaState.createClient(parameters); + } + + public void cleanupAbandonedJobSets() { + var jobSetIds = new HashSet<>(jobSetManagers.keySet()); + for(var jobSetId : jobSetIds) { + var jobSetManager = getJobSetManager(jobSetId); + if(jobSetManager.isAbandoned() && !jobSetManager.hasActiveJobs()) { + var removed = jobSetManagers.remove(jobSetId); + 
if(removed != null) { + removed.close(); + } + } + } + } + + public void cleanupAbandonedJobs(HashMap> jobsPerJobSetId) { + jobsPerJobSetId.keySet().forEach(jobSetId -> { + var jobSetManager = jobSetManagers.get(jobSetId); + if(jobSetManager != null) { + jobSetManager.cleanupAbandonedJobs(jobsPerJobSetId.get(jobSetId)); + } + }); + } + + protected void evaluateJobSetId() { + computeJobSetId(); + } + + protected Object readResolve() { + return new ArmadaJobManager(parameters, save, jobSetIdLock, jobSetManagers, currentJobSetId); + } + + private void handleGrpcError(String queue, StatusRuntimeException e) { + var code = e.getStatus().getCode(); + var message = e.getStatus().getDescription(); + if((code == Status.Code.PERMISSION_DENIED || code == Status.Code.NOT_FOUND) && StringUtils.contains(message, "queue")) { + // if not perms for queue or it's not found + // make sure we clean up the jobset event listener to prevent it from trying to continuously connect + // unlikely the jobset will exists if we can't submit jobs to the configured queue + + invalidateQueueConfig(queue); + } + throw e; + } + + private void invalidateQueueConfig(String queue) { + jobSetManagers.forEach((k, v) -> v.invalidateConfig(queue)); + } + + private synchronized void trySave() { + try { + save.save(); + } catch (IOException e) { + LOGGER.log(Level.WARNING, "Unable to save ArmadaJobManager state", e); + } + } + + private String computeJobSetId() { + var currentJobSetId = parameters.jobSetStrategy.getCurrentJobSet(); + synchronized (jobSetIdLock) { + if(!currentJobSetId.equals(this.currentJobSetId)) { + this.currentJobSetId = currentJobSetId; + abandonExpiredJobSetManagers(); + } + } + return currentJobSetId; + } + + private void abandonExpiredJobSetManagers() { + var toAbandon = new HashSet<>(jobSetManagers.keySet()); + if(currentJobSetId != null) { + toAbandon.remove(currentJobSetId); + } + for(var jobSetId : toAbandon) { + getJobSetManager(jobSetId).abandon(); + } + } + + // this will 
actually initialize the job set manager and start watching for events + private ArmadaJobSetManager getJobSetManager(String jobSet) { + var params = parameters; + var newJobSetManager = new ArmadaJobSetManager(params.queue, jobSet, params.namespace); + var result = jobSetManagers.putIfAbsent(jobSet, newJobSetManager); + if(result == null) { + result = newJobSetManager; + trySave(); + } + result.initialize(this, this); + return result; + } + + @Override + public void onClose() { + // ignore + } + + @Override + public void onEvent(EventOuterClass.EventMessage message) { + eventWatchers.forEach((k, v) -> { + v.onEvent(message); + }); + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobMetadata.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobMetadata.java new file mode 100644 index 000000000..9a14df89a --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobMetadata.java @@ -0,0 +1,67 @@ +package io.armadaproject.jenkins.plugin.job; + +import api.EventOuterClass; + +public class ArmadaJobMetadata { + private final String jobSetId; + private final String jobId; + private final String podName; + private final String clusterId; + private final String reason; + private final EventOuterClass.Cause cause; + + public ArmadaJobMetadata(String jobSetId, String jobId, String podName, String clusterId) { + this(jobSetId, jobId, podName, clusterId, null, null); + } + + public ArmadaJobMetadata(String jobSetId, String jobId, String podName, String clusterId, String reason, EventOuterClass.Cause cause) { + this.jobSetId = jobSetId; + this.jobId = jobId; + this.podName = podName; + this.clusterId = clusterId; + this.reason = reason; + this.cause = cause; + } + + public String getJobId() { + return jobId; + } + + public String getPodName() { + return podName; + } + + public String getClusterId() { + return clusterId; + } + + public ArmadaJobMetadata mergeWith(final ArmadaJobMetadata other) { + String podName; + String 
clusterId; + if (other.podName != null) { + podName = other.podName; + } else { + podName = this.podName; + } + + if (other.clusterId != null) { + clusterId = other.clusterId; + } else { + clusterId = this.clusterId; + } + + return new ArmadaJobMetadata(jobSetId, jobId, podName, clusterId); + } + + public String getJobSetId() { + return jobSetId; + } + + public String getReason() { + return reason; + } + + public EventOuterClass.Cause getCause() { + return cause; + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobNotifier.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobNotifier.java new file mode 100644 index 000000000..bdc19a861 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobNotifier.java @@ -0,0 +1,78 @@ +package io.armadaproject.jenkins.plugin.job; + +import api.SubmitOuterClass; + +import java.util.List; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CopyOnWriteArrayList; + +public class ArmadaJobNotifier { + public interface Callback { + void accept(ArmadaJobMetadata metadata); + + void error(RuntimeException e); + + void cancelled(); + } + + private static class Key { + private final String jobId; + private final SubmitOuterClass.JobState state; + + private Key(String jobId, SubmitOuterClass.JobState state) { + this.jobId = jobId; + this.state = state; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Key)) return false; + Key key = (Key) o; + return Objects.equals(jobId, key.jobId) && state == key.state; + } + + @Override + public int hashCode() { + return Objects.hash(jobId, state); + } + } + + private final ConcurrentMap> callbacks = new ConcurrentHashMap<>(); + + public void close() { + callbacks.forEach((jobId, callbacks) -> callbacks.forEach(Callback::cancelled)); + callbacks.clear(); + } + + public void subscribe(String jobId, SubmitOuterClass.JobState 
state, Callback callback) { + var key = new Key(jobId, state); + callbacks.computeIfAbsent(key, k -> new CopyOnWriteArrayList<>()).add(callback); + } + + public void unsubscribe(String jobId, SubmitOuterClass.JobState state, Callback callback) { + var key = new Key(jobId, state); + var subscriptions = callbacks.get(key); + if(subscriptions != null) { + subscriptions.remove(callback); + if(subscriptions.isEmpty()) { + callbacks.remove(key); + } + } + } + + public void notify(ArmadaJobMetadata metadata, SubmitOuterClass.JobState state, RuntimeException error) { + var key = new Key(metadata.getJobId(), state); + var list = callbacks.get(key); + if (list != null) { + list.forEach(callback -> { + if (error != null) { + callback.error(error); + } else { + callback.accept(metadata); + } + }); + } + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetEventWatcher.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetEventWatcher.java new file mode 100644 index 000000000..5e5f29f1c --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetEventWatcher.java @@ -0,0 +1,182 @@ +package io.armadaproject.jenkins.plugin.job; + +import api.EventOuterClass; +import com.sun.istack.NotNull; +import io.armadaproject.ArmadaClient; +import io.grpc.Context; +import io.grpc.Status; +import io.grpc.StatusRuntimeException; +import io.grpc.stub.StreamObserver; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executor; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.logging.Level; +import java.util.logging.Logger; + + +public class ArmadaJobSetEventWatcher implements Runnable { + private static final Logger LOGGER = Logger.getLogger(ArmadaJobSetEventWatcher.class.getName()); + + public interface ArmadaMessageCallback { + void onMessage(EventOuterClass.EventStreamMessage eventStreamMessage); + } + + private final Object 
cancellationLock = new Object(); + private final AtomicReference innerContextRef = new AtomicReference<>(); + private final AtomicBoolean reconnect = new AtomicBoolean(false); + private final AtomicBoolean waitForReconnect = new AtomicBoolean(false); + private final ArmadaClientProvider armadaClientProvider; + private final ArmadaMessageCallback messageCallback; + private volatile String queue; + private final String jobSetId; + private final String fromMessageId; + + public ArmadaJobSetEventWatcher(ArmadaClientProvider armadaClientProvider, ArmadaMessageCallback messageCallback, String queue, String jobSetId, String fromMessageId) { + this.armadaClientProvider = armadaClientProvider; + this.messageCallback = messageCallback; + this.queue = queue; + this.jobSetId = jobSetId; + this.fromMessageId = fromMessageId; + } + + public void close() { + synchronized (cancellationLock) { + cancellationLock.notify(); + } + } + + public void forceReconnect(String queue, boolean invalidConfig) { + this.queue = queue; + reconnect.set(true); + waitForReconnect.set(invalidConfig); + var innerContext = innerContextRef.get(); + if(innerContext != null) { + innerContext.cancel(null); + } + } + + @Override + public void run() { + try(var cancellableContext = Context.current().withCancellation()) { + var cancelThread = new Thread(() -> { + try { + synchronized (cancellationLock) { + cancellationLock.wait(); + } + } catch (InterruptedException e) { + // left empty + } + cancellableContext.cancel(null); + }); + cancelThread.setDaemon(true); + cancelThread.start(); + + cancellableContext.run(() -> { + final AtomicReference fromMessageId = new AtomicReference<>(this.fromMessageId); + var requestBuilder = EventOuterClass.JobSetRequest.newBuilder() + .setId(jobSetId) + .setErrorIfMissing(false) + .setWatch(true); + + while(!cancellableContext.isCancelled()) { + try(var client = getClient()) { + var builder = requestBuilder.setQueue(queue); + var msgId = fromMessageId.get(); + if(msgId != 
null) { + builder = builder.setFromMessageId(msgId); + } + LOGGER.info("Starting to stream events for queue " + queue + " and jobSetId " + jobSetId); + + final var completed = new CountDownLatch(1); + final var error = new AtomicReference(); + final var innerContext = cancellableContext.withCancellation(); + innerContextRef.set(innerContext); + final var request = builder.build(); + + innerContext.run(() -> { + if(waitForReconnect.get()) { + LOGGER.info("Config invalidated, waiting for reconnect..."); + var cancelled = new CountDownLatch(1); + Context.CancellationListener cancellationListener = context -> { + cancelled.countDown(); + }; + innerContext.addListener(cancellationListener, runnable -> { + cancelled.countDown(); + }); + try { + cancelled.await(); + } catch (InterruptedException e) { + // left empty + } + innerContext.removeListener(cancellationListener); + } else { + + client.streamEvents(request, new StreamObserver<>() { + @Override + public void onNext(EventOuterClass.EventStreamMessage eventStreamMessage) { + try { + messageCallback.onMessage(eventStreamMessage); + } catch (Throwable e) { + // avoid throwing here by all costs, it kills the grpc cancellable and this whole thing falls apart + } + fromMessageId.set(eventStreamMessage.getId()); + } + + @Override + public void onError(Throwable throwable) { + var isError = true; + if (throwable instanceof StatusRuntimeException) { + var status = (StatusRuntimeException) throwable; + isError = status.getStatus().getCode() != Status.Code.CANCELLED; + } + if (isError) { + LOGGER.log(Level.SEVERE, "Failed to stream events for queue " + queue + " and jobSetId " + jobSetId, throwable); + error.set(throwable); + } + completed.countDown(); + } + + @Override + public void onCompleted() { + LOGGER.info("Finished streaming events for queue " + queue + " and jobSetId " + jobSetId); + completed.countDown(); + } + }); + try { + if (reconnect.compareAndExchange(true, false)) { + innerContext.cancel(null); + } + 
completed.await(); + + var lastError = error.get(); + if (lastError != null) { + LOGGER.log(Level.WARNING, "error while listening to armada events", lastError); + if (lastError instanceof StatusRuntimeException) { + var statusEx = (StatusRuntimeException) lastError; + var code = statusEx.getStatus().getCode(); + LOGGER.info("Failed to watch armada jobset " + jobSetId + " code: " + code + " ...waiting before retrying..."); + Thread.sleep(5000); + } + } + } catch (InterruptedException e) { + LOGGER.log(Level.SEVERE, "Thread interrupted while waiting for stream completion"); + } + LOGGER.info("Finished streaming events for queue " + queue + (!cancellableContext.isCancelled() ? " reconnecting..." : "")); + } + }); + } + } + }); + } + } + + private ArmadaClient getClient() { + try { + return armadaClientProvider.get(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetManager.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetManager.java new file mode 100644 index 000000000..46b62657a --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetManager.java @@ -0,0 +1,388 @@ +package io.armadaproject.jenkins.plugin.job; + +import api.EventOuterClass; +import api.Job; +import api.SubmitOuterClass; +import io.fabric8.kubernetes.api.model.Pod; +import jenkins.metrics.api.Metrics; +import org.apache.commons.lang.StringUtils; +import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException; + +import java.io.Serializable; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static io.armadaproject.jenkins.plugin.MetricNames.metricNameForPodStatus; +import static 
io.armadaproject.jenkins.plugin.job.ArmadaClientUtil.*; + +public class ArmadaJobSetManager implements Serializable, ArmadaJobSetEventWatcher.ArmadaMessageCallback { + private static final Logger LOGGER = Logger.getLogger(ArmadaJobSetManager.class.getName()); + + private final ConcurrentMap knownJobs = new ConcurrentHashMap<>(); + private final String jobSetId; + private volatile String queue; + private volatile String namespace; + private volatile String lastMessageId = null; + private volatile boolean abandoned = false; + + private transient volatile ArmadaJobSetEventWatcher watcher; + private transient volatile Thread watcherThread; + private transient volatile ArmadaClientProvider clientProvider; + private transient volatile ArmadaEventWatcher eventWatcher; + private transient volatile ArmadaJobNotifier jobNotifier; + + private static class JobStatus { + private final ArmadaJobMetadata metadata; + private final SubmitOuterClass.JobState state; + + private JobStatus(ArmadaJobMetadata metadata, SubmitOuterClass.JobState state) { + this.metadata = metadata; + this.state = state; + } + } + + public ArmadaJobSetManager(String queue, String jobSetId, String namespace) { + if(StringUtils.isEmpty(queue)) { + throw new IllegalArgumentException("queue is empty"); + } + + if(StringUtils.isEmpty(jobSetId)) { + throw new IllegalArgumentException("jobSetId is empty"); + } + + if(StringUtils.isEmpty(namespace)) { + throw new IllegalArgumentException("namespace is empty"); + } + + this.queue = queue; + this.jobSetId = jobSetId; + this.namespace = namespace; + } + + public void abandon() { + this.abandoned = true; + } + + public boolean isAbandoned() { + return this.abandoned; + } + + public boolean hasActiveJobs() { + if(knownJobs.isEmpty()) { + return false; + } + + if(knownJobs.values().stream().noneMatch(s -> s.state != SubmitOuterClass.JobState.UNKNOWN && !isInTerminalState(s.state))) { + return false; + } + + try(var client = clientProvider.get()) { + var response = 
client.getJobStatus(Job.JobStatusRequest.newBuilder().addAllJobIds(knownJobs.keySet()).build()); + return response.getJobStatesMap().values().stream().anyMatch(s -> s != SubmitOuterClass.JobState.UNKNOWN && !isInTerminalState(s)); + } catch (KubernetesAuthException e) { + LOGGER.log(Level.SEVERE, "Error while querying known job statuses jobset " + jobSetId, e); + //return false here, this will make sure everything is closed down if run into an error + return false; + } + } + + public void cancelJob(String jobId) { + try(var client = clientProvider.get()) { + var response = client.getJobStatus(Job.JobStatusRequest.newBuilder().addJobIds(jobId).build()); + if (!isInTerminalState(response.getJobStatesMap().get(jobId))) { + client.cancelJob(SubmitOuterClass.JobCancelRequest.newBuilder() + .setQueue(queue) + .setJobSetId(jobSetId) + .setJobId(jobId) + .build()); + + String msg = ("Cancelled job id: " + jobId + " with job set id: " + + jobSetId); + LOGGER.info(msg); + } else { + String msg = ("No jobs in a non-terminal state for id: " + jobId + " with job set id: " + + jobSetId); + LOGGER.log(Level.WARNING, msg); + } + } catch (KubernetesAuthException e) { + LOGGER.log(Level.SEVERE, "Error while cancelling job", e); + throw new RuntimeException(e); + } + } + + public ArmadaJobMetadata ensureJob(Pod pod, String existingJobId) { + if(abandoned) { + throw new IllegalStateException("JobSet abandoned"); + } + + try(var client = clientProvider.get()) { + tryRefreshJobState(existingJobId); + // if the controller was interrupted after creating the pod but before it connected back, then + // the pod might already exist and the creating logic must be skipped. 
+ var needsSubmit = existingJobId == null; + if(existingJobId != null && knownJobs.containsKey(existingJobId)) { + var knownJob = knownJobs.getOrDefault(existingJobId, null); + needsSubmit = knownJob == null || isInTerminalState(knownJob.state); + if(knownJob != null && isInTerminalState(knownJob.state)) { + knownJobs.remove(existingJobId); + } + } else { + needsSubmit = true; + } + + if(needsSubmit) { + ArmadaMapper armadaMapper = new ArmadaMapper(queue, namespace, jobSetId, pod); + var jobSubmitResponse = client.submitJob(armadaMapper.createJobSubmitRequest()); + var jobId = jobSubmitResponse.getJobResponseItems(0).getJobId(); + knownJobs.put(jobId, new JobStatus(null, SubmitOuterClass.JobState.UNKNOWN)); + return new ArmadaJobMetadata(jobSetId, jobId, null, null); + } + + return new ArmadaJobMetadata(jobSetId, existingJobId, null, null); + } catch (KubernetesAuthException e) { + LOGGER.log(Level.SEVERE, "Failed to create job", e); + throw new RuntimeException(e); + } + } + + public void initialize(ArmadaClientProvider clientProvider, ArmadaEventWatcher eventWatcher) { + if(watcher == null) { + this.eventWatcher = eventWatcher; + this.clientProvider = clientProvider; + this.jobNotifier = new ArmadaJobNotifier(); + + // initialize known job states if we are loading from saved state + try(var client = clientProvider.get()) { + var request = Job.JobStatusRequest.newBuilder().addAllJobIds(knownJobs.keySet()).build(); + var response = client.getJobStatus(request); + var jobStateMap = response.getJobStatesMap(); + jobStateMap.forEach((jobId, jobState) -> { + if(isInTerminalState(jobState)) { + knownJobs.remove(jobId); + } else { + knownJobs.computeIfPresent(jobId, (k, v) -> new JobStatus(v.metadata, jobState)); + } + }); + } catch (KubernetesAuthException e) { + LOGGER.log(Level.SEVERE, "Failed to set armada job client", e); + throw new RuntimeException(e); + } + + watcher = new ArmadaJobSetEventWatcher(clientProvider, this, queue, jobSetId, lastMessageId); + 
watcherThread = new Thread(watcher); + watcherThread.setDaemon(true); + watcherThread.start(); + } + } + + public void cleanupAbandonedJobs(Set agentAssociatedJobIds) { + knownJobs.keySet().forEach(j -> { + if (!agentAssociatedJobIds.contains(j)) { + cancelJob(j); + } + }); + } + + public void waitUntilTerminated(String jobId, long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { + waitUntilState(jobId, ArmadaClientUtil.TERMINAL_STATES, timeout, unit); + } + + public ArmadaJobMetadata waitUntilRunning(String jobId, long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { + return waitUntilState(jobId, new HashSet<>(List.of(SubmitOuterClass.JobState.RUNNING)), timeout, unit); + } + + public boolean hasFailed(String jobId) { + var job = knownJobs.getOrDefault(jobId, null); + return job != null && isInFailedState(job.state); + } + + public boolean hasTerminated(String jobId) { + var job = knownJobs.getOrDefault(jobId, null); + return job != null && isInTerminalState(job.state); + } + + @Override + public void onMessage(EventOuterClass.EventStreamMessage eventStreamMessage) { + var message = eventStreamMessage.getMessage(); + var eventsCase = message.getEventsCase(); + var jobMetadata = extractMetadata(message); + + var jobId = jobMetadata.getJobId(); + final var state = toJobState(eventsCase); + + Metrics.metricRegistry().counter(metricNameForPodStatus(state.toString())).inc(); + + knownJobs.compute(jobId, (k, v) -> updateJobStatus(k, v, jobMetadata, state)); + if(isInFailedState(state)) { + jobNotifier.notify(new ArmadaJobMetadata(jobSetId, jobId, null, null), state, new RuntimeException("Job " + state)); + } else { + jobNotifier.notify(jobMetadata, state, null); + } + + if(isInTerminalState(eventsCase)) { + knownJobs.remove(jobId); + } + + if(eventWatcher != null) { + eventWatcher.onEvent(message); + } + + lastMessageId = eventStreamMessage.getId(); + } + + public void invalidateConfig(String queue) { + 
if(StringUtils.equals(queue, this.queue)) { + var currentWatcher = watcher; + if(currentWatcher != null) { + currentWatcher.forceReconnect(queue, true); + } + } + } + + public void reconfigure(String namespace, String queue) { + this.namespace = namespace; + this.queue = queue; + var currentWatcher = watcher; + if(currentWatcher != null) { + currentWatcher.forceReconnect(queue, false); + } + } + + public void close() { + var currentWatcher = watcher; + watcher = null; + if(currentWatcher != null) { + currentWatcher.close(); + try { + watcherThread.join(); + } catch (InterruptedException e) { + LOGGER.log(Level.SEVERE, "Interrupted while waiting for watcher to complete", e); + } + } + + if(jobNotifier != null) { + jobNotifier.close(); + } + + if(clientProvider != null) { + try (var client = clientProvider.get()) { + client.cancelJob(SubmitOuterClass.JobCancelRequest.newBuilder() + .setQueue(queue) + .setJobSetId(jobSetId) + .addAllJobIds(knownJobs.keySet()) + .build()); + } catch (KubernetesAuthException e) { + LOGGER.log(Level.SEVERE, "Error while cancelling jobs", e); + } + } + } + + private static JobStatus updateJobStatus(String k, JobStatus v, ArmadaJobMetadata jobMetadata, SubmitOuterClass.JobState state) { + return new JobStatus(v != null && v.metadata != null ? v.metadata.mergeWith(jobMetadata) : jobMetadata, + state != SubmitOuterClass.JobState.UNKNOWN ? state : (v == null ? 
SubmitOuterClass.JobState.UNKNOWN : v.state)); + } + + private ArmadaJobMetadata waitUntilState(String jobId, Set states, long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { + var job = knownJobs.getOrDefault(jobId, null); + if(job == null) { + throw new IllegalArgumentException("Unknown job: " + jobId); + } + + // check if we already know the job is in required state + if(states.contains(job.state)) { + return job.metadata; + } + + + final ArmadaJobMetadata preWaitMetadata = job.metadata; + final AtomicReference metadata = new AtomicReference<>(job.metadata); + final AtomicBoolean cancelled = new AtomicBoolean(false); + final AtomicReference error = new AtomicReference<>(); + final CountDownLatch latch = new CountDownLatch(1); + ArmadaJobNotifier.Callback callback = new ArmadaJobNotifier.Callback() { + @Override + public void accept(ArmadaJobMetadata newMetadata) { + metadata.set(newMetadata); + latch.countDown(); + } + + @Override + public void error(RuntimeException e) { + error.set(e); + latch.countDown(); + } + + @Override + public void cancelled() { + cancelled.set(true); + latch.countDown(); + } + }; + // subscribe to get state events + states.forEach(state -> jobNotifier.subscribe(jobId, state, callback)); + + var timedOut = false; + // check again if it maybe already in required state before waiting + // if the job terminated it has already been removed + job = knownJobs.getOrDefault(jobId, null); + if(job == null) { + states.forEach(state -> jobNotifier.unsubscribe(jobId, state, callback)); + if(!states.stream().anyMatch(ArmadaClientUtil::isInTerminalState)) { + throw new IllegalArgumentException("Job already terminated: " + jobId); + } + return preWaitMetadata; + } else if(!states.contains(job.state)) { + timedOut = !latch.await(timeout, unit); + } else { + states.forEach(state -> jobNotifier.unsubscribe(jobId, state, callback)); + return job.metadata; + } + + if(!cancelled.get()) { + states.forEach(state -> 
jobNotifier.unsubscribe(jobId, state, callback)); + } else { + throw new CancellationException("Armada event watcher cancelled"); + } + + var err = error.get(); + if(err != null) { + throw err; + } + + if(timedOut) { + job = knownJobs.get(jobId); + if(job == null && states.stream().anyMatch(ArmadaClientUtil::isInTerminalState)) { + return preWaitMetadata; + } + else if(job != null && states.contains(job.state)) { + return job.metadata; + } else { + throw new TimeoutException("Timed out waiting for job: " + jobId); + } + } else { + return metadata.get(); + } + } + + private void tryRefreshJobState(String jobId) + { + if(jobId != null) { + var request = Job.JobStatusRequest.newBuilder().addJobIds(jobId).build(); + try (var client = clientProvider.get()) { + var response = client.getJobStatus(request); + var jobStateMap = response.getJobStatesMap(); + if (jobStateMap.containsKey(jobId)) { + knownJobs.compute(jobId, (k, v) -> updateJobStatus(k, v, null, jobStateMap.get(jobId))); + } + } catch (KubernetesAuthException e) { + LOGGER.log(Level.SEVERE, "Error while cancelling job", e); + } + } + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetStrategy.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetStrategy.java new file mode 100644 index 000000000..dccab6bfa --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaJobSetStrategy.java @@ -0,0 +1,5 @@ +package io.armadaproject.jenkins.plugin.job; + +public interface ArmadaJobSetStrategy { + String getCurrentJobSet(); +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaLaunchFailedOfflineCause.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaLaunchFailedOfflineCause.java new file mode 100644 index 000000000..edb39cbac --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaLaunchFailedOfflineCause.java @@ -0,0 +1,17 @@ +package io.armadaproject.jenkins.plugin.job; + +import hudson.slaves.OfflineCause; 
+import org.kohsuke.stapler.export.Exported; + +public class ArmadaLaunchFailedOfflineCause extends OfflineCause { + public final String description; + + public ArmadaLaunchFailedOfflineCause(String description) { + this.description = description; + } + + @Exported(name = "description") + public String toString() { + return this.description; + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaState.java b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaState.java new file mode 100644 index 000000000..0c2c163c5 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/ArmadaState.java @@ -0,0 +1,208 @@ +package io.armadaproject.jenkins.plugin.job; + +import com.cloudbees.plugins.credentials.common.StandardCredentials; +import hudson.Extension; +import hudson.XmlFile; +import hudson.model.AsyncPeriodicWork; +import hudson.model.Saveable; +import hudson.model.TaskListener; +import hudson.model.listeners.SaveableListener; +import io.armadaproject.ArmadaClient; +import io.armadaproject.jenkins.plugin.ArmadaCloud; +import io.armadaproject.jenkins.plugin.ArmadaComputer; +import io.armadaproject.jenkins.plugin.ArmadaSlave; +import jenkins.model.Jenkins; +import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException; +import org.jenkinsci.plugins.plaincredentials.StringCredentials; + +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static io.armadaproject.jenkins.plugin.KubernetesFactoryAdapter.resolveCredentials; + +public class ArmadaState implements Saveable, Serializable { + private static final Logger LOGGER = Logger.getLogger(ArmadaState.class.getName()); + + private static ArmadaState instance; + private final ConcurrentMap jobManagers = new 
ConcurrentHashMap<>(); + + @Extension + public static final class PeriodicSave extends AsyncPeriodicWork { + public PeriodicSave() { + super("Periodic save of armada plugin state"); + } + + @Override + protected void execute(TaskListener listener) throws IOException, InterruptedException { + ArmadaState.getInstance().save(); + } + + @Override + public long getRecurrencePeriod() { + return TimeUnit.SECONDS.toMillis(60); + } + } + + @Extension(ordinal = 1) + public static class SaveableListenerImpl extends SaveableListener { + @Override + public void onChange(Saveable o, XmlFile file) { + if (o instanceof Jenkins) { + Jenkins jenkins = (Jenkins) o; + getInstance().reconfigure(jenkins.clouds.getAll(ArmadaCloud.class)); + } + super.onChange(o, file); + } + } + + protected ArmadaState() { + LOGGER.info("ArmadaState created"); + } + + public static ArmadaJobManager getJobManager(ArmadaCloud cloud) { + return getInstance().doGetJobManager(cloud); + } + + private synchronized void reconfigure(List clouds) { + var keys = new HashSet<>(jobManagers.keySet()); + var changed = false; + for (var cloud : clouds) { + String displayName = cloud.getDisplayName(); + var jobManager = getJobManager(cloud); + changed = jobManager.reconfigure(toParameters(cloud)); + keys.remove(displayName); + } + for(var cloudName : keys) { + changed = true; + jobManagers.remove(cloudName).close(); + } + + if(changed) { + trySave(); + } + } + + private static ArmadaClientParameters toParameters(ArmadaCloud cloud) { + return new ArmadaClientParameters( + cloud.getArmadaUrl(), + Integer.parseInt(cloud.getArmadaPort()), + cloud.getArmadaQueue(), + cloud.getArmadaNamespace(), + cloud.getArmadaCredentialsId(), + cloud.getJobSetStrategy() + ); + } + + private ArmadaJobManager doGetJobManager(ArmadaCloud cloud) { + jobManagers.computeIfAbsent(cloud.getDisplayName(), (cloudName) -> new ArmadaJobManager(toParameters(cloud), this)); + trySave(); + return jobManagers.get(cloud.getDisplayName()); + } + + public 
static ArmadaClient createClient(ArmadaClientParameters params) throws KubernetesAuthException { + if(params.credentialsId == null) { + return new ArmadaClient(params.apiUrl, params.apiPort); + } + + StandardCredentials standardCredentials = resolveCredentials(params.credentialsId); + if (!(standardCredentials instanceof StringCredentials)) { + throw new KubernetesAuthException("credentials not a string credentials"); + } + + String secret = ((StringCredentials) standardCredentials).getSecret().getPlainText(); + + return new ArmadaClient(params.apiUrl, params.apiPort, secret); + } + + @Override + public synchronized void save() throws IOException { + getConfigFile().write(this); + } + + protected void runCleanup() { + var activeAgents = new HashMap>(); + Arrays.stream(Jenkins.get().getComputers()).filter(c -> c instanceof ArmadaComputer).forEach(c -> { + var agent = ((ArmadaComputer)c).getNode(); + activeAgents.computeIfAbsent(agent.getCloudName(), (cloudName) -> new ArrayList<>()).add(agent); + }); + + jobManagers.forEach((cloudName, jobManager) -> { + try { + if (activeAgents.containsKey(cloudName)) { + jobManager.evaluateJobSetId(); + var jobsPerJobSet = new HashMap>(); + activeAgents.get(cloudName).forEach(a -> { + var jobSetId = a.getArmadaJobSetId(); + var jobId = a.getArmadaJobId(); + if (jobSetId != null && jobId != null) { + jobsPerJobSet.computeIfAbsent(jobSetId, (c) -> new HashSet<>()).add(jobId); + } + }); + jobManager.cleanupAbandonedJobs(jobsPerJobSet); + } + } catch(Throwable e) { + LOGGER.log(Level.WARNING, "Failed to clean up abandoned jobs", e); + } + + jobManager.cleanupAbandonedJobSets(); + }); + } + + protected static synchronized ArmadaState getInstance() { + if (instance == null) { + var configFile = getConfigFile(); + if (!configFile.exists()) { + instance = createNewAndSave(); + } else { + + try { + instance = (ArmadaState) configFile.read(); + } catch (Throwable e) { + try { + configFile.delete(); + } catch (Throwable ex) { + 
LOGGER.log(Level.SEVERE, "Failed to delete config file", ex); + throw new RuntimeException(e); + } + instance = createNewAndSave(); + } + } + } + return instance; + } + + private static ArmadaState createNewAndSave() { + ArmadaState result; + result = new ArmadaState(); + try { + result.save(); + } catch(Throwable ex) { + LOGGER.log(Level.SEVERE, "Failed to save empty config file", ex); + } + return result; + } + + private void trySave() { + try { + save(); + } catch (IOException e) { + LOGGER.log(Level.WARNING, "Failed to save armada state", e); + } + } + + private static XmlFile getConfigFile() { + var dir = new File(Jenkins.get().getRootDir(), "armada-plugin"); + if (!dir.exists()) { + //noinspection ResultOfMethodCallIgnored + dir.mkdirs(); + } + return new XmlFile(Jenkins.XSTREAM, new File(dir, "job-manager.xml")); + } +} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/job/DailyArmadaJobSetStrategy.java b/src/main/java/io/armadaproject/jenkins/plugin/job/DailyArmadaJobSetStrategy.java new file mode 100644 index 000000000..ea78eada7 --- /dev/null +++ b/src/main/java/io/armadaproject/jenkins/plugin/job/DailyArmadaJobSetStrategy.java @@ -0,0 +1,30 @@ +package io.armadaproject.jenkins.plugin.job; + +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Objects; + +public class DailyArmadaJobSetStrategy implements ArmadaJobSetStrategy { + private final String jobSetPrefix; + + public DailyArmadaJobSetStrategy(String jobSetPrefix) { + this.jobSetPrefix = jobSetPrefix; + } + + @Override + public String getCurrentJobSet() { + return jobSetPrefix + new SimpleDateFormat("-ddMMyyyy").format(new Date()); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof DailyArmadaJobSetStrategy)) return false; + DailyArmadaJobSetStrategy that = (DailyArmadaJobSetStrategy) o; + return Objects.equals(jobSetPrefix, that.jobSetPrefix); + } + + @Override + public int hashCode() { + return Objects.hashCode(jobSetPrefix); + } +} diff 
--git a/src/main/java/io/armadaproject/jenkins/plugin/pipeline/ArmadaDeclarativeAgent.java b/src/main/java/io/armadaproject/jenkins/plugin/pipeline/ArmadaDeclarativeAgent.java index aa307591a..bc5fa3bb7 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/pipeline/ArmadaDeclarativeAgent.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pipeline/ArmadaDeclarativeAgent.java @@ -9,7 +9,6 @@ import hudson.util.ListBoxModel; import io.armadaproject.jenkins.plugin.ContainerTemplate; import io.armadaproject.jenkins.plugin.PodTemplate; -import io.armadaproject.jenkins.plugin.pod.retention.PodRetention; import io.armadaproject.jenkins.plugin.pod.yaml.YamlMergeStrategy; import java.util.Collections; import java.util.List; @@ -72,9 +71,6 @@ public class ArmadaDeclarativeAgent extends RetryableDeclarativeAgent containerTemplates; @@ -287,18 +283,6 @@ public void setSlaveConnectTimeout(int slaveConnectTimeout) { this.slaveConnectTimeout = slaveConnectTimeout; } - public PodRetention getPodRetention() { - return this.podRetention == null ? ArmadaPodTemplateStep.DescriptorImpl.defaultPodRetention : this.podRetention; - } - - @DataBoundSetter - public void setPodRetention(@CheckForNull PodRetention podRetention) { - this.podRetention = - (podRetention == null || podRetention.equals(ArmadaPodTemplateStep.DescriptorImpl.defaultPodRetention)) - ? 
null - : podRetention; - } - public String getYamlFile() { return yamlFile; } @@ -439,9 +423,6 @@ public Map getAsArgs() { if (slaveConnectTimeout != 0) { argMap.put("slaveConnectTimeout", slaveConnectTimeout); } - if (podRetention != null) { - argMap.put("podRetention", podRetention); - } if (instanceCap > 0 && instanceCap < Integer.MAX_VALUE) { argMap.put("instanceCap", instanceCap); } @@ -468,7 +449,6 @@ public static class DescriptorImpl extends DeclarativeAgentDescriptor serverUrl = new AtomicReference<>(); - try (ArmadaClient armadaClient = kubernetesCloud.connectToArmada()) { - JobSetRequest jobSetRequest = JobSetRequest.newBuilder() - .setId(kubernetesCloud.getArmadaJobSetPrefix() - + kubernetesCloud.getArmadaJobSetId()) - .setQueue(kubernetesCloud.getArmadaQueue()) - .setErrorIfMissing(true) - .build(); - - armadaClient.getEvents(jobSetRequest).forEachRemaining(e -> { - EventMessage message = e.getMessage(); - // FIXME add wait mechanism - if (message.getRunning().getJobId().equals(kubernetesSlave.getArmadaJobId())) { - String clusterId = message.getRunning().getClusterId(); - try { - serverUrl.set( - ClusterConfigParser.parse(kubernetesCloud.getArmadaClusterConfigPath()) - .get(clusterId)); - - } catch (Exception ex) { - throw new RuntimeException("Failed to parse cluster config file", ex); - } - - namespace = message.getRunning().getPodNamespace(); - podName = message.getRunning().getPodName(); - } - }); - } - - return kubernetesCloud.connect(serverUrl.get(), namespace); + return getKubernetesSlave().connect(); } - private KubernetesSlave getKubernetesSlave() throws IOException, InterruptedException { + private ArmadaSlave getKubernetesSlave() throws IOException, InterruptedException { Node node = context.get(Node.class); - if (!(node instanceof KubernetesSlave)) { + if (!(node instanceof ArmadaSlave)) { throw new AbortException( String.format("Node is not a Armada node: %s", node != null ? 
node.getNodeName() : null)); } - return (KubernetesSlave) node; + return (ArmadaSlave) node; } } diff --git a/src/main/java/io/armadaproject/jenkins/plugin/pipeline/PodTemplateContext.java b/src/main/java/io/armadaproject/jenkins/plugin/pipeline/PodTemplateContext.java index d685c791f..c2ffb5564 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/pipeline/PodTemplateContext.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pipeline/PodTemplateContext.java @@ -8,18 +8,12 @@ public class PodTemplateContext implements Serializable { private static final long serialVersionUID = 3065143885759619305L; - private final String namespace; private final String name; - public PodTemplateContext(String namespace, String name) { - this.namespace = namespace; + public PodTemplateContext(String name) { this.name = name; } - public String getNamespace() { - return namespace; - } - public String getName() { return name; } diff --git a/src/main/java/io/armadaproject/jenkins/plugin/pipeline/PodTemplateStepExecution.java b/src/main/java/io/armadaproject/jenkins/plugin/pipeline/PodTemplateStepExecution.java index eff367f8a..0145297d7 100755 --- a/src/main/java/io/armadaproject/jenkins/plugin/pipeline/PodTemplateStepExecution.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pipeline/PodTemplateStepExecution.java @@ -2,7 +2,6 @@ import static java.util.stream.Collectors.toList; -import edu.umd.cs.findbugs.annotations.CheckForNull; import edu.umd.cs.findbugs.annotations.NonNull; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import hudson.AbortException; @@ -13,7 +12,7 @@ import hudson.slaves.Cloud; import io.armadaproject.jenkins.plugin.ContainerTemplate; import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.armadaproject.jenkins.plugin.KubernetesFolderProperty; +import io.armadaproject.jenkins.plugin.ArmadaFolderProperty; import io.armadaproject.jenkins.plugin.PodAnnotation; import io.armadaproject.jenkins.plugin.PodImagePullSecret; import 
io.armadaproject.jenkins.plugin.PodTemplate; @@ -91,11 +90,9 @@ public boolean start() throws Exception { stepName = label; } String name = String.format(NAME_FORMAT, stepName, randString); - String namespace = checkNamespace(cloud, podTemplateContext); newTemplate = new PodTemplate(); newTemplate.setName(name); - newTemplate.setNamespace(namespace); if (step.getInheritFrom() == null) { newTemplate.setInheritFrom(PodTemplateUtils.emptyToNull(parentTemplates)); @@ -143,7 +140,6 @@ public boolean start() throws Exception { } newTemplate.setAgentInjection(step.isAgentInjection()); newTemplate.setAgentContainer(step.getAgentContainer()); - newTemplate.setPodRetention(step.getPodRetention()); if (step.getActiveDeadlineSeconds() != 0) { newTemplate.setActiveDeadlineSeconds(step.getActiveDeadlineSeconds()); @@ -166,7 +162,7 @@ public boolean start() throws Exception { cloud.addDynamicTemplate(newTemplate); BodyInvoker invoker = getContext() .newBodyInvoker() - .withContexts(step, new PodTemplateContext(namespace, name)) + .withContexts(step, new PodTemplateContext(name)) .withCallback(new PodTemplateCallback(newTemplate, cloudName)); if (step.getLabel() == null) { invoker.withContext(EnvironmentExpander.merge( @@ -215,25 +211,12 @@ private void checkAccess(Run run, ArmadaCloud armadaCloud) throws AbortExc ItemGroup parent = job.getParent(); // Get the Parent of the Job (which might be a Folder) Set allowedClouds = new HashSet<>(); - KubernetesFolderProperty.collectAllowedClouds(allowedClouds, parent); + ArmadaFolderProperty.collectAllowedClouds(allowedClouds, parent); if (!allowedClouds.contains(armadaCloud.name)) { throw new AbortException(String.format("Not authorized to use Kubernetes cloud: %s", step.getCloud())); } } - private String checkNamespace( - ArmadaCloud armadaCloud, @CheckForNull PodTemplateContext podTemplateContext) { - String namespace = null; - if (!PodTemplateUtils.isNullOrEmpty(step.getNamespace())) { - namespace = step.getNamespace(); - } else if 
(podTemplateContext != null && !PodTemplateUtils.isNullOrEmpty(podTemplateContext.getNamespace())) { - namespace = podTemplateContext.getNamespace(); - } else { - namespace = armadaCloud.getNamespace(); - } - return namespace; - } - /** * Re-inject the dynamic template when resuming the pipeline */ diff --git a/src/main/java/io/armadaproject/jenkins/plugin/pipeline/SecretsMasker.java b/src/main/java/io/armadaproject/jenkins/plugin/pipeline/SecretsMasker.java index 40f816d0d..2da77c9fb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/pipeline/SecretsMasker.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pipeline/SecretsMasker.java @@ -20,8 +20,8 @@ import hudson.Extension; import hudson.remoting.Channel; import hudson.util.LogTaskListener; -import io.armadaproject.jenkins.plugin.KubernetesComputer; -import io.armadaproject.jenkins.plugin.KubernetesSlave; +import io.armadaproject.jenkins.plugin.ArmadaComputer; +import io.armadaproject.jenkins.plugin.ArmadaSlave; import io.armadaproject.jenkins.plugin.PodTemplate; import io.fabric8.kubernetes.api.model.Container; import io.fabric8.kubernetes.api.model.EnvVar; @@ -77,7 +77,7 @@ public OutputStream decorate(OutputStream logger) throws IOException, Interrupte @Extension public static final class Factory extends DynamicContext.Typed { - private final Map> secrets = new WeakHashMap<>(); + private final Map> secrets = new WeakHashMap<>(); @Override protected Class type() { @@ -86,9 +86,9 @@ protected Class type() { @Override protected TaskListenerDecorator get(DelegatedContext context) throws IOException, InterruptedException { - KubernetesComputer c; + ArmadaComputer c; try { - c = context.get(KubernetesComputer.class); + c = context.get(ArmadaComputer.class); } catch (IOException | InterruptedException x) { LOGGER.log(Level.FINE, "Unable to look up KubernetesComputer", x); return null; @@ -122,13 +122,13 @@ protected TaskListenerDecorator get(DelegatedContext context) throws IOException } } - private 
static @CheckForNull Set secretsOf(KubernetesComputer c) + private static @CheckForNull Set secretsOf(ArmadaComputer c) throws IOException, InterruptedException { Channel ch = c.getChannel(); if (ch == null) { return null; } - KubernetesSlave slave = c.getNode(); + ArmadaSlave slave = c.getNode(); if (slave == null) { return null; } @@ -165,8 +165,7 @@ protected TaskListenerDecorator get(DelegatedContext context) throws IOException return null; } try (OutputStream errs = new LogTaskListener(LOGGER, Level.FINE).getLogger(); - ExecWatch exec = slave.getKubernetesCloud() - .connect() + ExecWatch exec = slave.connect() .pods() .inNamespace(slave.getNamespace()) .withName(slave.getPodName()) diff --git a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Always.java b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Always.java index 9f74ca14c..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Always.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Always.java @@ -1,57 +0,0 @@ -package io.armadaproject.jenkins.plugin.pod.retention; - -import hudson.Extension; -import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.fabric8.kubernetes.api.model.Pod; -import java.io.Serializable; -import java.util.function.Supplier; -import org.jenkinsci.Symbol; -import org.kohsuke.stapler.DataBoundConstructor; - -public class Always extends PodRetention implements Serializable { - - private static final long serialVersionUID = -3363056751880572952L; - - @DataBoundConstructor - public Always() {} - - @Override - public boolean shouldDeletePod(ArmadaCloud cloud, Supplier pod) { - return false; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - - if (obj instanceof Always) { - return true; - } - return false; - } - - @Override - public int hashCode() { - return this.toString().hashCode(); - } - - @Override - public String 
toString() { - return Messages.always(); - } - - @Extension - @Symbol("always") - public static class DescriptorImpl extends PodRetentionDescriptor { - - @Override - public String getDisplayName() { - return Messages.always(); - } - } -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Default.java b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Default.java index 24a07bb4a..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Default.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Default.java @@ -1,75 +0,0 @@ -package io.armadaproject.jenkins.plugin.pod.retention; - -import hudson.Extension; -import hudson.model.Descriptor; -import hudson.model.DescriptorVisibilityFilter; -import io.fabric8.kubernetes.api.model.Pod; -import java.io.Serializable; -import java.util.function.Supplier; -import io.armadaproject.jenkins.plugin.ArmadaCloud; -import org.jenkinsci.Symbol; -import org.kohsuke.stapler.DataBoundConstructor; - -public class Default extends PodRetention implements Serializable { - - private static final long serialVersionUID = -5209499689925746138L; - - @DataBoundConstructor - public Default() {} - - @Override - public boolean shouldDeletePod(ArmadaCloud cloud, Supplier pod) { - PodRetention parent = cloud.getPodRetention(); - if (!(parent instanceof Default)) { - return parent.shouldDeletePod(cloud, pod); - } - return true; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (obj instanceof Default) { - return true; - } - return false; - } - - @Override - public int hashCode() { - return this.toString().hashCode(); - } - - @Override - public String toString() { - return Messages._default(); - } - - @Extension - public static class FilterImpl extends DescriptorVisibilityFilter { - - @Override - @SuppressWarnings("rawtypes") - public boolean filter(Object context, Descriptor 
descriptor) { - if (context instanceof ArmadaCloud.DescriptorImpl && descriptor instanceof DescriptorImpl) { - return false; - } - return true; - } - } - - @Extension - @Symbol("default") - public static class DescriptorImpl extends PodRetentionDescriptor { - - @Override - public String getDisplayName() { - return Messages._default(); - } - } -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Never.java b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Never.java index f0670235c..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Never.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Never.java @@ -1,56 +0,0 @@ -package io.armadaproject.jenkins.plugin.pod.retention; - -import hudson.Extension; -import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.fabric8.kubernetes.api.model.Pod; -import java.io.Serializable; -import java.util.function.Supplier; -import org.jenkinsci.Symbol; -import org.kohsuke.stapler.DataBoundConstructor; - -public class Never extends PodRetention implements Serializable { - - private static final long serialVersionUID = -7127652621214283411L; - - @DataBoundConstructor - public Never() {} - - @Override - public boolean shouldDeletePod(ArmadaCloud cloud, Supplier pod) { - return true; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (obj instanceof Never) { - return true; - } - return false; - } - - @Override - public int hashCode() { - return this.toString().hashCode(); - } - - @Override - public String toString() { - return Messages.never(); - } - - @Extension - @Symbol("never") - public static class DescriptorImpl extends PodRetentionDescriptor { - - @Override - public String getDisplayName() { - return Messages.never(); - } - } -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/OnFailure.java 
b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/OnFailure.java index 6c5fede69..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/OnFailure.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/OnFailure.java @@ -1,72 +0,0 @@ -package io.armadaproject.jenkins.plugin.pod.retention; - -import hudson.Extension; -import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.fabric8.kubernetes.api.model.Pod; -import java.io.Serializable; -import java.util.Locale; -import java.util.function.Supplier; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.jenkinsci.Symbol; -import org.kohsuke.stapler.DataBoundConstructor; - -public class OnFailure extends PodRetention implements Serializable { - - private static final long serialVersionUID = 6424267627207206819L; - - private static final Logger LOGGER = Logger.getLogger(OnFailure.class.getName()); - - @DataBoundConstructor - public OnFailure() {} - - @Override - public boolean shouldDeletePod(ArmadaCloud cloud, Supplier podS) { - Pod pod = null; - try { - pod = podS.get(); - } catch (RuntimeException x) { - LOGGER.log(Level.WARNING, null, x); - } - if (pod == null || pod.getStatus() == null) { - return false; - } - boolean hasErrors = - pod.getStatus().getPhase().toLowerCase(Locale.getDefault()).matches("(failed|unknown)"); - return !hasErrors; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (obj instanceof OnFailure) { - return true; - } - return false; - } - - @Override - public int hashCode() { - return this.toString().hashCode(); - } - - @Override - public String toString() { - return Messages.on_Failure(); - } - - @Extension - @Symbol("onFailure") - public static class DescriptorImpl extends PodRetentionDescriptor { - - @Override - public String getDisplayName() { - return Messages.on_Failure(); - } - } -} diff --git 
a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/PodRetention.java b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/PodRetention.java index 279e2766a..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/PodRetention.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/PodRetention.java @@ -1,50 +0,0 @@ -package io.armadaproject.jenkins.plugin.pod.retention; - -import hudson.ExtensionPoint; -import hudson.model.AbstractDescribableImpl; -import io.fabric8.kubernetes.api.model.Pod; -import java.util.function.Supplier; -import io.armadaproject.jenkins.plugin.ArmadaCloud; - -/** - * PodRetention instances determine if the Kubernetes pod running a Jenkins agent - * should be deleted after Jenkins terminates the agent. - * - *

Custom pod retention behavior can be added by extending this class, including a descriptor - * that extends {@link PodRetentionDescriptor}

- */ -public abstract class PodRetention extends AbstractDescribableImpl implements ExtensionPoint { - - /** - * Returns the default PodRetention for a KubernetesCloud instance. - * - * @return the {@link Never} PodRetention strategy. - */ - public static PodRetention getKubernetesCloudDefault() { - return new Never(); - } - - /** - * Returns the default PodRetention for a PodTemplate instance. - * - * @return the {@link Default} PodRetention strategy. - */ - public static PodRetention getPodTemplateDefault() { - return new Default(); - } - - /** - * Determines if a agent pod should be deleted after the Jenkins build completes. - * - * @param cloud - the {@link ArmadaCloud} the agent pod belongs to. - * @param pod - the {@link Pod} running the Jenkins build. - * - * @return true if the agent pod should be deleted. - */ - public abstract boolean shouldDeletePod(ArmadaCloud cloud, Supplier pod); - - @Override - public String toString() { - return getClass().getSimpleName(); - } -} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/PodRetentionDescriptor.java b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/PodRetentionDescriptor.java index 5ebb99fbe..e69de29bb 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/PodRetentionDescriptor.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/PodRetentionDescriptor.java @@ -1,8 +0,0 @@ -package io.armadaproject.jenkins.plugin.pod.retention; - -import hudson.model.Descriptor; - -/** - * A {@link Descriptor} for any {@link PodRetention} implementation. 
- */ -public abstract class PodRetentionDescriptor extends Descriptor {} diff --git a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Reaper.java b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Reaper.java index 6657ea249..c0f1cc14c 100644 --- a/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Reaper.java +++ b/src/main/java/io/armadaproject/jenkins/plugin/pod/retention/Reaper.java @@ -16,10 +16,10 @@ package io.armadaproject.jenkins.plugin.pod.retention; +import api.EventOuterClass; import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import com.github.benmanes.caffeine.cache.LoadingCache; -import edu.umd.cs.findbugs.annotations.CheckForNull; import edu.umd.cs.findbugs.annotations.NonNull; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import hudson.Extension; @@ -35,29 +35,24 @@ import hudson.slaves.ComputerListener; import hudson.slaves.EphemeralNode; import hudson.slaves.OfflineCause; -import io.armadaproject.jenkins.plugin.KubernetesClientProvider; import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.armadaproject.jenkins.plugin.KubernetesComputer; -import io.armadaproject.jenkins.plugin.KubernetesSlave; +import io.armadaproject.jenkins.plugin.ArmadaComputer; +import io.armadaproject.jenkins.plugin.ArmadaSlave; import io.armadaproject.jenkins.plugin.PodUtils; +import io.armadaproject.jenkins.plugin.job.ArmadaClientUtil; +import io.armadaproject.jenkins.plugin.job.ArmadaEventWatcher; +import io.armadaproject.jenkins.plugin.job.ArmadaJobMetadata; +import io.armadaproject.jenkins.plugin.job.ArmadaState; import io.fabric8.kubernetes.api.model.ContainerStateTerminated; import io.fabric8.kubernetes.api.model.ContainerStateWaiting; import io.fabric8.kubernetes.api.model.ContainerStatus; import io.fabric8.kubernetes.api.model.Pod; import io.fabric8.kubernetes.api.model.PodStatus; -import io.fabric8.kubernetes.client.KubernetesClient; -import 
io.fabric8.kubernetes.client.Watch; import io.fabric8.kubernetes.client.Watcher; -import io.fabric8.kubernetes.client.WatcherException; + +import java.io.Closeable; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.TimeUnit; @@ -71,22 +66,15 @@ import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException; /** - * Checks for deleted pods corresponding to {@link KubernetesSlave} and ensures the node is removed from Jenkins too. + * Checks for deleted pods corresponding to {@link ArmadaSlave} and ensures the node is removed from Jenkins too. *

If the pod has been deleted, all of the associated state (running user processes, workspace, etc.) must also be gone; * so there is no point in retaining this agent definition any further. - * ({@link KubernetesSlave} is not an {@link EphemeralNode}: it does support running across Jenkins restarts.) - *

Note that pod retention policies other than the default {@link Never} may disable this system, - * unless some external process or garbage collection policy results in pod deletion. + * ({@link ArmadaSlave} is not an {@link EphemeralNode}: it does support running across Jenkins restarts.) */ @Extension public class Reaper extends ComputerListener { - private static final Logger LOGGER = Logger.getLogger(Reaper.class.getName()); - /** - * Only useful for tests which shutdown Jenkins without terminating the JVM. - * Close the watch so that we don't end up with spam in logs - */ @Extension public static class ReaperShutdownListener extends ItemListener { @Override @@ -105,22 +93,20 @@ public static Reaper getInstance() { */ private final AtomicBoolean activated = new AtomicBoolean(); - private final Map watchers = new ConcurrentHashMap<>(); + private final Map watchers = new ConcurrentHashMap<>(); private final LoadingCache> terminationReasons = Caffeine.newBuilder().expireAfterAccess(1, TimeUnit.DAYS).build(k -> new ConcurrentSkipListSet<>()); @Override public void preLaunch(Computer c, TaskListener taskListener) throws IOException, InterruptedException { - if (c instanceof KubernetesComputer) { + if (c instanceof ArmadaComputer) { Timer.get().schedule(this::maybeActivate, 10, TimeUnit.SECONDS); - // ensure associated cloud is being watched. the watch may have been closed due to exception or - // failure to register on initial activation. - KubernetesSlave node = ((KubernetesComputer) c).getNode(); + var node = ((ArmadaComputer) c).getNode(); if (node != null && !isWatchingCloud(node.getCloudName())) { try { - watchCloud(node.getKubernetesCloud()); + watchCloud(node.getArmadaCloud()); } catch (IllegalStateException ise) { LOGGER.log(Level.WARNING, ise, () -> "kubernetes cloud not found: " + node.getCloudName()); } @@ -146,16 +132,16 @@ private void activate() { } /** - * Remove any {@link KubernetesSlave} nodes that reference Pods that don't exist. 
+ * Remove any {@link ArmadaSlave} nodes that reference Pods that don't exist. */ private void reapAgents() { Jenkins jenkins = Jenkins.getInstanceOrNull(); if (jenkins != null) { for (Node n : new ArrayList<>(jenkins.getNodes())) { - if (!(n instanceof KubernetesSlave)) { + if (!(n instanceof ArmadaSlave)) { continue; } - KubernetesSlave ks = (KubernetesSlave) n; + ArmadaSlave ks = (ArmadaSlave) n; if (ks.getLauncher().isLaunchSupported()) { // Being launched, don't touch it. continue; @@ -169,8 +155,7 @@ private void reapAgents() { // yet we do not want to do an unnamespaced pod list for RBAC reasons. // Could use a hybrid approach: first list all pods in the configured namespace for all clouds; // then go back and individually check any unmatched agents with their configured namespace. - ArmadaCloud cloud = ks.getKubernetesCloud(); - if (cloud.connect().pods().inNamespace(ns).withName(name).get() == null) { + if (ks.connect().pods().inNamespace(ns).withName(name).get() == null) { LOGGER.info(() -> ns + "/" + name + " seems to have been deleted, so removing corresponding Jenkins agent"); jenkins.removeNode(ks); @@ -184,11 +169,6 @@ private void reapAgents() { } } - /** - * Create watchers for each configured {@link ArmadaCloud} in Jenkins and remove any existing watchers - * for clouds that have been removed. If a {@link ArmadaCloud} client configuration property has been - * updated a new watcher will be created to replace the existing one. - */ private void watchClouds() { Jenkins jenkins = Jenkins.getInstanceOrNull(); if (jenkins != null) { @@ -206,62 +186,49 @@ private void watchClouds() { } } - /** - * Register {@link CloudPodWatcher} for the given cloud if one does not exist or if the existing watcher - * is no longer valid. - * @param kc kubernetes cloud to watch - */ private void watchCloud(@NonNull ArmadaCloud kc) { // can't use ConcurrentHashMap#computeIfAbsent because CloudPodWatcher will remove itself from the watchers // map on close. 
If an error occurs when creating the watch it would create a deadlock situation. - CloudPodWatcher watcher = new CloudPodWatcher(kc); + var watcher = new ArmadaJobWatcher(kc); if (!isCloudPodWatcherActive(watcher)) { try { - KubernetesClient client = kc.connect(); - watcher.watch = client.pods().inNamespace(client.getNamespace()).watch(watcher); - CloudPodWatcher old = watchers.put(kc.name, watcher); + var jobManager = ArmadaState.getJobManager(kc); + watcher.watch = jobManager.watchEvents(watcher); + var old = watchers.put(kc.name, watcher); // if another watch slipped in then make sure it stopped if (old != null) { old.stop(); } LOGGER.info(() -> "set up watcher on " + kc.getDisplayName()); - } catch (KubernetesAuthException | IOException | RuntimeException x) { - LOGGER.log(Level.WARNING, x, () -> "failed to set up watcher on " + kc.getDisplayName()); + } catch(Throwable t) { + LOGGER.log(Level.WARNING, "Failed to set up watcher on " + kc.getDisplayName(), t); } } } - /** - * Check if the cloud is watched for Pod events. - * @param name cloud name - * @return true if a watcher has been registered for the given cloud - */ boolean isWatchingCloud(String name) { return watchers.get(name) != null; } - public Map getWatchers() { - return watchers; - } - /** * Check if the given cloud pod watcher exists and is still valid. Watchers may become invalid * of the kubernetes client configuration changes. 
* @param watcher watcher to check * @return true if the provided watcher already exists and is valid, false otherwise */ - private boolean isCloudPodWatcherActive(@NonNull CloudPodWatcher watcher) { - CloudPodWatcher existing = watchers.get(watcher.cloudName); - return existing != null && existing.clientValidity == watcher.clientValidity; + private boolean isCloudPodWatcherActive(@NonNull ArmadaJobWatcher watcher) { + var existing = watchers.get(watcher.cloudName); + return existing != null && existing.jobManagerValidity == watcher.jobManagerValidity; } - private static Optional resolveNode(@NonNull Jenkins jenkins, String namespace, String name) { + private static Optional resolveNode(@NonNull Jenkins jenkins, String jobId, String jobSetId) { return new ArrayList<>(jenkins.getNodes()) .stream() - .filter(KubernetesSlave.class::isInstance) - .map(KubernetesSlave.class::cast) + .filter(ArmadaSlave.class::isInstance) + .map(ArmadaSlave.class::cast) .filter(ks -> - Objects.equals(ks.getNamespace(), namespace) && Objects.equals(ks.getPodName(), name)) + Objects.equals(ks.getArmadaJobId(), jobId) && + Objects.equals(ks.getArmadaJobSetId(), jobSetId)) .findFirst(); } @@ -269,101 +236,64 @@ private static Optional resolveNode(@NonNull Jenkins jenkins, S * Stop all watchers */ private void closeAllWatchers() { - // on close each watcher should remove itself from the watchers map (see CloudPodWatcher#onClose) - watchers.values().forEach(CloudPodWatcher::stop); + // on close each watcher should remove itself from the watchers map (see ArmadaJobWatcher#onClose) + watchers.values().forEach(ArmadaJobWatcher::stop); } - /** - * Kubernetes pod event watcher for a Kubernetes Cloud. Notifies {@link Listener} - * extensions on Pod events. The default Kubernetes client watch manager will - * attempt to reconnect on connection errors. If the watch api returns "410 Gone" - * then the Watch will close itself with a WatchException and this watcher will - * deregister itself. 
- */ - private class CloudPodWatcher implements Watcher { + private class ArmadaJobWatcher implements ArmadaEventWatcher { private final String cloudName; - private final int clientValidity; + private final int jobManagerValidity; + private Closeable watch; - @CheckForNull - private Watch watch; - - CloudPodWatcher(@NonNull ArmadaCloud cloud) { + ArmadaJobWatcher(@NonNull ArmadaCloud cloud) { this.cloudName = cloud.name; - this.clientValidity = KubernetesClientProvider.getValidity(cloud); + jobManagerValidity = ArmadaState.getJobManager(cloud).getValidity(); } - @Override - public void eventReceived(Action action, Pod pod) { - // don't send bookmark event to listeners as they don't represent change in pod state - if (action == Action.BOOKMARK) { - // TODO future enhancement might be to keep track of bookmarks for better reconnect behavior. Would - // likely have to track based on cloud address/namespace in case cloud was renamed or namespace - // is changed. - return; + void stop() { + if (watch != null) { + LOGGER.info("Stopping watch for armada cloud " + cloudName); + try { + this.watch.close(); + } catch (IOException e) { + } } + } - // If there was a non-success http response code from watch request - // or the api returned a Status object the watch manager notifies with - // an error action and null resource. 
- if (action == Action.ERROR && pod == null) { + @Override + public void onClose() { + LOGGER.fine(() -> cloudName + " watcher closed"); + Reaper.this.watchers.remove(cloudName, this); + } + + @Override + public void onEvent(EventOuterClass.EventMessage message) { + if(!ArmadaClientUtil.isInTerminalState(message.getEventsCase())) { return; } - Jenkins jenkins = Jenkins.getInstanceOrNull(); + var jenkins = Jenkins.getInstanceOrNull(); if (jenkins == null) { return; } - String ns = pod.getMetadata().getNamespace(); - String name = pod.getMetadata().getName(); - Optional optionalNode = resolveNode(jenkins, ns, name); - if (!optionalNode.isPresent()) { + var metadata = ArmadaClientUtil.extractMetadata(message); + var optionalNode = resolveNode(jenkins, metadata.getJobId(), metadata.getJobSetId()); + if(!optionalNode.isPresent()) { return; } + var jobSetId = metadata.getJobSetId(); + var jobId = metadata.getJobId(); Listeners.notify(Listener.class, true, listener -> { try { - Set terminationReasons = Reaper.this.terminationReasons.get( - optionalNode.get().getNodeName()); - listener.onEvent( - action, - optionalNode.get(), - pod, - terminationReasons != null ? terminationReasons : Collections.emptySet()); + var terminationReasons = Reaper.this.terminationReasons.get(optionalNode.get().getNodeName()); + listener.onEvent(optionalNode.get(), metadata, message, terminationReasons); } catch (Exception x) { - LOGGER.log(Level.WARNING, "Listener " + listener + " failed for " + ns + "/" + name, x); + LOGGER.log(Level.WARNING, "Listener " + listener + " failed for " + jobSetId + "/" + jobId, x); } }); } - - /** - * Close the associated {@link Watch} handle. This should be used shutdown/stop the watch. It will cause the - * watch manager to call this classes {@link #onClose()} method. 
- */ - void stop() { - if (watch != null) { - LOGGER.info("Stopping watch for kubernetes cloud " + cloudName); - this.watch.close(); - } - } - - @Override - public void onClose() { - LOGGER.fine(() -> cloudName + " watcher closed"); - // remove self from watchers list - Reaper.this.watchers.remove(cloudName, this); - } - - @Override - public void onClose(WatcherException e) { - // usually triggered because of "410 Gone" responses - // https://kubernetes.io/docs/reference/using-api/api-concepts/#410-gone-responses - // "Gone" may be returned if the resource version requested is older than the server - // has retained. - LOGGER.log(Level.WARNING, e, () -> cloudName + " watcher closed with exception"); - // remove self from watchers list - Reaper.this.watchers.remove(cloudName, this); - } } /** @@ -371,10 +301,6 @@ public void onClose(WatcherException e) { * @param node a {@link Node#getNodeName} * @return a possibly empty set of {@link ContainerStateTerminated#getReason} or {@link PodStatus#getReason} */ - @SuppressFBWarnings( - value = "NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE", - justification = - "Confused by @org.checkerframework.checker.nullness.qual.Nullable on LoadingCache.get? Never null here.") @NonNull public Set terminationReasons(@NonNull String node) { synchronized (terminationReasons) { @@ -387,256 +313,105 @@ public Set terminationReasons(@NonNull String node) { */ public interface Listener extends ExtensionPoint { - /** - * Handle Pod event. 
- * @param action the kind of event that happened to the referred pod - * @param node The affected node - * @param pod The affected pod - * @param terminationReasons Set of termination reasons - */ void onEvent( - @NonNull Watcher.Action action, - @NonNull KubernetesSlave node, - @NonNull Pod pod, + @NonNull ArmadaSlave node, + @NonNull ArmadaJobMetadata metadata, + @NonNull EventOuterClass.EventMessage message, @NonNull Set terminationReasons) throws IOException, InterruptedException; } @Extension - public static class RemoveAgentOnPodDeleted implements Listener { + public static class RemoveAgentOnPodCancelled implements Listener { @Override public void onEvent( - @NonNull Watcher.Action action, - @NonNull KubernetesSlave node, - @NonNull Pod pod, + @NonNull ArmadaSlave node, + @NonNull ArmadaJobMetadata metadata, + @NonNull EventOuterClass.EventMessage message, @NonNull Set terminationReasons) throws IOException { - if (action != Watcher.Action.DELETED) { + if (!message.hasCancelled()) { return; } - String ns = pod.getMetadata().getNamespace(); - String name = pod.getMetadata().getName(); - LOGGER.info(() -> ns + "/" + name + " was just deleted, so removing corresponding Jenkins agent"); - node.getRunListener().getLogger().printf("Pod %s/%s was just deleted%n", ns, name); + String jobSet = node.getArmadaJobSetId(); + String job = node.getArmadaJobId(); + LOGGER.info(() -> jobSet + "/" + job + " was just cancelled, so removing corresponding Jenkins agent"); + node.getRunListener().getLogger().printf("Job %s/%s was just cancelled%n", jobSet, job); Jenkins.get().removeNode(node); disconnectComputer(node, new PodOfflineCause(Messages._PodOfflineCause_PodDeleted())); } } @Extension - public static class TerminateAgentOnContainerTerminated implements Listener { - - @Override - public void onEvent( - @NonNull Watcher.Action action, - @NonNull KubernetesSlave node, - @NonNull Pod pod, - @NonNull Set terminationReasons) - throws IOException, InterruptedException { - if 
(action != Watcher.Action.MODIFIED) { - return; - } - - List terminatedContainers = PodUtils.getTerminatedContainers(pod); - if (!terminatedContainers.isEmpty()) { - List containers = new ArrayList<>(); - terminatedContainers.forEach(c -> { - ContainerStateTerminated t = c.getState().getTerminated(); - String containerName = c.getName(); - containers.add(containerName); - String reason = t.getReason(); - if (reason != null) { - terminationReasons.add(reason); - } - }); - String reason = pod.getStatus().getReason(); - String message = pod.getStatus().getMessage(); - var sb = new StringBuilder() - .append(pod.getMetadata().getNamespace()) - .append("/") - .append(pod.getMetadata().getName()); - if (containers.size() > 1) { - sb.append(" Containers ") - .append(String.join(",", containers)) - .append(" were terminated."); - } else { - sb.append(" Container ") - .append(String.join(",", containers)) - .append(" was terminated."); - } - logAndCleanUp( - node, - pod, - terminationReasons, - reason, - message, - sb, - node.getRunListener(), - new PodOfflineCause(Messages._PodOfflineCause_ContainerFailed("ContainerError", containers))); - } - } - } - - @Extension - public static class TerminateAgentOnPodFailed implements Listener { + public static class TerminateAgentOnJobFailed implements Listener { @Override public void onEvent( - @NonNull Watcher.Action action, - @NonNull KubernetesSlave node, - @NonNull Pod pod, + @NonNull ArmadaSlave node, + @NonNull ArmadaJobMetadata metadata, + @NonNull EventOuterClass.EventMessage message, @NonNull Set terminationReasons) throws IOException, InterruptedException { - if (action != Watcher.Action.MODIFIED) { + if (!ArmadaClientUtil.isInFailedState(message.getEventsCase())) { return; } - if ("Failed".equals(pod.getStatus().getPhase())) { - String reason = pod.getStatus().getReason(); - String message = pod.getStatus().getMessage(); - logAndCleanUp( - node, - pod, - terminationReasons, - reason, - message, - new StringBuilder() - 
.append(pod.getMetadata().getNamespace()) - .append("/") - .append(pod.getMetadata().getName()) - .append(" Pod just failed."), - node.getRunListener(), - new PodOfflineCause(Messages._PodOfflineCause_PodFailed(reason, message))); - } + var reason = metadata.getReason(); + var cause = metadata.getCause(); + logAndCleanUp( + node, + terminationReasons, + reason, + cause, + new StringBuilder() + .append(metadata.getJobSetId()) + .append("/") + .append(metadata.getJobId()) + .append(" Job just failed."), + node.getRunListener(), + new PodOfflineCause(Messages._PodOfflineCause_PodFailed(reason, message))); } } private static void logAndCleanUp( - KubernetesSlave node, - Pod pod, + ArmadaSlave node, Set terminationReasons, String reason, - String message, + EventOuterClass.Cause failCause, StringBuilder sb, TaskListener runListener, - PodOfflineCause cause) - throws IOException, InterruptedException { + PodOfflineCause cause) { List details = new ArrayList<>(); if (reason != null) { details.add("Reason: " + reason); terminationReasons.add(reason); } - if (message != null) { - details.add("Message: " + message); - } if (!details.isEmpty()) { sb.append(" ").append(String.join(", ", details)).append("."); } - var evictionCondition = pod.getStatus().getConditions().stream() - .filter(c -> "EvictionByEvictionAPI".equals(c.getReason())) - .findFirst(); - if (evictionCondition.isPresent()) { - sb.append(" Pod was evicted by the Kubernetes Eviction API."); - terminationReasons.add(evictionCondition.get().getReason()); + if(failCause != null) { + sb.append(" failure cause: ").append(failCause); + terminationReasons.add(failCause.name()); } LOGGER.info(() -> sb + " Removing corresponding node " + node.getNodeName() + " from Jenkins."); runListener.getLogger().println(sb); - logLastLinesThenTerminateNode(node, pod, runListener); - PodUtils.cancelQueueItemFor(pod, "PodFailure"); + PodUtils.cancelQueueItemFor(node, "PodFailure"); disconnectComputer(node, cause); } - private static 
void logLastLinesThenTerminateNode(KubernetesSlave node, Pod pod, TaskListener runListener) - throws IOException, InterruptedException { - try { - String lines = PodUtils.logLastLines(pod, node.getKubernetesCloud().connect()); - if (lines != null) { - runListener.getLogger().print(lines); - } - } catch (KubernetesAuthException e) { - LOGGER.log(Level.FINE, e, () -> "Unable to get logs after pod failed event"); - } finally { - node.terminate(); - } - } - /** * Disconnect computer associated with the given node. Should be called AFTER terminate so the offline cause - * takes precedence over the one set by {@link KubernetesSlave#terminate()} (via {@link jenkins.model.Nodes#removeNode(Node)}). + * takes precedence over the one set by {@link ArmadaSlave#terminate()} (via {@link jenkins.model.Nodes#removeNode(Node)}). * @see Computer#disconnect(OfflineCause) * @param node node to disconnect * @param cause reason for offline */ - private static void disconnectComputer(KubernetesSlave node, OfflineCause cause) { + private static void disconnectComputer(ArmadaSlave node, OfflineCause cause) { Computer computer = node.getComputer(); if (computer != null) { computer.disconnect(cause); } } - @Extension - public static class TerminateAgentOnImagePullBackOff implements Listener { - - @SuppressFBWarnings( - value = "MS_SHOULD_BE_FINAL", - justification = "Allow tests or groovy console to change the value") - public static long BACKOFF_EVENTS_LIMIT = - SystemProperties.getInteger(Reaper.class.getName() + ".backoffEventsLimit", 3); - - public static final String IMAGE_PULL_BACK_OFF = "ImagePullBackOff"; - - // For each pod with at least 1 backoff, keep track of the first backoff event for 15 minutes. 
- private Cache ttlCache = - Caffeine.newBuilder().expireAfterWrite(15, TimeUnit.MINUTES).build(); - - @Override - public void onEvent( - @NonNull Watcher.Action action, - @NonNull KubernetesSlave node, - @NonNull Pod pod, - @NonNull Set terminationReasons) - throws IOException, InterruptedException { - if (action != Watcher.Action.MODIFIED) { - return; - } - - List backOffContainers = PodUtils.getContainers(pod, cs -> { - ContainerStateWaiting waiting = cs.getState().getWaiting(); - return waiting != null - && waiting.getMessage() != null - && waiting.getMessage().contains("Back-off pulling image"); - }); - - if (!backOffContainers.isEmpty()) { - List images = new ArrayList<>(); - backOffContainers.forEach(cs -> images.add(cs.getImage())); - var podUid = pod.getMetadata().getUid(); - var backOffNumber = ttlCache.get(podUid, k -> 0); - ttlCache.put(podUid, ++backOffNumber); - if (backOffNumber >= BACKOFF_EVENTS_LIMIT) { - var imagesString = String.join(",", images); - node.getRunListener() - .error("Unable to pull container image \"" + imagesString - + "\". Check if image tag name is spelled correctly."); - terminationReasons.add(IMAGE_PULL_BACK_OFF); - PodUtils.cancelQueueItemFor(pod, IMAGE_PULL_BACK_OFF); - node.terminate(); - disconnectComputer( - node, - new PodOfflineCause( - Messages._PodOfflineCause_ImagePullBackoff(IMAGE_PULL_BACK_OFF, images))); - } else { - node.getRunListener() - .error("Image pull backoff detected, waiting for image to be available. Will wait for " - + (BACKOFF_EVENTS_LIMIT - backOffNumber) - + " more events before terminating the node."); - } - } - } - } - - /** - * {@link SaveableListener} that will update cloud watchers when Jenkins configuration is updated. 
- */ @Extension public static class ReaperSaveableListener extends SaveableListener { @Override diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/config.jelly b/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/config.jelly index b01e52c9c..642f12651 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/config.jelly +++ b/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/config.jelly @@ -21,7 +21,7 @@ THE SOFTWARE. - + @@ -29,7 +29,7 @@ THE SOFTWARE. - + @@ -49,7 +49,7 @@ THE SOFTWARE. - + @@ -62,26 +62,14 @@ THE SOFTWARE. - - - - - - - - - - - - @@ -94,8 +82,6 @@ THE SOFTWARE. - - @@ -129,11 +115,6 @@ THE SOFTWARE. deleteCaption="${%Delete Pod Label}" /> - - - - @@ -157,5 +138,4 @@ THE SOFTWARE. - diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-garbageCollection.html b/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-garbageCollection.html index 45f5ce9ea..e69de29bb 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-garbageCollection.html +++ b/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-garbageCollection.html @@ -1,5 +0,0 @@ -

- Enables garbage collection of orphan pods for this Kubernetes cloud.
- - When enabled, Jenkins will periodically check for orphan pods that have not been touched for the given timeout period and delete them. -

diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-podRetention.html b/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-podRetention.html index 9fa0fd5ff..e69de29bb 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-podRetention.html +++ b/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-podRetention.html @@ -1,15 +0,0 @@ -
-

- This setting controls how agent pods are retained after the Jenkins build completes. - The following retention policies are provided: -

-
    -
  1. Never - always delete the agent pod.
  2. -
  3. On Failure - keep the agent pod if it fails during the build.
  4. -
  5. Always - always keep the agent pod.
  6. -
-

- Note: Kubernetes administrators are responsible for managing any kept agent pod. - These will not be deleted by the Jenkins Kubernetes plugin. -

-
\ No newline at end of file diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-serverCertificate.html b/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-serverCertificate.html index 4794fd7cd..e69de29bb 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-serverCertificate.html +++ b/src/main/resources/io/armadaproject/jenkins/plugin/ArmadaCloud/help-serverCertificate.html @@ -1,3 +0,0 @@ -
- X509 PEM encoded certificate. Can be additionally base64 encoded (as provided by Amazon EKS). -
diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/GarbageCollection/config.jelly b/src/main/resources/io/armadaproject/jenkins/plugin/GarbageCollection/config.jelly index d958f89ff..e69de29bb 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/GarbageCollection/config.jelly +++ b/src/main/resources/io/armadaproject/jenkins/plugin/GarbageCollection/config.jelly @@ -1,30 +0,0 @@ - - - - - - - - - - - - diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/GarbageCollection/help-namespaces.html b/src/main/resources/io/armadaproject/jenkins/plugin/GarbageCollection/help-namespaces.html index c9639c424..e69de29bb 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/GarbageCollection/help-namespaces.html +++ b/src/main/resources/io/armadaproject/jenkins/plugin/GarbageCollection/help-namespaces.html @@ -1,2 +0,0 @@ -Namespaces to look at for garbage collection, in addition to the default namespace defined for the cloud. -One namespace per line. diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/Messages.properties b/src/main/resources/io/armadaproject/jenkins/plugin/Messages.properties index 25161e0f4..f58673d8b 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/Messages.properties +++ b/src/main/resources/io/armadaproject/jenkins/plugin/Messages.properties @@ -1,9 +1,9 @@ -offline=Kubernetes agent is going offline +offline=Armada agent is going offline NonConfigurableKubernetesCloud.displayName=Kubernetes (predefined settings) KubernetesSlave.AgentIsProvisionedFromTemplate=Agent {0} is provisioned from template {1} RFC1123.error=Container Names MUST match RFC 1123 - They can only contain lowercase letters, numbers or dashes: {0} label.error=Labels must follow required specs - https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set: {0} -KubernetesFolderProperty.displayName=Kubernetes +ArmadaFolderProperty.displayName=Armada 
KubernetesSlave.HomeWarning=[WARNING] HOME is set to / in the agent container. You may encounter \ troubles when using tools or ssh client. This usually happens if the uid doesn't have any \ entry in /etc/passwd. Please add a user to your Dockerfile or set the HOME environment \ diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/config.jelly b/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/config.jelly index 7547cbea5..3902b7134 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/config.jelly +++ b/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/config.jelly @@ -29,10 +29,6 @@ THE SOFTWARE. - - - - @@ -77,8 +73,6 @@ THE SOFTWARE. - - diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/help-namespace.html b/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/help-namespace.html index 6215d744e..e69de29bb 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/help-namespace.html +++ b/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/help-namespace.html @@ -1,3 +0,0 @@ -Namespace in which to schedule the pod.
- -Leave empty to use the namespace defined at cloud level. diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/help-podRetention.html b/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/help-podRetention.html index 1a6250e26..e69de29bb 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/help-podRetention.html +++ b/src/main/resources/io/armadaproject/jenkins/plugin/PodTemplate/help-podRetention.html @@ -1,17 +0,0 @@ -
-

- This setting controls how agent pods are retained after the Jenkins build completes for this pod template. - Values other than "Default" will override the plugin's Pod Retention setting. - The following retention policies are provided: -

-
    -
  1. Always - always keep the agent pod.
  2. -
  3. Default - use the Pod Retention setting for the plugin.
  4. -
  5. Never - always delete the agent pod.
  6. -
  7. On Failure - keep the agent pod if it fails during the build.
  8. -
-

- Note: Kubernetes administrators are responsible for managing any kept agent pod. - These will not be deleted by the Jenkins Kubernetes plugin. -

-
diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/pipeline/KubernetesDeclarativeAgent/config.jelly b/src/main/resources/io/armadaproject/jenkins/plugin/pipeline/KubernetesDeclarativeAgent/config.jelly index b28d8c673..bc234ac4a 100644 --- a/src/main/resources/io/armadaproject/jenkins/plugin/pipeline/KubernetesDeclarativeAgent/config.jelly +++ b/src/main/resources/io/armadaproject/jenkins/plugin/pipeline/KubernetesDeclarativeAgent/config.jelly @@ -3,9 +3,6 @@ - - - @@ -27,9 +24,6 @@ - - - diff --git a/src/main/resources/io/armadaproject/jenkins/plugin/pipeline/PodTemplateStep/config.jelly b/src/main/resources/io/armadaproject/jenkins/plugin/pipeline/PodTemplateStep/config.jelly index fac72d05f..16bf8ea5f 100755 --- a/src/main/resources/io/armadaproject/jenkins/plugin/pipeline/PodTemplateStep/config.jelly +++ b/src/main/resources/io/armadaproject/jenkins/plugin/pipeline/PodTemplateStep/config.jelly @@ -10,10 +10,6 @@ - - - - @@ -66,9 +62,6 @@ - - - diff --git a/src/test/java/io/armadaproject/ArmadaMapperTest.java b/src/test/java/io/armadaproject/ArmadaMapperTest.java index 1f2a6bef0..881b51cd2 100644 --- a/src/test/java/io/armadaproject/ArmadaMapperTest.java +++ b/src/test/java/io/armadaproject/ArmadaMapperTest.java @@ -5,6 +5,7 @@ import api.SubmitOuterClass.JobSubmitRequest; import api.SubmitOuterClass.JobSubmitRequestItem; +import io.armadaproject.jenkins.plugin.job.ArmadaMapper; import io.fabric8.kubernetes.api.model.ContainerPort; import io.fabric8.kubernetes.api.model.ContainerResizePolicy; import io.fabric8.kubernetes.api.model.ObjectMeta; diff --git a/src/test/java/io/armadaproject/jenkins/plugin/ArmadaCloudFIPSTest.java b/src/test/java/io/armadaproject/jenkins/plugin/ArmadaCloudFIPSTest.java index f1f4c90a2..732ae7342 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/ArmadaCloudFIPSTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/ArmadaCloudFIPSTest.java @@ -28,33 +28,33 @@ public class ArmadaCloudFIPSTest { @Rule public 
JenkinsRule r = new JenkinsRule(); - @Test - @Issue("JENKINS-73460") - public void onlyFipsCompliantValuesAreAcceptedTest() throws IOException { - ArmadaCloud cloud = new ArmadaCloud("test-cloud"); - assertThrows(IllegalArgumentException.class, () -> cloud.setSkipTlsVerify(true)); - cloud.setSkipTlsVerify(false); - assertThrows(IllegalArgumentException.class, () -> cloud.setServerUrl("http://example.org")); - cloud.setServerUrl("https://example.org"); - assertThrows( - "Invalid certificates throw exception", - IllegalArgumentException.class, - () -> cloud.setServerCertificate(getCert("not-a-cert"))); - Throwable exception = assertThrows( - "Invalid length", IllegalArgumentException.class, () -> cloud.setServerCertificate(getCert("rsa1024"))); - assertThat(exception.getLocalizedMessage(), containsString("2048")); - cloud.setServerCertificate(getCert("rsa2048")); - exception = assertThrows( - "invalid length", IllegalArgumentException.class, () -> cloud.setServerCertificate(getCert("dsa1024"))); - assertThat(exception.getLocalizedMessage(), containsString("2048")); - cloud.setServerCertificate(getCert("dsa2048")); - exception = assertThrows( - "Invalid field size", - IllegalArgumentException.class, - () -> cloud.setServerCertificate(getCert("ecdsa192"))); - assertThat(exception.getLocalizedMessage(), containsString("224")); - cloud.setServerCertificate(getCert("ecdsa224")); - } +// @Test +// @Issue("JENKINS-73460") +// public void onlyFipsCompliantValuesAreAcceptedTest() throws IOException { +// ArmadaCloud cloud = new ArmadaCloud("test-cloud"); +// assertThrows(IllegalArgumentException.class, () -> cloud.setSkipTlsVerify(true)); +// cloud.setSkipTlsVerify(false); +// assertThrows(IllegalArgumentException.class, () -> cloud.setServerUrl("http://example.org")); +// cloud.setServerUrl("https://example.org"); +// assertThrows( +// "Invalid certificates throw exception", +// IllegalArgumentException.class, +// () -> cloud.setServerCertificate(getCert("not-a-cert"))); +// 
Throwable exception = assertThrows( +// "Invalid length", IllegalArgumentException.class, () -> cloud.setServerCertificate(getCert("rsa1024"))); +// assertThat(exception.getLocalizedMessage(), containsString("2048")); +// cloud.setServerCertificate(getCert("rsa2048")); +// exception = assertThrows( +// "invalid length", IllegalArgumentException.class, () -> cloud.setServerCertificate(getCert("dsa1024"))); +// assertThat(exception.getLocalizedMessage(), containsString("2048")); +// cloud.setServerCertificate(getCert("dsa2048")); +// exception = assertThrows( +// "Invalid field size", +// IllegalArgumentException.class, +// () -> cloud.setServerCertificate(getCert("ecdsa192"))); +// assertThat(exception.getLocalizedMessage(), containsString("224")); +// cloud.setServerCertificate(getCert("ecdsa224")); +// } @Test @Issue("JENKINS-73460") diff --git a/src/test/java/io/armadaproject/jenkins/plugin/ArmadaCloudTest.java b/src/test/java/io/armadaproject/jenkins/plugin/ArmadaCloudTest.java index d3f1645f9..16804b354 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/ArmadaCloudTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/ArmadaCloudTest.java @@ -233,7 +233,7 @@ public void copyConstructor() throws Exception { } } } - cloud.setServerCertificate("-----BEGIN CERTIFICATE-----"); + // cloud.setServerCertificate("-----BEGIN CERTIFICATE-----"); cloud.setTemplates(Collections.singletonList(pt)); cloud.setPodRetention(new Always()); cloud.setPodLabels(PodLabel.listOf("foo", "bar", "cat", "dog")); diff --git a/src/test/java/io/armadaproject/jenkins/plugin/KubernetesClientProviderTest.java b/src/test/java/io/armadaproject/jenkins/plugin/KubernetesClientProviderTest.java index a112eb8f9..7e53404b1 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/KubernetesClientProviderTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/KubernetesClientProviderTest.java @@ -1,89 +1,89 @@ -/* - * The MIT License - * - * Copyright (c) 2016, CloudBees, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -package io.armadaproject.jenkins.plugin; - -import static org.junit.Assert.assertEquals; - -import java.util.function.Consumer; -import io.armadaproject.jenkins.plugin.pod.retention.Always; -import org.junit.Assert; -import org.junit.Test; - -public class KubernetesClientProviderTest { - - @Test - public void testGetValidity() { - ArmadaCloud cloud = new ArmadaCloud("foo"); - // changes to these properties should trigger different validity value - checkValidityChanges( - cloud, - c -> c.setServerUrl("https://server:443"), - c -> c.setNamespace("blue"), - c -> c.setServerCertificate("cert"), - c -> c.setCredentialsId("secret"), - c -> c.setSkipTlsVerify(true), - c -> c.setConnectTimeout(46), - c -> c.setReadTimeout(43), - c -> c.setMaxRequestsPerHost(47), - c -> c.setUseJenkinsProxy(true)); - - // changes to these properties should not trigger different validity value - checkValidityDoesNotChange( - cloud, - c -> c.setPodLabels(PodLabel.listOf("foo", "bar")), - c -> c.setJenkinsUrl("https://localhost:8081"), - c -> c.setJenkinsTunnel("https://jenkins.cluster.svc"), - c -> c.setPodRetention(new Always()), - c -> c.setWebSocket(true), - c -> c.setRetentionTimeout(52), - c -> c.setDirectConnection(true)); - - // verify stability - assertEquals(KubernetesClientProvider.getValidity(cloud), KubernetesClientProvider.getValidity(cloud)); - } - - private void checkValidityChanges(ArmadaCloud cloud, Consumer... mutations) { - checkValidity(cloud, Assert::assertNotEquals, mutations); - } - - private void checkValidityDoesNotChange(ArmadaCloud cloud, Consumer... mutations) { - checkValidity(cloud, Assert::assertEquals, mutations); - } - - private void checkValidity( - ArmadaCloud cloud, ValidityAssertion validityAssertion, Consumer... 
mutations) { - int v = KubernetesClientProvider.getValidity(cloud); - int count = 1; - for (Consumer mut : mutations) { - mut.accept(cloud); - int after = KubernetesClientProvider.getValidity(cloud); - validityAssertion.doAssert("change #" + count++ + " of " + mutations.length, v, after); - v = after; - } - } - - interface ValidityAssertion { - void doAssert(String message, int before, int after); - } -} +///* +// * The MIT License +// * +// * Copyright (c) 2016, CloudBees, Inc. +// * +// * Permission is hereby granted, free of charge, to any person obtaining a copy +// * of this software and associated documentation files (the "Software"), to deal +// * in the Software without restriction, including without limitation the rights +// * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// * copies of the Software, and to permit persons to whom the Software is +// * furnished to do so, subject to the following conditions: +// * +// * The above copyright notice and this permission notice shall be included in +// * all copies or substantial portions of the Software. +// * +// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// * THE SOFTWARE. 
+// */ +//package io.armadaproject.jenkins.plugin; +// +//import static org.junit.Assert.assertEquals; +// +//import java.util.function.Consumer; +//import io.armadaproject.jenkins.plugin.pod.retention.Always; +//import org.junit.Assert; +//import org.junit.Test; +// +//public class KubernetesClientProviderTest { +// +//// @Test +//// public void testGetValidity() { +//// ArmadaCloud cloud = new ArmadaCloud("foo"); +//// // changes to these properties should trigger different validity value +//// checkValidityChanges( +//// cloud, +//// c -> c.setServerUrl("https://server:443"), +//// c -> c.setNamespace("blue"), +//// c -> c.setServerCertificate("cert"), +//// c -> c.setCredentialsId("secret"), +//// c -> c.setSkipTlsVerify(true), +//// c -> c.setConnectTimeout(46), +//// c -> c.setReadTimeout(43), +//// c -> c.setMaxRequestsPerHost(47), +//// c -> c.setUseJenkinsProxy(true)); +//// +//// // changes to these properties should not trigger different validity value +//// checkValidityDoesNotChange( +//// cloud, +//// c -> c.setPodLabels(PodLabel.listOf("foo", "bar")), +//// c -> c.setJenkinsUrl("https://localhost:8081"), +//// c -> c.setJenkinsTunnel("https://jenkins.cluster.svc"), +//// c -> c.setPodRetention(new Always()), +//// c -> c.setWebSocket(true), +//// c -> c.setRetentionTimeout(52), +//// c -> c.setDirectConnection(true)); +//// +//// // verify stability +//// assertEquals(KubernetesClientProvider.getValidity(cloud), KubernetesClientProvider.getValidity(cloud)); +//// } +// +// private void checkValidityChanges(ArmadaCloud cloud, Consumer... mutations) { +// checkValidity(cloud, Assert::assertNotEquals, mutations); +// } +// +// private void checkValidityDoesNotChange(ArmadaCloud cloud, Consumer... mutations) { +// checkValidity(cloud, Assert::assertEquals, mutations); +// } +// +// private void checkValidity( +// ArmadaCloud cloud, ValidityAssertion validityAssertion, Consumer... 
mutations) { +// int v = KubernetesClientProvider.getValidity(cloud); +// int count = 1; +// for (Consumer mut : mutations) { +// mut.accept(cloud); +// int after = KubernetesClientProvider.getValidity(cloud); +// validityAssertion.doAssert("change #" + count++ + " of " + mutations.length, v, after); +// v = after; +// } +// } +// +// interface ValidityAssertion { +// void doAssert(String message, int before, int after); +// } +//} diff --git a/src/test/java/io/armadaproject/jenkins/plugin/KubernetesFolderPropertyTest.java b/src/test/java/io/armadaproject/jenkins/plugin/KubernetesFolderPropertyTest.java index 4ff1ed1f8..cfb4320be 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/KubernetesFolderPropertyTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/KubernetesFolderPropertyTest.java @@ -27,37 +27,37 @@ public void propertySavedOnFirstSaveTest() throws Exception { j.jenkins.clouds.add(kube2); Folder folder = j.jenkins.createProject(Folder.class, "folder001"); - KubernetesFolderProperty prop = new KubernetesFolderProperty(); + ArmadaFolderProperty prop = new ArmadaFolderProperty(); folder.addProperty(prop); Folder after = j.configRoundtrip(folder); assertThat( "Property exists after saving", - after.getProperties().get(KubernetesFolderProperty.class), + after.getProperties().get(ArmadaFolderProperty.class), notNullValue()); assertThat( "No selected clouds", - after.getProperties().get(KubernetesFolderProperty.class).getPermittedClouds(), + after.getProperties().get(ArmadaFolderProperty.class).getPermittedClouds(), empty()); folder.getProperties() - .get(KubernetesFolderProperty.class) + .get(ArmadaFolderProperty.class) .setPermittedClouds(Collections.singletonList("kube1")); after = j.configRoundtrip(folder); assertThat( "Kube1 cloud is added", - after.getProperties().get(KubernetesFolderProperty.class).getPermittedClouds(), + after.getProperties().get(ArmadaFolderProperty.class).getPermittedClouds(), contains("kube1")); Folder subFolder = 
folder.createProject(Folder.class, "subfolder001"); - KubernetesFolderProperty prop2 = new KubernetesFolderProperty(); + ArmadaFolderProperty prop2 = new ArmadaFolderProperty(); prop2.setPermittedClouds(Collections.singletonList("kube2")); subFolder.addProperty(prop2); after = j.configRoundtrip(subFolder); assertThat( "Contains own and inherited cloud", - after.getProperties().get(KubernetesFolderProperty.class).getPermittedClouds(), + after.getProperties().get(ArmadaFolderProperty.class).getPermittedClouds(), containsInAnyOrder("kube1", "kube2")); } } diff --git a/src/test/java/io/armadaproject/jenkins/plugin/KubernetesQueueTaskDispatcherTest.java b/src/test/java/io/armadaproject/jenkins/plugin/KubernetesQueueTaskDispatcherTest.java index 4ddc2f1cc..bd7c3aeca 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/KubernetesQueueTaskDispatcherTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/KubernetesQueueTaskDispatcherTest.java @@ -36,8 +36,8 @@ public class KubernetesQueueTaskDispatcherTest { private Folder folderA; private Folder folderB; - private KubernetesSlave slaveA; - private KubernetesSlave slaveB; + private ArmadaSlave slaveA; + private ArmadaSlave slaveB; public void setUpTwoClouds() throws Exception { folderA = new Folder(jenkins.jenkins, "A"); @@ -54,24 +54,24 @@ public void setUpTwoClouds() throws Exception { jenkins.jenkins.clouds.add(cloudA); jenkins.jenkins.clouds.add(cloudB); - KubernetesFolderProperty property1 = new KubernetesFolderProperty(); + ArmadaFolderProperty property1 = new ArmadaFolderProperty(); folderA.addProperty(property1); JSONObject json1 = new JSONObject(); json1.element("usage-permission-A", true); json1.element("usage-permission-B", false); folderA.addProperty(property1.reconfigure(null, json1)); - KubernetesFolderProperty property2 = new KubernetesFolderProperty(); + ArmadaFolderProperty property2 = new ArmadaFolderProperty(); folderB.addProperty(property2); JSONObject json2 = new JSONObject(); 
json2.element("usage-permission-A", false); json2.element("usage-permission-B", true); folderB.addProperty(property2.reconfigure(null, json2)); - slaveA = new KubernetesSlave( - "A", new PodTemplate(), "testA", "A", "dockerA", new KubernetesLauncher(), RetentionStrategy.INSTANCE); - slaveB = new KubernetesSlave( - "B", new PodTemplate(), "testB", "B", "dockerB", new KubernetesLauncher(), RetentionStrategy.INSTANCE); + slaveA = new ArmadaSlave( + "A", new PodTemplate(), "testA", "A", "dockerA", new ArmadaLauncher(), RetentionStrategy.INSTANCE); + slaveB = new ArmadaSlave( + "B", new PodTemplate(), "testB", "B", "dockerB", new ArmadaLauncher(), RetentionStrategy.INSTANCE); } @Test @@ -80,17 +80,17 @@ public void checkRestrictedTwoClouds() throws Exception { FreeStyleProject projectA = folderA.createProject(FreeStyleProject.class, "buildJob"); FreeStyleProject projectB = folderB.createProject(FreeStyleProject.class, "buildJob"); - KubernetesQueueTaskDispatcher dispatcher = new KubernetesQueueTaskDispatcher(); + ArmadaQueueTaskDispatcher dispatcher = new ArmadaQueueTaskDispatcher(); assertNull(dispatcher.canTake( slaveA, new Queue.BuildableItem(new Queue.WaitingItem(Calendar.getInstance(), projectA, new ArrayList<>())))); assertTrue( canTake(dispatcher, slaveB, projectA) - instanceof KubernetesQueueTaskDispatcher.KubernetesCloudNotAllowed); + instanceof ArmadaQueueTaskDispatcher.KubernetesCloudNotAllowed); assertTrue( canTake(dispatcher, slaveA, projectB) - instanceof KubernetesQueueTaskDispatcher.KubernetesCloudNotAllowed); + instanceof ArmadaQueueTaskDispatcher.KubernetesCloudNotAllowed); assertNull(canTake(dispatcher, slaveB, projectB)); } @@ -102,9 +102,9 @@ public void checkNotRestrictedClouds() throws Exception { ArmadaCloud cloud = new ArmadaCloud("C"); cloud.setUsageRestricted(false); jenkins.jenkins.clouds.add(cloud); - KubernetesQueueTaskDispatcher dispatcher = new KubernetesQueueTaskDispatcher(); - KubernetesSlave slave = new KubernetesSlave( - "C", new 
PodTemplate(), "testC", "C", "dockerC", new KubernetesLauncher(), RetentionStrategy.INSTANCE); + ArmadaQueueTaskDispatcher dispatcher = new ArmadaQueueTaskDispatcher(); + ArmadaSlave slave = new ArmadaSlave( + "C", new PodTemplate(), "testC", "C", "dockerC", new ArmadaLauncher(), RetentionStrategy.INSTANCE); assertNull(canTake(dispatcher, slave, project)); } @@ -113,7 +113,7 @@ public void checkNotRestrictedClouds() throws Exception { public void checkDumbSlave() throws Exception { DumbSlave slave = jenkins.createOnlineSlave(); FreeStyleProject project = jenkins.createProject(FreeStyleProject.class); - KubernetesQueueTaskDispatcher dispatcher = new KubernetesQueueTaskDispatcher(); + ArmadaQueueTaskDispatcher dispatcher = new ArmadaQueueTaskDispatcher(); assertNull(canTake(dispatcher, slave, project)); } @@ -124,20 +124,20 @@ public void checkPipelinesRestrictedTwoClouds() throws Exception { WorkflowJob job = folderA.createProject(WorkflowJob.class, "pipeline"); when(task.getOwnerTask()).thenReturn(job); - KubernetesQueueTaskDispatcher dispatcher = new KubernetesQueueTaskDispatcher(); + ArmadaQueueTaskDispatcher dispatcher = new ArmadaQueueTaskDispatcher(); assertNull(canTake(dispatcher, slaveA, task)); assertTrue( - canTake(dispatcher, slaveB, task) instanceof KubernetesQueueTaskDispatcher.KubernetesCloudNotAllowed); + canTake(dispatcher, slaveB, task) instanceof ArmadaQueueTaskDispatcher.KubernetesCloudNotAllowed); } - private CauseOfBlockage canTake(KubernetesQueueTaskDispatcher dispatcher, Slave slave, Project project) { + private CauseOfBlockage canTake(ArmadaQueueTaskDispatcher dispatcher, Slave slave, Project project) { return dispatcher.canTake( slave, new Queue.BuildableItem(new Queue.WaitingItem(Calendar.getInstance(), project, new ArrayList<>()))); } - private CauseOfBlockage canTake(KubernetesQueueTaskDispatcher dispatcher, Slave slave, Queue.Task task) { + private CauseOfBlockage canTake(ArmadaQueueTaskDispatcher dispatcher, Slave slave, Queue.Task 
task) { return dispatcher.canTake( slave, new Queue.BuildableItem(new Queue.WaitingItem(Calendar.getInstance(), task, new ArrayList<>()))); } diff --git a/src/test/java/io/armadaproject/jenkins/plugin/KubernetesSlaveTest.java b/src/test/java/io/armadaproject/jenkins/plugin/KubernetesSlaveTest.java index a23e9ca5c..5e16aa772 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/KubernetesSlaveTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/KubernetesSlaveTest.java @@ -56,16 +56,16 @@ public void testGetSlaveName() { List volumes = Collections.emptyList(); List containers = Collections.emptyList(); - KubernetesTestUtil.assertRegex(KubernetesSlave.getSlaveName(new PodTemplate("image", volumes)), "^jenkins-agent-[0-9a-z]{5}$"); + KubernetesTestUtil.assertRegex(ArmadaSlave.getSlaveName(new PodTemplate("image", volumes)), "^jenkins-agent-[0-9a-z]{5}$"); KubernetesTestUtil.assertRegex( - KubernetesSlave.getSlaveName(new PodTemplate("", volumes, containers)), "^jenkins-agent-[0-9a-z]{5}$"); + ArmadaSlave.getSlaveName(new PodTemplate("", volumes, containers)), "^jenkins-agent-[0-9a-z]{5}$"); KubernetesTestUtil.assertRegex( - KubernetesSlave.getSlaveName(new PodTemplate("a name", volumes, containers)), ("^a-name-[0-9a-z]{5}$")); + ArmadaSlave.getSlaveName(new PodTemplate("a name", volumes, containers)), ("^a-name-[0-9a-z]{5}$")); KubernetesTestUtil.assertRegex( - KubernetesSlave.getSlaveName(new PodTemplate("an_other_name", volumes, containers)), + ArmadaSlave.getSlaveName(new PodTemplate("an_other_name", volumes, containers)), ("^an-other-name-[0-9a-z]{5}$")); KubernetesTestUtil.assertRegex( - KubernetesSlave.getSlaveName(new PodTemplate("whatever...", volumes, containers)), + ArmadaSlave.getSlaveName(new PodTemplate("whatever...", volumes, containers)), ("jenkins-agent-[0-9a-z]{5}")); } @@ -89,7 +89,7 @@ public void testGetPodRetention() { r.jenkins.clouds.add(cloud); for (KubernetesSlaveTestCase testCase : cases) { 
cloud.setPodRetention(testCase.getCloudPodRetention()); - KubernetesSlave testSlave = testCase.buildSubject(cloud); + ArmadaSlave testSlave = testCase.buildSubject(cloud); assertEquals(testCase.getExpectedResult(), testSlave.getPodRetention(cloud)); } } catch (IOException | Descriptor.FormException e) { @@ -113,8 +113,8 @@ public static class KubernetesSlaveTestCase { private String podPhase; private T expectedResult; - public KubernetesSlave buildSubject(ArmadaCloud cloud) throws IOException, Descriptor.FormException { - return new KubernetesSlave.Builder() + public ArmadaSlave buildSubject(ArmadaCloud cloud) throws IOException, Descriptor.FormException { + return new ArmadaSlave.Builder() .cloud(cloud) .podTemplate(podTemplate) .build(); diff --git a/src/test/java/io/armadaproject/jenkins/plugin/PodTemplateBuilderTest.java b/src/test/java/io/armadaproject/jenkins/plugin/PodTemplateBuilderTest.java index e58631622..3fd8ddcef 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/PodTemplateBuilderTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/PodTemplateBuilderTest.java @@ -89,14 +89,14 @@ public class PodTemplateBuilderTest { private ArmadaCloud cloud = new ArmadaCloud("test"); @Mock - private KubernetesSlave slave; + private ArmadaSlave slave; @Mock - private KubernetesComputer computer; + private ArmadaComputer computer; @Before public void setUp() { - when(slave.getKubernetesCloud()).thenReturn(cloud); + when(slave.getArmadaCloud()).thenReturn(cloud); } @WithoutJenkins @@ -371,7 +371,7 @@ private void setupStubs() { when(computer.getName()).thenReturn(AGENT_NAME); when(computer.getJnlpMac()).thenReturn(AGENT_SECRET); when(slave.getComputer()).thenReturn(computer); - when(slave.getKubernetesCloud()).thenReturn(cloud); + when(slave.getArmadaCloud()).thenReturn(cloud); } private void validatePod(Pod pod, boolean directConnection) { @@ -443,7 +443,7 @@ private void validatePod(Pod pod, boolean fromYaml, boolean directConnection) { 
validateContainers(pod, slave, directConnection); } - private void validateContainers(Pod pod, KubernetesSlave slave, boolean directConnection) { + private void validateContainers(Pod pod, ArmadaSlave slave, boolean directConnection) { String[] exclusions = new String[] { "JENKINS_URL", "JENKINS_SECRET", "JENKINS_NAME", "JENKINS_AGENT_NAME", "JENKINS_AGENT_WORKDIR" }; @@ -457,7 +457,7 @@ private void validateContainers(Pod pod, KubernetesSlave slave, boolean directCo } } - private void validateJnlpContainer(Container jnlp, KubernetesSlave slave, boolean directConnection) { + private void validateJnlpContainer(Container jnlp, ArmadaSlave slave, boolean directConnection) { assertThat(jnlp.getCommand(), empty()); List envVars = new ArrayList<>(); if (slave != null) { diff --git a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/AbstractKubernetesPipelineTest.java b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/AbstractKubernetesPipelineTest.java index 1fdf9bba9..6593179c3 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/AbstractKubernetesPipelineTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/AbstractKubernetesPipelineTest.java @@ -48,7 +48,7 @@ import io.armadaproject.jenkins.plugin.ContainerEnvVar; import io.armadaproject.jenkins.plugin.ContainerTemplate; import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.armadaproject.jenkins.plugin.KubernetesComputer; +import io.armadaproject.jenkins.plugin.ArmadaComputer; import io.armadaproject.jenkins.plugin.KubernetesTestUtil; import io.armadaproject.jenkins.plugin.PodTemplate; import io.armadaproject.jenkins.plugin.PodUtils; @@ -220,10 +220,10 @@ protected static List podTemplatesWithLabel(String label, List getKubernetesComputers() { + protected List getKubernetesComputers() { return Arrays.stream(r.jenkins.getComputers()) - .filter(c -> c instanceof KubernetesComputer) - .map(KubernetesComputer.class::cast) + .filter(c -> c instanceof ArmadaComputer) + 
.map(ArmadaComputer.class::cast) .collect(Collectors.toList()); } } diff --git a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/ContainerExecDecoratorTest.java b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/ContainerExecDecoratorTest.java index c763296ae..ba705cabb 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/ContainerExecDecoratorTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/ContainerExecDecoratorTest.java @@ -67,7 +67,7 @@ import org.apache.commons.lang.StringUtils; import io.armadaproject.jenkins.plugin.KubernetesClientProvider; import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.armadaproject.jenkins.plugin.KubernetesSlave; +import io.armadaproject.jenkins.plugin.ArmadaSlave; import io.armadaproject.jenkins.plugin.PodTemplate; import org.jenkinsci.plugins.workflow.steps.StepContext; import org.junit.After; @@ -99,7 +99,7 @@ public class ContainerExecDecoratorTest { private ContainerExecDecorator decorator; private Pod pod; - private KubernetesSlave agent; + private ArmadaSlave agent; private DumbSlave dumbAgent; @Rule @@ -159,10 +159,10 @@ public void configureCloud() throws Exception { client.pods().withName(podName).waitUntilReady(30, TimeUnit.SECONDS); PodTemplate template = new PodTemplate(); template.setName(pod.getMetadata().getName()); - agent = mock(KubernetesSlave.class); + agent = mock(ArmadaSlave.class); when(agent.getNamespace()).thenReturn(client.getNamespace()); when(agent.getPodName()).thenReturn(pod.getMetadata().getName()); - doReturn(cloud).when(agent).getKubernetesCloud(); + doReturn(cloud).when(agent).getArmadaCloud(); when(agent.getPod()).thenReturn(Optional.of(pod)); StepContext context = mock(StepContext.class); when(context.get(Node.class)).thenReturn(agent); diff --git a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/ContainerExecDecoratorWindowsTest.java b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/ContainerExecDecoratorWindowsTest.java 
index aa865a9b9..4c63f67a0 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/ContainerExecDecoratorWindowsTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/ContainerExecDecoratorWindowsTest.java @@ -59,7 +59,7 @@ import org.apache.commons.lang.RandomStringUtils; import io.armadaproject.jenkins.plugin.KubernetesClientProvider; import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.armadaproject.jenkins.plugin.KubernetesSlave; +import io.armadaproject.jenkins.plugin.ArmadaSlave; import io.armadaproject.jenkins.plugin.PodTemplate; import org.jenkinsci.plugins.workflow.steps.StepContext; import org.junit.After; @@ -86,7 +86,7 @@ public class ContainerExecDecoratorWindowsTest { private ContainerExecDecorator decorator; private Pod pod; - private KubernetesSlave agent; + private ArmadaSlave agent; @Rule public LoggerRule containerExecLogs = new LoggerRule() @@ -134,10 +134,10 @@ public void configureCloud() throws Exception { client.pods().withName(podName).waitUntilReady(10, TimeUnit.MINUTES); PodTemplate template = new PodTemplate(); template.setName(pod.getMetadata().getName()); - agent = mock(KubernetesSlave.class); + agent = mock(ArmadaSlave.class); when(agent.getNamespace()).thenReturn(client.getNamespace()); when(agent.getPodName()).thenReturn(pod.getMetadata().getName()); - doReturn(cloud).when(agent).getKubernetesCloud(); + doReturn(cloud).when(agent).getArmadaCloud(); when(agent.getPod()).thenReturn(Optional.of(pod)); StepContext context = mock(StepContext.class); when(context.get(Node.class)).thenReturn(agent); diff --git a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/KubernetesPipelineOverridenNamespaceTest.java b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/KubernetesPipelineOverridenNamespaceTest.java index f0340f947..980a6f473 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/KubernetesPipelineOverridenNamespaceTest.java +++ 
b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/KubernetesPipelineOverridenNamespaceTest.java @@ -6,7 +6,7 @@ import java.util.HashMap; import java.util.Map; -import io.armadaproject.jenkins.plugin.KubernetesComputer; +import io.armadaproject.jenkins.plugin.ArmadaComputer; import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; import org.junit.Test; @@ -21,7 +21,7 @@ public void runWithCloudOverriddenNamespace() throws Exception { assertNotNull(createJobThenScheduleRun()); SemaphoreStep.waitForStart("pod/1", b); - for (KubernetesComputer c : getKubernetesComputers()) { + for (ArmadaComputer c : getKubernetesComputers()) { assertEquals( overriddenNamespace, c.getNode().getPod().get().getMetadata().getNamespace()); @@ -47,7 +47,7 @@ public void runWithStepOverriddenNamespace() throws Exception { env.put("OVERRIDDEN_NAMESPACE", stepNamespace); assertNotNull(createJobThenScheduleRun(env)); SemaphoreStep.waitForStart("pod/1", b); - for (KubernetesComputer c : getKubernetesComputers()) { + for (ArmadaComputer c : getKubernetesComputers()) { assertEquals(stepNamespace, c.getNode().getPod().get().getMetadata().getNamespace()); } SemaphoreStep.success("pod/1", null); diff --git a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/KubernetesPipelineTest.java b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/KubernetesPipelineTest.java index f1203d605..e5522578d 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/KubernetesPipelineTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/KubernetesPipelineTest.java @@ -66,16 +66,13 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; import jenkins.metrics.api.Metrics; import jenkins.model.Jenkins; -import io.armadaproject.jenkins.plugin.GarbageCollection; import io.armadaproject.jenkins.plugin.ArmadaCloud; 
-import io.armadaproject.jenkins.plugin.KubernetesComputer; -import io.armadaproject.jenkins.plugin.KubernetesSlave; +import io.armadaproject.jenkins.plugin.ArmadaSlave; import io.armadaproject.jenkins.plugin.KubernetesTestUtil; import io.armadaproject.jenkins.plugin.MetricNames; import io.armadaproject.jenkins.plugin.PodAnnotation; @@ -96,7 +93,6 @@ import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; import org.jetbrains.annotations.NotNull; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Ignore; import org.junit.Rule; @@ -148,8 +144,8 @@ public void allDead() throws Exception { Thread.sleep(100); } Jenkins.get().getNodes().stream() - .filter(KubernetesSlave.class::isInstance) - .map(KubernetesSlave.class::cast) + .filter(ArmadaSlave.class::isInstance) + .map(ArmadaSlave.class::cast) .forEach(agent -> { LOGGER.info(() -> "Deleting remaining node " + agent); try { @@ -251,7 +247,7 @@ public void runInPod() throws Exception { .collect(Collectors.toList()), // LogRecord does not override toString emptyIterable()); - assertTrue(Metrics.metricRegistry().counter(MetricNames.PODS_LAUNCHED).getCount() > 0); + assertTrue(Metrics.metricRegistry().counter(MetricNames.JOBS_LAUNCHED).getCount() > 0); } @Test @@ -502,9 +498,9 @@ public void podTemplateWithMultipleLabels() throws Exception { SemaphoreStep.waitForStart("pod/1", b); Map labels = getLabels(cloud, this, name); labels.put("jenkins/label", "label1_label2"); - KubernetesSlave node = r.jenkins.getNodes().stream() - .filter(KubernetesSlave.class::isInstance) - .map(KubernetesSlave.class::cast) + ArmadaSlave node = r.jenkins.getNodes().stream() + .filter(ArmadaSlave.class::isInstance) + .map(ArmadaSlave.class::cast) .findAny() .get(); assertTrue(node.getAssignedLabels().containsAll(Label.parse("label1 label2"))); @@ -633,12 +629,12 @@ public void computerCantBeConfigured() throws Exception { .everywhere() .to("admin")); SemaphoreStep.waitForStart("pod/1", b); - 
Optional optionalNode = r.jenkins.getNodes().stream() - .filter(KubernetesSlave.class::isInstance) - .map(KubernetesSlave.class::cast) + Optional optionalNode = r.jenkins.getNodes().stream() + .filter(ArmadaSlave.class::isInstance) + .map(ArmadaSlave.class::cast) .findAny(); assertTrue(optionalNode.isPresent()); - KubernetesSlave node = optionalNode.get(); + ArmadaSlave node = optionalNode.get(); JenkinsRule.WebClient wc = r.createWebClient(); wc.getOptions().setPrintContentOnFailingStatusCode(false); @@ -886,35 +882,6 @@ public void cancelOnlyRelevantQueueItem() throws Exception { r.assertLogContains("ran on special agent", b); } - @Test - public void garbageCollection() throws Exception { - // Pod exists, need to kill the build, delete the agent without deleting the pod. - // Wait for the timeout to expire and check that the pod is deleted. - var garbageCollection = new GarbageCollection(); - // Considering org.csanchez.jenkins.plugins.kubernetes.GarbageCollection.recurrencePeriod=5, this leaves 3 ticks - garbageCollection.setTimeout(15); - cloud.setGarbageCollection(garbageCollection); - r.jenkins.save(); - r.waitForMessage("Running on remote agent", b); - Pod pod = null; - for (var c : r.jenkins.getComputers()) { - if (c instanceof KubernetesComputer) { - var node = (KubernetesSlave) c.getNode(); - pod = node.getPod().get(); - Assert.assertNotNull(pod); - b.doKill(); - r.jenkins.removeNode(node); - break; - } - } - r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b)); - final var finalPod = pod; - var client = cloud.connect(); - assertNotNull(client.resource(finalPod).get()); - await().timeout(1, TimeUnit.MINUTES) - .until(() -> client.resource(finalPod).get() == null); - } - @Test public void handleEviction() throws Exception { SemaphoreStep.waitForStart("pod/1", b); diff --git a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/RestartPipelineTest.java b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/RestartPipelineTest.java index 
4ab5bf316..ee4c1e50d 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/pipeline/RestartPipelineTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/pipeline/RestartPipelineTest.java @@ -45,7 +45,7 @@ import io.armadaproject.jenkins.plugin.ContainerEnvVar; import io.armadaproject.jenkins.plugin.ContainerTemplate; import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.armadaproject.jenkins.plugin.KubernetesSlave; +import io.armadaproject.jenkins.plugin.ArmadaSlave; import io.armadaproject.jenkins.plugin.PodTemplate; import io.armadaproject.jenkins.plugin.model.KeyValueEnvVar; import io.armadaproject.jenkins.plugin.model.SecretEnvVar; @@ -284,10 +284,10 @@ public void taskListenerAfterRestart() throws Throwable { .getItemByFullName(projectName.get(), WorkflowJob.class) .getBuildByNumber(1); Optional first = r.jenkins.getNodes().stream() - .filter(KubernetesSlave.class::isInstance) + .filter(ArmadaSlave.class::isInstance) .findFirst(); assertTrue("Kubernetes node should be present after restart", first.isPresent()); - KubernetesSlave node = (KubernetesSlave) first.get(); + ArmadaSlave node = (ArmadaSlave) first.get(); r.waitForMessage("Ready to run", b); waitForTemplate(node).getListener().getLogger().println("This got printed"); r.waitForMessage("This got printed", b); @@ -311,10 +311,10 @@ public void taskListenerAfterRestart_multipleLabels() throws Throwable { .getItemByFullName(projectName.get(), WorkflowJob.class) .getBuildByNumber(1); Optional first = r.jenkins.getNodes().stream() - .filter(KubernetesSlave.class::isInstance) + .filter(ArmadaSlave.class::isInstance) .findFirst(); assertTrue("Kubernetes node should be present after restart", first.isPresent()); - KubernetesSlave node = (KubernetesSlave) first.get(); + ArmadaSlave node = (ArmadaSlave) first.get(); r.waitForMessage("Ready to run", b); waitForTemplate(node).getListener().getLogger().println("This got printed"); r.waitForMessage("This got printed", b); @@ -323,7 +323,7 @@ 
public void taskListenerAfterRestart_multipleLabels() throws Throwable { }); } - private PodTemplate waitForTemplate(KubernetesSlave node) throws InterruptedException { + private PodTemplate waitForTemplate(ArmadaSlave node) throws InterruptedException { while (node.getTemplateOrNull() == null) { Thread.sleep(100L); } diff --git a/src/test/java/io/armadaproject/jenkins/plugin/pod/decorator/PodDecoratorTest.java b/src/test/java/io/armadaproject/jenkins/plugin/pod/decorator/PodDecoratorTest.java index b90ed978b..b377806ec 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/pod/decorator/PodDecoratorTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/pod/decorator/PodDecoratorTest.java @@ -7,7 +7,7 @@ import io.fabric8.kubernetes.api.model.Pod; import io.fabric8.kubernetes.api.model.PodBuilder; import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.armadaproject.jenkins.plugin.KubernetesSlave; +import io.armadaproject.jenkins.plugin.ArmadaSlave; import io.armadaproject.jenkins.plugin.PodTemplate; import io.armadaproject.jenkins.plugin.PodTemplateBuilder; import org.junit.Before; @@ -27,13 +27,13 @@ public class PodDecoratorTest { public MockitoRule mockitoRule = MockitoJUnit.rule(); @Mock - private KubernetesSlave slave; + private ArmadaSlave slave; private ArmadaCloud cloud = new ArmadaCloud("test"); @Before public void setUp() { - when(slave.getKubernetesCloud()).thenReturn(cloud); + when(slave.getArmadaCloud()).thenReturn(cloud); } @TestExtension("activeDecorator") diff --git a/src/test/java/io/armadaproject/jenkins/plugin/pod/retention/ReaperTest.java b/src/test/java/io/armadaproject/jenkins/plugin/pod/retention/ReaperTest.java index ae2c36068..07be27329 100644 --- a/src/test/java/io/armadaproject/jenkins/plugin/pod/retention/ReaperTest.java +++ b/src/test/java/io/armadaproject/jenkins/plugin/pod/retention/ReaperTest.java @@ -51,8 +51,8 @@ import okhttp3.mockwebserver.RecordedRequest; import 
io.armadaproject.jenkins.plugin.KubernetesClientProvider; import io.armadaproject.jenkins.plugin.ArmadaCloud; -import io.armadaproject.jenkins.plugin.KubernetesComputer; -import io.armadaproject.jenkins.plugin.KubernetesSlave; +import io.armadaproject.jenkins.plugin.ArmadaComputer; +import io.armadaproject.jenkins.plugin.ArmadaSlave; import io.armadaproject.jenkins.plugin.PodTemplate; import org.junit.After; import org.junit.Rule; @@ -90,7 +90,7 @@ public void testMaybeActivate() throws IOException, InterruptedException { .always(); // add node that does not exist in k8s so it get's removed - KubernetesSlave podNotRunning = addNode(cloud, "k8s-node-123", "k8s-node"); + ArmadaSlave podNotRunning = addNode(cloud, "k8s-node-123", "k8s-node"); assertEquals("node added to jenkins", j.jenkins.getNodes().size(), 1); // activate reaper @@ -110,7 +110,7 @@ public void testMaybeActivate() throws IOException, InterruptedException { .assertRequestCountAtLeast(watchPodsPath, 1); // create new node to verify activate is not run again - KubernetesSlave newNode = addNode(cloud, "new-123", "new"); + ArmadaSlave newNode = addNode(cloud, "new-123", "new"); j.jenkins.addNode(newNode); assertEquals("node added to jenkins", j.jenkins.getNodes().size(), 1); // call again should not add any more calls @@ -150,9 +150,9 @@ public void testActivateOnNewComputer() throws IOException, InterruptedException // add new cloud ArmadaCloud cloud = addCloud("k8s", "foo"); - KubernetesSlave n2 = addNode(cloud, "p1-123", "p1"); + ArmadaSlave n2 = addNode(cloud, "p1-123", "p1"); TaskListener tl = mock(TaskListener.class); - KubernetesComputer kc = new KubernetesComputer(n2); + ArmadaComputer kc = new ArmadaComputer(n2); // should not be watching the newly created cloud at this point assertShouldNotBeWatching(r, cloud); @@ -208,9 +208,9 @@ public void testReconnectOnNewComputer() throws InterruptedException, IOExceptio System.out.println("Watch removed"); // launch computer - KubernetesSlave n2 = 
addNode(cloud, "p1-123", "p1"); + ArmadaSlave n2 = addNode(cloud, "p1-123", "p1"); TaskListener tl = mock(TaskListener.class); - KubernetesComputer kc = new KubernetesComputer(n2); + ArmadaComputer kc = new ArmadaComputer(n2); r.preLaunch(kc, tl); // should have started new watch @@ -319,7 +319,7 @@ public void testReplaceWatchWhenCloudUpdated() throws InterruptedException, IOEx cloud.setNamespace("bar"); j.jenkins.save(); - KubernetesSlave node = addNode(cloud, "node-123", "node"); + ArmadaSlave node = addNode(cloud, "node-123", "node"); // watch is still active assertShouldBeWatching(r, cloud); @@ -466,7 +466,7 @@ public void testCloseWatchersOnShutdown() throws InterruptedException { @Test(timeout = 10_000) public void testDeleteNodeOnPodDelete() throws IOException, InterruptedException { ArmadaCloud cloud = addCloud("k8s", "foo"); - KubernetesSlave node = addNode(cloud, "node-123", "node"); + ArmadaSlave node = addNode(cloud, "node-123", "node"); Pod node123 = createPod(node); server.expect() @@ -509,7 +509,7 @@ public void testDeleteNodeOnPodDelete() throws IOException, InterruptedException @Test(timeout = 10_000) public void testTerminateAgentOnContainerTerminated() throws IOException, InterruptedException { ArmadaCloud cloud = addCloud("k8s", "foo"); - KubernetesSlave node = addNode(cloud, "node-123", "node"); + ArmadaSlave node = addNode(cloud, "node-123", "node"); Pod node123 = withContainerStatusTerminated(createPod(node)); String watchPodsPath = "/api/v1/namespaces/foo/pods?allowWatchBookmarks=true&watch=true"; @@ -566,7 +566,7 @@ public void testTerminateAgentOnContainerTerminated() throws IOException, Interr public void testTerminateAgentOnPodFailed() throws IOException, InterruptedException { System.out.println(server.getKubernetesMockServer().getPort()); ArmadaCloud cloud = addCloud("k8s", "foo"); - KubernetesSlave node = addNode(cloud, "node-123", "node"); + ArmadaSlave node = addNode(cloud, "node-123", "node"); Pod node123 = createPod(node); 
node123.getStatus().setPhase("Failed"); @@ -607,7 +607,7 @@ public void testTerminateAgentOnPodFailed() throws IOException, InterruptedExcep @Test(timeout = 10_000) public void testTerminateAgentOnImagePullBackoff() throws IOException, InterruptedException { ArmadaCloud cloud = addCloud("k8s", "foo"); - KubernetesSlave node = addNode(cloud, "node-123", "node"); + ArmadaSlave node = addNode(cloud, "node-123", "node"); Pod node123 = withContainerImagePullBackoff(createPod(node)); Reaper.TerminateAgentOnImagePullBackOff.BACKOFF_EVENTS_LIMIT = 2; @@ -684,7 +684,7 @@ private Pod withContainerStatusTerminated(Pod pod) { return pod; } - private Pod createPod(KubernetesSlave node) { + private Pod createPod(ArmadaSlave node) { return new PodBuilder() .withNewMetadata() .withName(node.getPodName()) @@ -698,12 +698,12 @@ private Pod createPod(KubernetesSlave node) { .build(); } - private KubernetesSlave addNode(ArmadaCloud cld, String podName, String nodeName) throws IOException { - KubernetesSlave node = mock(KubernetesSlave.class); + private ArmadaSlave addNode(ArmadaCloud cld, String podName, String nodeName) throws IOException { + ArmadaSlave node = mock(ArmadaSlave.class); when(node.getNodeName()).thenReturn(nodeName); when(node.getNamespace()).thenReturn(cld.getNamespace()); when(node.getPodName()).thenReturn(podName); - when(node.getKubernetesCloud()).thenReturn(cld); + when(node.getArmadaCloud()).thenReturn(cld); when(node.getCloudName()).thenReturn(cld.name); when(node.getNumExecutors()).thenReturn(1); PodTemplate podTemplate = new PodTemplate(); @@ -711,7 +711,7 @@ private KubernetesSlave addNode(ArmadaCloud cld, String podName, String nodeName when(node.getRunListener()).thenReturn(StreamTaskListener.fromStderr()); ComputerLauncher launcher = mock(ComputerLauncher.class); when(node.getLauncher()).thenReturn(launcher); - KubernetesComputer computer = mock(KubernetesComputer.class); + ArmadaComputer computer = mock(ArmadaComputer.class); 
when(node.getComputer()).thenReturn(computer); j.jenkins.addNode(node); return node; @@ -803,7 +803,7 @@ public static class CapturingReaperListener extends ExternalResource implements @Override public synchronized void onEvent( @NonNull Watcher.Action action, - @NonNull KubernetesSlave node, + @NonNull ArmadaSlave node, @NonNull Pod pod, @NonNull Set terminationReaons) throws IOException, InterruptedException { @@ -843,7 +843,7 @@ public CapturingReaperListener waitForEvents() throws InterruptedException { * @param action action to match * @param node target node */ - public synchronized void expectEvent(Watcher.Action action, KubernetesSlave node) { + public synchronized void expectEvent(Watcher.Action action, ArmadaSlave node) { boolean found = CAPTURED_EVENTS.stream().anyMatch(e -> e.action == action && e.node == node); assertTrue("expected event: " + action + ", " + node, found); } @@ -863,10 +863,10 @@ protected void after() { private static class ReaperListenerWatchEvent { final Watcher.Action action; - final KubernetesSlave node; + final ArmadaSlave node; final Pod pod; - private ReaperListenerWatchEvent(Watcher.Action action, KubernetesSlave node, Pod pod) { + private ReaperListenerWatchEvent(Watcher.Action action, ArmadaSlave node, Pod pod) { this.action = action; this.node = node; this.pod = pod; diff --git a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/cascadingDelete.groovy b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/cascadingDelete.groovy index 6b68c117d..d5129227e 100644 --- a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/cascadingDelete.groovy +++ b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/cascadingDelete.groovy @@ -1,5 +1,4 @@ podTemplate( - podRetention: never(), idleMinutes: 0, yaml: ''' apiVersion: v1 diff --git a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/declarative.groovy b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/declarative.groovy index 
ef76cb5a1..98992fb5a 100644 --- a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/declarative.groovy +++ b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/declarative.groovy @@ -8,7 +8,6 @@ pipeline { command 'sleep' args '9999999' } - podRetention onFailure() } } environment { diff --git a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runIn2Pods.groovy b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runIn2Pods.groovy index 6a3c41179..f77241d13 100644 --- a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runIn2Pods.groovy +++ b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runIn2Pods.groovy @@ -7,7 +7,7 @@ podTemplate(label: '$NAME-1', containers: [ stage('Run') { container('busybox') { sh """ - ## durable-task plugin generates a script.sh file. + ## durable-task plugin generates a script.sh file. ## echo "script file: \$(find ../../.. -iname script.sh))" echo "script file contents: \$(find ../../.. -iname script.sh -exec cat {} \\;)" @@ -28,7 +28,7 @@ podTemplate(label: '$NAME-2', containers: [ container('busybox2') { sh """ - ## durable-task plugin generates a script.sh file. + ## durable-task plugin generates a script.sh file. ## echo "script file: \$(find ../../.. -iname script.sh))" echo "script file contents: \$(find ../../.. -iname script.sh -exec cat {} \\;)" diff --git a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPod.groovy b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPod.groovy index 2039c6b42..748965f6f 100644 --- a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPod.groovy +++ b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPod.groovy @@ -8,7 +8,7 @@ podTemplate(label: '$NAME', containers: [ container('busybox') { echo "container=$POD_CONTAINER" sh """ - ## durable-task plugin generates a script.sh file. + ## durable-task plugin generates a script.sh file. ## echo "script file: \$(find ../../.. 
-iname script.sh))" echo "script file contents: \$(find ../../.. -iname script.sh -exec cat {} \\;)" diff --git a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPodFromYaml.groovy b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPodFromYaml.groovy index bcf5bc2d4..3d9a108cb 100644 --- a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPodFromYaml.groovy +++ b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPodFromYaml.groovy @@ -26,7 +26,7 @@ spec: stage('Run') { container('busybox') { sh '''set +x - ## durable-task plugin generates a script.sh file. + ## durable-task plugin generates a script.sh file. ## echo "script file: $(find ../../.. -iname script.sh))" echo "script file contents: $(find ../../.. -iname script.sh -exec cat {} \\;)" diff --git a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPodWithRetention.groovy b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPodWithRetention.groovy index 71b8234fd..2b5f9ceb2 100644 --- a/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPodWithRetention.groovy +++ b/src/test/resources/io/armadaproject/jenkins/plugin/pipeline/runInPodWithRetention.groovy @@ -1,4 +1,4 @@ -podTemplate(podRetention: always(), containers: [ +podTemplate(containers: [ containerTemplate(name: 'busybox', image: 'busybox', ttyEnabled: true, command: '/bin/cat'), ]) {