Skip to content

Commit

Permalink
Fix #111 (#119)
Browse files Browse the repository at this point in the history
* Fix #111
  • Loading branch information
terma committed Jul 11, 2019
1 parent 4878e08 commit 76325ed
Show file tree
Hide file tree
Showing 21 changed files with 555 additions and 126 deletions.
2 changes: 1 addition & 1 deletion src/main/java/com/amazon/jenkins/ec2fleet/CloudNanny.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public long getRecurrencePeriod() {
}

@Override
protected void doRun() throws Exception {
protected void doRun() {
final List<EC2FleetStatusInfo> info = new ArrayList<>();
for (final Cloud cloud : getClouds()) {
if (!(cloud instanceof EC2FleetCloud)) continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
* @see EC2FleetNode
* @see EC2FleetNodeComputer
*/
@SuppressWarnings("WeakerAccess")
@ThreadSafe
public class EC2FleetAutoResubmitComputerLauncher extends DelegatingComputerLauncher {

Expand All @@ -41,12 +42,8 @@ public class EC2FleetAutoResubmitComputerLauncher extends DelegatingComputerLaun
*/
private static final int RESCHEDULE_QUIET_PERIOD_SEC = 10;

private final boolean disableTaskResubmit;

protected EC2FleetAutoResubmitComputerLauncher(
final ComputerLauncher launcher, final boolean disableTaskResubmit) {
public EC2FleetAutoResubmitComputerLauncher(final ComputerLauncher launcher) {
super(launcher);
this.disableTaskResubmit = disableTaskResubmit;
}

/**
Expand Down Expand Up @@ -77,8 +74,19 @@ protected EC2FleetAutoResubmitComputerLauncher(
*/
@Override
public void afterDisconnect(final SlaveComputer computer, final TaskListener listener) {
// according to jenkins docs could be null in edge cases, check ComputerLauncher.afterDisconnect
if (computer == null) return;

// in some multi-thread edge cases cloud could be null for some time, just be ok with that
final EC2FleetCloud cloud = ((EC2FleetNodeComputer) computer).getCloud();
if (cloud == null) {
LOGGER.warning("Edge case cloud is null for computer " + computer.getDisplayName()
+ " should be autofixed in a few minutes, if no please create issue for plugin");
return;
}

final boolean unexpectedDisconnect = computer.isOffline() && computer.getOfflineCause() instanceof OfflineCause.ChannelTermination;
if (!disableTaskResubmit && unexpectedDisconnect) {
if (!cloud.isDisableTaskResubmit() && unexpectedDisconnect) {
final List<Executor> executors = computer.getExecutors();
LOGGER.log(LOG_LEVEL, "Unexpected " + computer.getDisplayName()
+ " termination, resubmit");
Expand Down Expand Up @@ -109,7 +117,7 @@ public void afterDisconnect(final SlaveComputer computer, final TaskListener lis
} else {
LOGGER.log(LOG_LEVEL, "Unexpected " + computer.getDisplayName()
+ " termination but resubmit disabled, no actions, disableTaskResubmit: "
+ disableTaskResubmit + ", offline: " + computer.isOffline()
+ cloud.isDisableTaskResubmit() + ", offline: " + computer.isOffline()
+ ", offlineCause: " + (computer.getOfflineCause() != null ? computer.getOfflineCause().getClass() : "null"));
}

Expand Down
41 changes: 38 additions & 3 deletions src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloud.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,22 @@ public class EC2FleetCloud extends Cloud {
private static final SimpleFormatter sf = new SimpleFormatter();
private static final Logger LOGGER = Logger.getLogger(EC2FleetCloud.class.getName());

/**
* Provide unique identifier for this instance of {@link EC2FleetCloud}, <code>transient</code>
* will not be stored. Not available for customer, instead use {@link EC2FleetCloud#name}
* will be used only during Jenkins configuration update <code>config.jelly</code>,
* when new instance of same cloud is created and we need to find old instance and
* repoint resources like {@link Computer} {@link Node} etc.
* <p>
* It's lazy to support old versions which don't have this field at all.
* <p>
* However it's stable, as soon as it will be created and called first uuid will be same
* for all future calls to the same instances of lazy uuid.
*
* @see EC2FleetCloudAware
*/
private transient LazyUuid id;

/**
* Replaced with {@link EC2FleetCloud#awsCredentialsId}
* <p>
Expand Down Expand Up @@ -122,6 +138,7 @@ public class EC2FleetCloud extends Cloud {

@DataBoundConstructor
public EC2FleetCloud(final String name,
final String oldId,
final String awsCredentialsId,
final @Deprecated String credentialsId,
final String region,
Expand Down Expand Up @@ -164,6 +181,12 @@ public EC2FleetCloud(final String name,
this.disableTaskResubmit = disableTaskResubmit;
this.initOnlineTimeoutSec = initOnlineTimeoutSec;
this.initOnlineCheckIntervalSec = initOnlineCheckIntervalSec;

if (StringUtils.isNotEmpty(oldId)) {
// existent cloud was modified, let's re-assign all dependencies of old cloud instance
// to new one
EC2FleetCloudAwareUtils.reassign(oldId, this);
}
}

/**
Expand All @@ -176,6 +199,16 @@ public String getAwsCredentialsId() {
return StringUtils.isNotBlank(awsCredentialsId) ? awsCredentialsId : credentialsId;
}

/**
* Called old as will be used by new instance of cloud, for
* which this id is old (not current)
*
* @return id of current cloud
*/
public String getOldId() {
return id.getValue();
}

public boolean isDisableTaskResubmit() {
return disableTaskResubmit;
}
Expand Down Expand Up @@ -467,6 +500,8 @@ private Object readResolve() {
}

private void initCaches() {
id = new LazyUuid();

plannedNodesCache = new HashSet<>();
fleetInstancesCache = new HashSet<>();
dyingFleetInstancesCache = new HashSet<>();
Expand Down Expand Up @@ -527,14 +562,14 @@ private void addNewSlave(final AmazonEC2 ec2, final String instanceId, FleetStat
}

final EC2FleetAutoResubmitComputerLauncher computerLauncher = new EC2FleetAutoResubmitComputerLauncher(
computerConnector.launch(address, TaskListener.NULL), disableTaskResubmit);
computerConnector.launch(address, TaskListener.NULL));
final Node.Mode nodeMode = restrictUsage ? Node.Mode.EXCLUSIVE : Node.Mode.NORMAL;
final EC2FleetNode node = new EC2FleetNode(instanceId, "Fleet slave for " + instanceId,
effectiveFsRoot, effectiveNumExecutors, nodeMode, labelString, new ArrayList<NodeProperty<?>>(),
name, computerLauncher);
this, computerLauncher);

// Initialize our retention strategy
node.setRetentionStrategy(new IdleRetentionStrategy(this));
node.setRetentionStrategy(new IdleRetentionStrategy());

final Jenkins jenkins = Jenkins.getInstance();
//noinspection SynchronizationOnLocalVariableOrMethodParameter
Expand Down
23 changes: 23 additions & 0 deletions src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetCloudAware.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package com.amazon.jenkins.ec2fleet;

import javax.annotation.Nonnull;

/**
* Interface to mark object that it's require cloud change update. Jenkins always creates
* new instance of {@link hudson.slaves.Cloud} each time when you save Jenkins configuration page
* regardless of was it actually changed or not.
* <p>
* Jenkins never mutate existent {@link hudson.slaves.Cloud} instance
* <p>
* As result all objects which depends on info from cloud
* should be start to consume new instance of object to be able get new configuration if any.
* <p>
* {@link EC2FleetCloud} is responsible to update all dependencies with new reference
*/
public interface EC2FleetCloudAware {

EC2FleetCloud getCloud();

void setCloud(@Nonnull EC2FleetCloud cloud);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package com.amazon.jenkins.ec2fleet;

import hudson.model.Computer;
import hudson.model.Node;
import jenkins.model.Jenkins;

import javax.annotation.Nonnull;
import java.util.logging.Logger;

/**
* @see EC2FleetCloudAware
*/
@SuppressWarnings("WeakerAccess")
public class EC2FleetCloudAwareUtils {

private static final Logger LOGGER = Logger.getLogger(EC2FleetCloudAwareUtils.class.getName());

public static void reassign(final @Nonnull String oldId, @Nonnull final EC2FleetCloud cloud) {
for (final Computer computer : Jenkins.getActiveInstance().getComputers()) {
checkAndReassign(oldId, cloud, computer);
}

for (final Node node : Jenkins.getActiveInstance().getNodes()) {
checkAndReassign(oldId, cloud, node);
}

LOGGER.info("Finish to reassign resources from old cloud with id " + oldId + " to " + cloud.getDisplayName());
}

private static void checkAndReassign(final String oldId, final EC2FleetCloud cloud, final Object object) {
if (object instanceof EC2FleetCloudAware) {
final EC2FleetCloudAware cloudAware = (EC2FleetCloudAware) object;
final EC2FleetCloud oldCloud = cloudAware.getCloud();
if (oldCloud != null && oldId.equals(oldCloud.getOldId())) {
((EC2FleetCloudAware) object).setCloud(cloud);
LOGGER.info("Reassign " + object + " from " + oldCloud.getDisplayName() + " to " + cloud.getDisplayName());
}
}
}
}
27 changes: 21 additions & 6 deletions src/main/java/com/amazon/jenkins/ec2fleet/EC2FleetNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,20 @@
import hudson.slaves.NodeProperty;
import hudson.slaves.RetentionStrategy;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.util.List;

public class EC2FleetNode extends Slave implements EphemeralNode {
public class EC2FleetNode extends Slave implements EphemeralNode, EC2FleetCloudAware {

private final String cloudName;
private volatile EC2FleetCloud cloud;

@SuppressWarnings("WeakerAccess")
public EC2FleetNode(final String name, final String nodeDescription, final String remoteFS, final int numExecutors, final Mode mode, final String label,
final List<? extends NodeProperty<?>> nodeProperties, final String cloudName, ComputerLauncher launcher) throws IOException, Descriptor.FormException {
final List<? extends NodeProperty<?>> nodeProperties, final EC2FleetCloud cloud, ComputerLauncher launcher) throws IOException, Descriptor.FormException {
super(name, nodeDescription, remoteFS, numExecutors, mode, label,
launcher, RetentionStrategy.NOOP, nodeProperties);
this.cloudName = cloudName;
this.cloud = cloud;
}

@Override
Expand All @@ -32,12 +33,26 @@ public Node asNode() {

@Override
public String getDisplayName() {
return cloudName + " " + name;
// in some multi-thread edge cases cloud could be null for some time, just be ok with that
return (cloud == null ? "unknown fleet" : cloud.getDisplayName()) + " " + name;
}

@Override
public Computer createComputer() {
return new EC2FleetNodeComputer(this);
return new EC2FleetNodeComputer(this, name, cloud);
}

@Override
public EC2FleetCloud getCloud() {
return cloud;
}

/**
* {@inheritDoc}
*/
@Override
public void setCloud(@Nonnull EC2FleetCloud cloud) {
this.cloud = cloud;
}

@Extension
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,23 @@
import hudson.slaves.SlaveComputer;

import javax.annotation.Nonnull;
import javax.annotation.concurrent.ThreadSafe;

/**
* @see EC2FleetNode
* @see EC2FleetAutoResubmitComputerLauncher
*/
public class EC2FleetNodeComputer extends SlaveComputer {
@ThreadSafe
public class EC2FleetNodeComputer extends SlaveComputer implements EC2FleetCloudAware {

public EC2FleetNodeComputer(final Slave slave) {
private final String name;

private volatile EC2FleetCloud cloud;

public EC2FleetNodeComputer(final Slave slave, @Nonnull final String name, @Nonnull final EC2FleetCloud cloud) {
super(slave);
this.name = name;
this.cloud = cloud;
}

@Override
Expand All @@ -22,17 +30,28 @@ public EC2FleetNode getNode() {

/**
* Return label which will represent executor in "Build Executor Status"
* section of Jenkins UI. After reconfiguration actual {@link EC2FleetNode} could
* be removed before this, so name will be just predefined static.
* section of Jenkins UI.
*
* @return node display name or if node is <code>null</code> predefined text about that
* @return node display name
*/
@Nonnull
@Override
public String getDisplayName() {
// getNode() hit map to find node by name
final EC2FleetNode node = getNode();
return node == null ? "removing fleet node" : node.getDisplayName();
// in some multi-thread edge cases cloud could be null for some time, just be ok with that
return (cloud == null ? "unknown fleet" : cloud.getDisplayName()) + " " + name;
}

/**
* {@inheritDoc}
*/
@Override
public void setCloud(@Nonnull final EC2FleetCloud cloud) {
this.cloud = cloud;
}

@Override
public EC2FleetCloud getCloud() {
return cloud;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* @see EC2FleetStatusWidget
* @see CloudNanny
*/
@SuppressWarnings("WeakerAccess")
@SuppressWarnings({"WeakerAccess", "unused"})
@ThreadSafe
public class EC2FleetStatusInfo extends Widget {

Expand Down
Loading

0 comments on commit 76325ed

Please sign in to comment.