Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,13 @@ public void reallyRun() {
}

logger.trace("End cleanup expired async-jobs");

// 3) Cleanup orphaned networks stuck in Implementing state without async jobs
try {
cleanupOrphanedNetworks();
} catch (Throwable e) {
logger.error("Unexpected exception when trying to cleanup orphaned networks", e);
}
Comment on lines +973 to +978
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is already a top-level try/catch inside cleanupOrphanedNetworks(); is this one needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just following how the other cases are handled. Also, this way we will be able to tell which operation actually failed in this block.

} catch (Throwable e) {
logger.error("Unexpected exception when trying to execute queue item, ", e);
}
Expand Down Expand Up @@ -1284,6 +1291,74 @@ private void cleanupFailedSnapshotsCreatedWithDefaultStrategy(final long msid) {
}
}

/**
 * Cleans up networks that are stuck in the Implementing state without a pending async job.
 *
 * <p>A network is only considered when its creation time is older than the job expiration
 * threshold ({@code JobExpireMinutes}). Each network is processed in isolation: a failure
 * while handling one network is logged and does not prevent the remaining networks from
 * being checked. This method never propagates an {@link Exception} to its caller.
 */
private void cleanupOrphanedNetworks() {
    try {
        SearchCriteria<NetworkVO> sc = networkDao.createSearchCriteria();
        sc.addAnd("state", SearchCriteria.Op.EQ, Network.State.Implementing);
        sc.addAnd("removed", SearchCriteria.Op.NULL);
        List<NetworkVO> implementingNetworks = networkDao.search(sc, null);

        if (implementingNetworks == null || implementingNetworks.isEmpty()) {
            return;
        }

        logger.debug("Found {} networks in Implementing state, checking for orphaned networks", implementingNetworks.size());

        final long expireMinutes = JobExpireMinutes.value();
        // Take a single "now" snapshot so the cutoff and the logged ages are consistent.
        final long now = System.currentTimeMillis();
        final Date cutoffTime = new Date(now - (expireMinutes * 60 * 1000));

        for (NetworkVO network : implementingNetworks) {
            try {
                cleanupNetworkIfOrphaned(network, cutoffTime, now, expireMinutes);
            } catch (Exception e) {
                // Isolate failures: one bad network must not abort cleanup of the others.
                logger.error("Error while cleaning up orphaned network {}, continuing with remaining networks", network.getId(), e);
            }
        }
    } catch (Exception e) {
        logger.error("Error while cleaning up orphaned networks", e);
    }
}

/**
 * Transitions a single Implementing network to Shutdown if it is older than the cutoff
 * and has no pending async job targeting it; otherwise leaves it untouched.
 *
 * @param network       network currently in the Implementing state
 * @param cutoffTime    networks created after this instant are skipped
 * @param now           current time in epoch milliseconds, used only for age logging
 * @param expireMinutes expiration threshold in minutes, used only for logging
 */
private void cleanupNetworkIfOrphaned(NetworkVO network, Date cutoffTime, long now, long expireMinutes) {
    final Date created = network.getCreated();
    if (created == null) {
        // Without a creation time the age cannot be established; be conservative and skip.
        logger.debug("Network {} in Implementing state has no creation time, skipping cleanup", network.getId());
        return;
    }

    final long ageMinutes = (now - created.getTime()) / 60000;
    if (created.after(cutoffTime)) {
        logger.trace("Network {} in Implementing state is only {} minutes old (threshold: {} minutes), skipping cleanup",
                network.getId(), ageMinutes, expireMinutes);
        return;
    }

    if (hasPendingNetworkAsyncJob(network)) {
        logger.debug("Network {} in Implementing state has active async job, skipping cleanup", network.getId());
        return;
    }

    logger.warn("Found orphaned network {} in Implementing state without async job. " +
            "Network created: {}, age: {} minutes, expiration threshold: {} minutes. Transitioning to Shutdown state.",
            network.getId(), created, ageMinutes, expireMinutes);
    updateNetworkState(network);
}

/**
 * Tells whether any pending "Network" async job returned for the network's account
 * has an instance id equal to the network's id.
 */
private boolean hasPendingNetworkAsyncJob(NetworkVO network) {
    List<AsyncJobVO> jobs = _jobDao.findInstancePendingAsyncJobs("Network", network.getAccountId());
    for (AsyncJobVO job : jobs) {
        if (job.getInstanceId() != null && job.getInstanceId().equals(network.getId())) {
            return true;
        }
    }
    return false;
}

/**
 * Moves an orphaned network out of the Implementing state.
 *
 * <p>First fires {@code Network.Event.OperationFailed} through the network state machine.
 * If the state machine throws {@code NoTransitionException}, the state is set to
 * {@code Network.State.Shutdown} directly on the entity and persisted through the DAO.
 * Success is only logged when the corresponding call reports that it succeeded.
 *
 * @param network the orphaned network to move to the Shutdown state
 */
private void updateNetworkState(NetworkVO network) {
    try {
        if (networkOrchestrationService.stateTransitTo(network, Network.Event.OperationFailed)) {
            logger.info("Successfully transitioned orphaned network {} to Shutdown state using state machine", network.getId());
        } else {
            // The transition was accepted by the state machine but was not applied.
            logger.warn("State machine did not transition orphaned network {} to Shutdown state", network.getId());
        }
    } catch (final NoTransitionException e) {
        // Keep the cause in the log so the rejected transition can be diagnosed.
        logger.debug("State transition failed for orphaned network {}, forcing state update", network.getId(), e);
        network.setState(Network.State.Shutdown);
        if (networkDao.update(network.getId(), network)) {
            logger.info("Successfully forced orphaned network {} to Shutdown state", network.getId());
        } else {
            logger.warn("Failed to force orphaned network {} to Shutdown state", network.getId());
        }
    }
}

/**
 * No-op: this implementation takes no action when management server nodes join.
 *
 * @param nodeList   the management server hosts passed by the caller (unused here)
 * @param selfNodeId the node id passed by the caller (unused here)
 */
@Override
public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
}
Expand Down