Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
98 commits
Select commit Hold shift + click to select a range
0b6431b
IGNITE-13805 (wip, draft) Restore local files ignore distrbution.
xtern Jan 12, 2021
c0a4b26
IGNITE-13805 Prevent start cache that is currently restoring.
xtern Jan 14, 2021
43364e4
IGNITE-13805 (wip) Rollback on cache start.
xtern Jan 16, 2021
0034c2d
IGNITE-13805 (wip) Cluster state change processing.
xtern Jan 18, 2021
751343f
IGNITE-13805 (wip) Check cacheId instead of name.
xtern Jan 18, 2021
3ebac03
IGNITE-13805 Complete future after cache started.
xtern Jan 18, 2021
63a4edc
IGNITE-13805 Rollback only failed to start groups.
xtern Jan 20, 2021
c1981ab
IGNITE-13805 Update metadata from single node.
xtern Jan 20, 2021
8a9629b
IGNITE-13805 Code cleanup.
xtern Jan 21, 2021
0e380e0
IGNITE-13805 Test with nodeFilter.
xtern Jan 22, 2021
ccd7747
IGNITE-13805 Decomposition/cleanup/refactoring (wip).
xtern Jan 25, 2021
14dd119
IGNITE-13805 No need to distrib rollback.
xtern Jan 25, 2021
f943981
IGNITE-13805 Code cleanup.
xtern Jan 25, 2021
9c55d88
IGNITE-13805 Don't check cache existence on finish phase (cannot rais…
xtern Jan 26, 2021
331fd75
IGNITE-13805 Code cleanup.
xtern Jan 26, 2021
ad549f6
IGNITE-13805 Minor code cleanup.
xtern Jan 26, 2021
3959cec
IGNITE-13805 Minor code cleanup.
xtern Jan 27, 2021
622bc9d
IGNITE-13805 Minor code cleanup.
xtern Jan 27, 2021
8271bb5
IGNITE-13805 Minor code cleanup.
xtern Jan 27, 2021
28975ea
IGNITE-13805 Read cache configs etc should be in filepagestore manager.
xtern Jan 28, 2021
8b068e7
IGNITE-13805 Meta update move to binary processor.
xtern Jan 28, 2021
b519979
IGNITE-13805 Minor code cleanp.
xtern Jan 28, 2021
1e0a3c9
IGNITE-13805 Extract context from future (wip).
xtern Jan 29, 2021
a5ad770
IGNITE-13805 Refactoring, prepare through compute (wip).
xtern Feb 12, 2021
7a1633d
IGNITE-13805 Code cleanup.
xtern Feb 14, 2021
d39a661
IGNITE-13805 Fail cache start if node left during cache start.
xtern Feb 16, 2021
1eaa78d
IGNITE-13805 Don't cleanup directories in automatic mode.
xtern Feb 17, 2021
67fe8ed
IGNITE-13805 Code cleanup.
xtern Feb 18, 2021
2b8891d
IGNITE-13805 Code cleanup.
xtern Feb 19, 2021
f2785a7
IGNITE-13805 Code cleanup.
xtern Mar 1, 2021
d9bdc84
IGNITE-13805 Moved restore context to inner class.
xtern Mar 2, 2021
841740f
IGNITE-13805 (minor) Code cleanup.
xtern Mar 4, 2021
db7b5bb
IGNITE-13805 Code cleanup (remove resolveCacheDir)
xtern Mar 4, 2021
e8bafdd
IGNITE-13805 Review notes.
xtern Mar 5, 2021
9c5f55d
IGNITE-13805 Remove verification task,
xtern Mar 10, 2021
3b6188a
IGNITE-13805 Review note.
xtern Mar 10, 2021
a722836
IGNITE-13805 Don't store cache dirs in context.
xtern Mar 12, 2021
18337b3
IGNITE-13805 Simplify restore context.
xtern Mar 15, 2021
c4f8626
IGNITE-13805 Code cleanup.
xtern Mar 16, 2021
2eefae8
IGNITE-13805 Catch ignitecheckedexception only in prepare.
xtern Mar 16, 2021
64c92ac
IGNITE-13805 Review notes.
xtern Mar 17, 2021
154e079
IGNITE-13805 Additional errors logging.
xtern Mar 17, 2021
4d1f056
IGNITE-13805 More complicated concurrency.
xtern Mar 18, 2021
1afc06d
IGNITE-13805 checkFailure -> checkNodeLeft
xtern Mar 18, 2021
310312b
IGNITE-13805 Removed redundant rollback request/respnses.
xtern Mar 18, 2021
139ed63
IGNITE-13805 Code cleanup.
xtern Mar 18, 2021
72984be
IGNITE-13805 Simplify restoreAsync (exp).
xtern Mar 19, 2021
295a446
IGNITE-13805 Remove redundant rollback.
xtern Mar 19, 2021
c5d0791
IGNITE-13805 (minor) Code cleanup.
xtern Mar 19, 2021
e2ae0c1
IGNITE-13805 Track only required node failures.
xtern Mar 19, 2021
cbc0cbb
IGNITE-13805 (minor) code cleanup.
xtern Mar 22, 2021
2d72c13
IGNITE-13805 Code cleanup.
xtern Mar 23, 2021
df469ca
IGNITE-13805 Sync diff snapshot ops startup.
xtern Mar 23, 2021
63256dc
IGNITE-13805 (minor) Code cleanup.
xtern Mar 23, 2021
67c4cf9
IGNITE-13805 Don;t sync snapshot oper startup.
xtern Mar 23, 2021
db3f917
IGNITE-13805 Complete user future in diff thread pool.
xtern Mar 23, 2021
18b60d5
IGNITE-13805 Move rollback in diff thread.
xtern Mar 24, 2021
43bd1d6
IGNITE-13805 Graceful shutdown (wip).
xtern Mar 24, 2021
c979cc8
IGNITE-13805 Graceful shutdown (wip2).
xtern Mar 24, 2021
dc225b8
IGNITE-13805 Graceful shutdown (wip3).
xtern Mar 25, 2021
c44bffb
IGNITE-13805 (minor) COde cleanup.
xtern Mar 26, 2021
df3032e
IGNITE-13805 No need to check stopping flag in prepare.
xtern Mar 26, 2021
a13341d
IGNITE-13805 (minor) restored -> internal
xtern Mar 28, 2021
50cd434
IGNITE-13805 Improve stop sync.
xtern Apr 5, 2021
fd9b9b7
IGNITE-13805 Review notes.
xtern Apr 5, 2021
50165a3
IGNITE-13805 Remove stopped flag.
xtern Apr 5, 2021
db846c5
IGNITE-13805 Review notes.
xtern Apr 5, 2021
ee9888c
IGNITE-13805 Added sync for rollback.
xtern Apr 5, 2021
1cf1c29
IGNITE-13805 Remove stop/deactivater meths.
xtern Apr 5, 2021
135e359
IGNITE-13805 Bugfix.
xtern Apr 5, 2021
b6b12c1
IGNITE-13805 Rollback logging improvement
xtern Apr 5, 2021
a446660
IGNITE-13805 Code cleanup.
xtern Apr 5, 2021
e6ac5af
IGNITE-13805 User ClusterSnapshotFuture.
xtern Apr 5, 2021
2807e84
IGNITE-13805 Duplicated operation request.
xtern Apr 6, 2021
2233121
IGNITE-13805 (minor) Code cleanup.
xtern Apr 6, 2021
4747d2c
IGNITE-13805 (minor) Test code cleanup.
xtern Apr 6, 2021
24b5d91
IGNITE-13805 (minor) Code cleanup.
xtern Apr 6, 2021
288cf8d
IGNITE-13805 (minor) Code cleanup.
xtern Apr 14, 2021
b955d5c
IGNITE-13805 Check required nodes alive.
xtern Apr 14, 2021
dc32d99
IGNITE-13805 (minor) Test code cleanup.
xtern Apr 14, 2021
86011cb
IGNITE-13805 Use temp dir to copy files on prepare. Cleanup temp dirs…
xtern Apr 15, 2021
da28d46
IGNITE-13805 Start multithreaded tests.
xtern Apr 15, 2021
13136de
IGNITE-13805 (minor) Create snapshot test.
xtern Apr 15, 2021
a570502
IGNITE-13805 Restore all caches from the snapshot.
xtern Apr 16, 2021
bad2d2c
IGNITE-13805 Improved sql/index validation.
xtern Apr 16, 2021
fe16754
IGNITE-13805 Cache start on stable top only fix.
xtern Apr 19, 2021
74ab3cc
IGNITE-13805 Test for corrupted files.
xtern Apr 20, 2021
5b8da3a
IGNITE-13805 Simplify test.
xtern Apr 20, 2021
31fb3b1
IGNITE-13805 Split tests into core and indexing.
xtern Apr 21, 2021
0eab67c
IGNITE-13805 (minor) Snapshot test fix.
xtern Apr 23, 2021
42fa5e1
IGNITE-13805 (minor) Code cleanup.
xtern Apr 27, 2021
c6b6add
IGNITE-13805 Set exchangeLocE on cache start fail.
xtern Apr 28, 2021
32ea6e3
IGNITE-13805 Review notes.
xtern Apr 29, 2021
dcb35ab
IGNITE-13805 Added DynamicCacheStartFailsOnNodeLeftTest.
xtern Apr 29, 2021
5747add
IGNITE-13805 Make sure no index rebuild happened.
xtern Apr 29, 2021
8f138d0
IGNITE-13805 (minor) Javadoc improved, cache start test included into…
xtern Apr 29, 2021
59dcefa
IGNITE-13805 Use restartId instead of internal flag.
xtern May 13, 2021
4ee0cf6
IGNITE-13805 Code cleanup.
xtern May 14, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions modules/core/src/main/java/org/apache/ignite/IgniteSnapshot.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

package org.apache.ignite;

import java.util.Collection;
import org.apache.ignite.lang.IgniteFuture;
import org.jetbrains.annotations.Nullable;

/**
* This interface provides functionality for creating cluster-wide cache data snapshots.
Expand Down Expand Up @@ -48,4 +50,16 @@ public interface IgniteSnapshot {
* @return Future which will be completed when cancel operation finished.
*/
public IgniteFuture<Void> cancelSnapshot(String name);

/**
 * Restore cache group(s) from the snapshot.
 * <p>
 * <b>NOTE:</b> Cache groups to be restored from the snapshot must not be present in the cluster. If they are
 * present, they must be destroyed by the user (e.g. with {@link IgniteCache#destroy()}) before starting this
 * operation.
 *
 * @param name Snapshot name.
 * @param cacheGroupNames Cache groups to be restored or {@code null} to restore all cache groups from the snapshot.
 * @return Future which will be completed when the restore operation is finished.
 */
public IgniteFuture<Void> restoreSnapshot(String name, @Nullable Collection<String> cacheGroupNames);
}
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,10 @@ public enum IgniteFeatures {
CACHE_GROUP_KEY_CHANGE(47),

/** Collecting performance statistics. */
PERFORMANCE_STATISTICS(48);
PERFORMANCE_STATISTICS(48),

/** Restore cache group from the snapshot. */
SNAPSHOT_RESTORE_CACHE_GROUP(49);

/**
* Unique feature identifier.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,15 @@
import org.apache.ignite.internal.IgniteFeatures;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.IgniteNodeAttributes;
import org.apache.ignite.internal.cluster.ClusterTopologyCheckedException;
import org.apache.ignite.internal.managers.discovery.DiscoCache;
import org.apache.ignite.internal.managers.discovery.IgniteDiscoverySpi;
import org.apache.ignite.internal.managers.encryption.GridEncryptionManager;
import org.apache.ignite.internal.managers.systemview.walker.CacheGroupViewWalker;
import org.apache.ignite.internal.managers.systemview.walker.CacheViewWalker;
import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion;
import org.apache.ignite.internal.processors.cache.distributed.dht.IgniteClusterReadOnlyException;
import org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager;
import org.apache.ignite.internal.processors.cluster.ChangeGlobalStateFinishMessage;
import org.apache.ignite.internal.processors.cluster.ChangeGlobalStateMessage;
import org.apache.ignite.internal.processors.cluster.DiscoveryDataClusterState;
Expand Down Expand Up @@ -582,6 +584,28 @@ public boolean onCacheChangeRequested(DynamicCacheChangeBatch batch, AffinityTop
DiscoveryDataClusterState state = ctx.state().clusterState();

if (state.active() && !state.transition()) {
Set<IgniteUuid> restartIds = new HashSet<>(F.viewReadOnly(
batch.requests(), DynamicCacheChangeRequest::restartId, req -> req.start() && req.restartId() != null));

assert restartIds.size() <= 1 : batch.requests();

Collection<UUID> nodes = ctx.cache().context().snapshotMgr().cacheStartRequiredAliveNodes(F.first(restartIds));

for (UUID nodeId : nodes) {
ClusterNode node = ctx.discovery().node(nodeId);

if (node != null && CU.baselineNode(node, state) && ctx.discovery().alive(node))
continue;

ClusterTopologyCheckedException err =
new ClusterTopologyCheckedException("Required node has left the cluster [nodeId=" + nodeId + ']');

for (DynamicCacheChangeRequest req : batch.requests())
ctx.cache().completeCacheStartFuture(req, false, err);

return false;
}

ExchangeActions exchangeActions = new ExchangeActions();

CacheChangeProcessResult res = processCacheChangeRequests(exchangeActions,
Expand All @@ -593,6 +617,9 @@ public boolean onCacheChangeRequested(DynamicCacheChangeBatch batch, AffinityTop
assert !exchangeActions.empty() : exchangeActions;

batch.exchangeActions(exchangeActions);

if (!nodes.isEmpty())
exchangeActions.cacheStartRequiredAliveNodes(nodes);
}

return res.needExchange;
Expand Down Expand Up @@ -1007,6 +1034,16 @@ else if (encMgr.masterKeyDigest() != null &&
}
}

if (err == null && req.restartId() == null) {
IgniteSnapshotManager snapshotMgr = ctx.cache().context().snapshotMgr();

if (snapshotMgr.isRestoring(cacheName, ccfg.getGroupName())) {
err = new IgniteCheckedException("Cache start failed. A cache or group with the same name is " +
"currently being restored from a snapshot [cache=" + cacheName +
(ccfg.getGroupName() == null ? "" : ", group=" + ccfg.getGroupName()) + ']');
}
}

if (err != null) {
if (persistedCfgs)
res.errs.add(err);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import org.apache.ignite.internal.util.typedef.C1;
import org.apache.ignite.internal.util.typedef.F;
import org.apache.ignite.internal.util.typedef.internal.CU;
Expand All @@ -43,6 +44,12 @@ public class ExchangeActions {
/** */
private Map<String, CacheActionData> cachesToStart;

/**
 * Server nodes on which a successful start of the cache(s) is required. If any of these nodes fails when starting
 * the cache(s), the whole procedure is rolled back. May be {@code null} if it has never been set.
 */
private Collection<UUID> cacheStartRequiredAliveNodes;

/** */
private Map<String, CacheActionData> cachesToStop;

Expand Down Expand Up @@ -319,6 +326,23 @@ public boolean cacheGroupStarting(int grpId) {
return false;
}

/**
 * Gets the server nodes that are required to stay alive while the cache(s) are starting.
 *
 * @return Server nodes on which a successful start of the cache(s) is required (if any of these nodes fails when
 *      starting the cache(s), the whole procedure is rolled back), or an empty collection if none have been set.
 */
public Collection<UUID> cacheStartRequiredAliveNodes() {
    if (cacheStartRequiredAliveNodes != null)
        return cacheStartRequiredAliveNodes;

    return Collections.emptyList();
}

/**
 * Sets the server nodes that are required to stay alive while the cache(s) are starting.
 *
 * @param cacheStartRequiredAliveNodes Server nodes on which a successful start of the cache(s) is required. If any
 *      of these nodes fails when starting the cache(s), the whole procedure is rolled back.
 */
public void cacheStartRequiredAliveNodes(Collection<UUID> cacheStartRequiredAliveNodes) {
    // Store a private copy so that later changes to the passed collection do not affect this instance.
    Collection<UUID> nodesCopy = new ArrayList<>(cacheStartRequiredAliveNodes);

    this.cacheStartRequiredAliveNodes = nodesCopy;
}

/**
* @param grpDesc Group descriptor.
* @param destroy Destroy flag.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4268,6 +4268,9 @@ else if (msg0 instanceof WalStateFinishMessage)
if (res == null)
res = validateRestartingCaches(node);

if (res == null)
res = validateRestoringCaches(node);

return res;
}

Expand All @@ -4294,6 +4297,20 @@ private IgniteNodeValidationResult validateRestartingCaches(ClusterNode node) {
return null;
}

/**
 * Rejects a joining node while the snapshot manager reports that a restore is in progress.
 *
 * @param node Joining node to validate.
 * @return Node validation result if there was an issue with the joining node, {@code null} otherwise.
 */
private IgniteNodeValidationResult validateRestoringCaches(ClusterNode node) {
    boolean restoring = ctx.cache().context().snapshotMgr().isRestoring();

    // No restore is running, so there is nothing to object to.
    if (!restoring)
        return null;

    return new IgniteNodeValidationResult(
        node.id(),
        "Joining node during caches restore is not allowed [joiningNodeId=" + node.id() + ']'
    );
}

/**
* @return Keep static cache configuration flag. If {@code true}, static cache configuration will override
* configuration persisted on disk.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ public GridCacheSharedContext(

stateAwareMgrs.add(snpMgr);

stateAwareMgrs.add(snapshotMgr);

for (PluginProvider prv : kernalCtx.plugins().allProviders())
if (prv instanceof IgniteChangeGlobalStateSupport)
stateAwareMgrs.add(((IgniteChangeGlobalStateSupport)prv));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.function.BooleanSupplier;
import javax.cache.CacheException;
import org.apache.ignite.IgniteBinary;
import org.apache.ignite.IgniteCheckedException;
Expand All @@ -55,6 +56,7 @@
import org.apache.ignite.internal.IgniteFeatures;
import org.apache.ignite.internal.IgniteFutureTimeoutCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.IgniteInterruptedCheckedException;
import org.apache.ignite.internal.IgniteNodeAttributes;
import org.apache.ignite.internal.NodeStoppingException;
import org.apache.ignite.internal.UnregisteredBinaryTypeException;
Expand Down Expand Up @@ -992,6 +994,42 @@ public BinaryMetadata binaryMetadata(int typeId) throws BinaryObjectException {
}
}

/** {@inheritDoc} */
@Override public void updateMetadata(File metadataDir, BooleanSupplier stopChecker) throws IgniteCheckedException {
    // Nothing to do when the metadata directory is absent.
    if (!metadataDir.exists())
        return;

    try {
        // Temporary holder map that the file store fills from the given directory.
        ConcurrentMap<Integer, BinaryMetadataHolder> loaded = new ConcurrentHashMap<>();

        BinaryMetadataFileStore fileStore = new BinaryMetadataFileStore(loaded, ctx, log, metadataDir);

        fileStore.restoreMetadata();

        Collection<BinaryMetadata> metas = F.viewReadOnly(loaded.values(), BinaryMetadataHolder::metadata);

        // First pass: check the compatibility of every loaded type against already known metadata
        // before anything is updated.
        for (BinaryMetadata meta : metas) {
            BinaryMetadata existing = binaryMetadata(meta.typeId());

            if (existing == null)
                continue;

            BinaryUtils.mergeMetadata(existing, meta, null);
        }

        // Second pass: update cluster metadata, checking for stop/interruption before each type.
        for (BinaryMetadata meta : metas) {
            if (stopChecker.getAsBoolean())
                return;

            if (Thread.interrupted())
                throw new IgniteInterruptedCheckedException("Thread has been interrupted.");

            addMeta(meta.typeId(), meta.wrap(binaryContext()), false);
        }
    }
    catch (BinaryObjectException e) {
        throw new IgniteCheckedException(e);
    }
}

/** {@inheritDoc} */
@Override public BinaryObject buildEnum(String typeName, int ord) throws BinaryObjectException {
A.notNullOrEmpty(typeName, "enum type name");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ public class GridDhtPartitionsExchangeFuture extends GridDhtTopologyFutureAdapte
private boolean forceAffReassignment;

/** Exception that was thrown during init phase on local node. */
private Exception exchangeLocE;
private volatile Exception exchangeLocE;

/** Exchange exceptions from all participating nodes. */
private final Map<UUID, Exception> exchangeGlobalExceptions = new ConcurrentHashMap<>();
Expand Down Expand Up @@ -5126,6 +5126,12 @@ public void onNodeLeft(final ClusterNode node) {

if (crd0 == null)
finishState = new FinishState(null, initialVersion(), null);

if (dynamicCacheStartExchange() && exchangeLocE == null &&
exchActions.cacheStartRequiredAliveNodes().contains(node.id())) {
exchangeGlobalExceptions.put(cctx.localNodeId(), exchangeLocE = new ClusterTopologyCheckedException(
"Required node has left the cluster [nodeId=" + node.id() + ']'));
}
}

if (crd0 == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -974,29 +974,37 @@ public void readConfigurationFiles(List<CacheConfiguration<?, ?>> ccfgs,
Arrays.sort(files);

for (File file : files) {
if (file.isDirectory()) {
if (file.getName().startsWith(CACHE_DIR_PREFIX)) {
File conf = new File(file, CACHE_DATA_FILENAME);
if (file.isDirectory())
readCacheConfigurations(file, ccfgs);
}

if (conf.exists() && conf.length() > 0) {
StoredCacheData cacheData = readCacheData(conf);
return ccfgs;
}

String cacheName = cacheData.config().getName();
/**
* @param dir Cache (group) directory.
* @param ccfgs Cache configurations.
* @throws IgniteCheckedException If failed.
*/
public void readCacheConfigurations(File dir, Map<String, StoredCacheData> ccfgs) throws IgniteCheckedException {
if (dir.getName().startsWith(CACHE_DIR_PREFIX)) {
File conf = new File(dir, CACHE_DATA_FILENAME);

if (!ccfgs.containsKey(cacheName))
ccfgs.put(cacheName, cacheData);
else {
U.warn(log, "Cache with name=" + cacheName + " is already registered, skipping config file "
+ file.getName());
}
}
if (conf.exists() && conf.length() > 0) {
StoredCacheData cacheData = readCacheData(conf);

String cacheName = cacheData.config().getName();

if (!ccfgs.containsKey(cacheName))
ccfgs.put(cacheName, cacheData);
else {
U.warn(log, "Cache with name=" + cacheName + " is already registered, skipping config file "
+ dir.getName());
}
else if (file.getName().startsWith(CACHE_GRP_DIR_PREFIX))
readCacheGroupCaches(file, ccfgs);
}
}

return ccfgs;
else if (dir.getName().startsWith(CACHE_GRP_DIR_PREFIX))
readCacheGroupCaches(dir, ccfgs);
}

/**
Expand Down
Loading