diff --git a/modules/benchmarks/src/main/java/org/apache/ignite/internal/benchmarks/jmh/pagemem/JmhBatchUpdatesBenchmark.java b/modules/benchmarks/src/main/java/org/apache/ignite/internal/benchmarks/jmh/pagemem/JmhBatchUpdatesBenchmark.java new file mode 100644 index 0000000000000..a6e565f337132 --- /dev/null +++ b/modules/benchmarks/src/main/java/org/apache/ignite/internal/benchmarks/jmh/pagemem/JmhBatchUpdatesBenchmark.java @@ -0,0 +1,423 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.internal.benchmarks.jmh.pagemem; + +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import org.apache.ignite.IgniteCheckedException; +import org.apache.ignite.IgniteException; +import org.apache.ignite.Ignition; +import org.apache.ignite.cache.CacheAtomicityMode; +import org.apache.ignite.cache.CacheMode; +import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.DataRegionConfiguration; +import org.apache.ignite.configuration.DataStorageConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.processors.cache.CacheObject; +import org.apache.ignite.internal.processors.cache.GridCacheContext; +import org.apache.ignite.internal.processors.cache.GridCacheEntryInfo; +import org.apache.ignite.internal.processors.cache.KeyCacheObject; +import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionDemander; +import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader; +import org.apache.ignite.logger.NullLogger; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import 
org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Batch updates in pagemem through preloader.
+ * <p>
+ * Compares two demander entry points, {@code preloadEntriesSingle} and {@code preloadEntriesBatch}, on two
+ * identically configured caches that live in separate data regions of one locally started node.
+ *
+ * todo benchmark for internal testing purposes.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@Fork(value = 1, jvmArgsAppend = {"-Xms3g", "-Xmx3g", "-server", "-XX:+AggressiveOpts", "-XX:MaxMetaspaceSize=256m"})
+@Measurement(iterations = 11)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@State(Scope.Benchmark)
+@Threads(1)
+@Warmup(iterations = 15)
+public class JmhBatchUpdatesBenchmark {
+    /** Initial and maximum size (3 GiB, in bytes) of each of the two data regions. */
+    private static final long DEF_REG_SIZE = 3 * 1024 * 1024 * 1024L;
+
+    /** Number of entries generated for every iteration, for each of the two caches. */
+    private static final int BATCH_SIZE = 500;
+
+    /** Name of the data region used by the {@link #CACHE_BATCH} cache. */
+    private static final String REG_BATCH = "batch-region";
+
+    /** Name of the data region used by the {@link #CACHE_SINGLE} cache. */
+    private static final String REG_SINGLE = "single-region";
+
+    /** Name of the cache updated through the batch code path. */
+    private static final String CACHE_BATCH = "batch";
+
+    /** Name of the cache updated through the single-entry code path. */
+    private static final String CACHE_SINGLE = "single";
+
+    /** Ignite instance name of the node started for the benchmark. */
+    private static final String NODE_NAME = "srv0";
+
+    /**
+     * Number of iterations prepared so far. Multiplied by {@link #BATCH_SIZE} to obtain the first key of an
+     * iteration, so that every iteration works on its own key range.
+     */
+    private static int iteration = 0;
+
+    /**
+     * Value size ranges, in bytes, selectable through the JMH {@code range} parameter. Sizes are drawn from
+     * {@code [min, max)}.
+     */
+    public enum RANGE {
+        /** */
+        r0_4(0, 4),
+
+        /** */
+        r4_16(4, 16),
+
+        /** */
+        r16_64(16, 64),
+
+        /** */
+        r100_200(100, 200),
+
+        /** */
+        r200_500(200, 500),
+
+        /** */
+        r500_800(500, 800),
+
+        /** */
+        r800_1200(800, 1200),
+
+        /** */
+        r2000_3000(2_000, 3_000),
+
+        /** */
+        r1000_8000(1_000, 8_000),
+
+        /** Large objects only. */
+        r4000_16000(4_000, 16_000),
+
+        /** Mixed objects, mostly large objects. NOTE(review): the lower bound is 100, not 0 as the name suggests. */
+        r0_32000(100, 32_000);
+
+        /** Lower bound of the value size, inclusive. */
+        private final int min;
+
+        /** Upper bound of the value size, exclusive. */
+        private final int max;
+
+        /**
+         * @param min Lower bound of the value size.
+         * @param max Upper bound of the value size.
+         */
+        RANGE(int min, int max) {
+            this.min = min;
+            this.max = max;
+        }
+    }
+
+
+    /**
+     * Create Ignite configuration.
+     *
+     * @param cfgName Ignite instance name.
+     * @return Ignite configuration.
+     */
+    private IgniteConfiguration getConfiguration(String cfgName) {
+        IgniteConfiguration cfg = new IgniteConfiguration();
+
+        // NullLogger: no log output from the node.
+        cfg.setGridLogger(new NullLogger());
+
+        cfg.setIgniteInstanceName(cfgName);
+
+        // One fixed-size data region per cache (initial size equals maximum size).
+        DataRegionConfiguration reg1 = new DataRegionConfiguration();
+        reg1.setInitialSize(DEF_REG_SIZE);
+        reg1.setMaxSize(DEF_REG_SIZE);
+        reg1.setName(REG_BATCH);
+
+        DataRegionConfiguration reg2 = new DataRegionConfiguration();
+        reg2.setInitialSize(DEF_REG_SIZE);
+        reg2.setMaxSize(DEF_REG_SIZE);
+        reg2.setName(REG_SINGLE);
+
+        DataStorageConfiguration storeCfg = new DataStorageConfiguration();
+
+        storeCfg.setDataRegionConfigurations(reg1, reg2);
+
+        cfg.setDataStorageConfiguration(storeCfg);
+
+        // Both caches are configured identically except for their name and data region.
+        cfg.setCacheConfiguration(ccfg(false), ccfg(true));
+
+        return cfg;
+    }
+
+    /**
+     * Creates the configuration of one of the two benchmark caches.
+     *
+     * @param batch {@code true} for the cache and data region of the batch code path, {@code false} for the
+     *      single-entry ones.
+     * @return Cache configuration.
+     */
+    private CacheConfiguration ccfg(boolean batch) {
+        return new CacheConfiguration(batch ? CACHE_BATCH : CACHE_SINGLE)
+            .setAffinity(new RendezvousAffinityFunction(false, 1))
+            .setCacheMode(CacheMode.REPLICATED)
+            .setAtomicityMode(CacheAtomicityMode.ATOMIC)
+            .setDataRegionName(batch ? REG_BATCH : REG_SINGLE);
+    }
+
+    /**
+     * Test single updates: passes the prepared entries of the "single" cache to {@code preloadEntriesSingle}.
+     *
+     * @param data Data that will be preloaded.
+     * @param preloader Data preloader.
+     * @throws IgniteCheckedException If preloading failed.
+     */
+    @Benchmark
+    public void checkSingle(Data data, Preloader preloader) throws IgniteCheckedException {
+        preloader.demanderSingle.preloadEntriesSingle(null, 0, data.singleData, data.cctxSingle.topology().readyTopologyVersion());
+    }
+
+    /**
+     * Test batch updates: passes the prepared entries of the "batch" cache to {@code preloadEntriesBatch}.
+     *
+     * @param data Data that will be preloaded.
+     * @param preloader Data preloader.
+     * @throws IgniteCheckedException If preloading failed.
+     */
+    @Benchmark
+    public void checkBatch(Data data, Preloader preloader) throws IgniteCheckedException {
+        preloader.demanderBatch.preloadEntriesBatch(null, 0, data.batchData, data.cctxBatch.topology().readyTopologyVersion());
+    }
+
+
+    /**
+     * Start the single server node used by the benchmark.
+     */
+    @Setup(Level.Trial)
+    public void setup() {
+        Ignition.start(getConfiguration(NODE_NAME));
+    }
+
+    /**
+     * Stop all grids after tests.
+     */
+    @TearDown(Level.Trial)
+    public void tearDown() {
+        Ignition.stopAll(true);
+    }
+
+    /**
+     * Benchmark state holding the partition demanders of both caches. The demanders are read via reflection
+     * from the preloaders of the node named {@link #NODE_NAME}.
+     */
+    @State(Scope.Benchmark)
+    public static class Preloader {
+        /** Demander of the cache updated through the batch code path. */
+        final GridDhtPartitionDemander demanderBatch = demander(CACHE_BATCH);
+
+        /** Demander of the cache updated through the single-entry code path. */
+        final GridDhtPartitionDemander demanderSingle = demander(CACHE_SINGLE);
+
+        /**
+         * @param name Cache name.
+         * @return Value of the {@code demander} field of the preloader of the given cache's group.
+         */
+        GridDhtPartitionDemander demander(String name) {
+            GridCacheContext cctx = ((IgniteEx)Ignition.ignite(NODE_NAME)).cachex(name).context();
+
+            GridDhtPreloader preloader = (GridDhtPreloader)cctx.group().preloader();
+
+            return getFieldValue(preloader, "demander");
+        }
+
+        /**
+         * Get object field value via reflection.
+         *
+         * @param obj Object or class to get field value from.
+         * @param fieldNames Field names to get value for: obj->field1->field2->...->fieldN.
+         * @param <T> Expected field class.
+         * @return Field value.
+         * @throws IgniteException In case of error (a field is missing or cannot be accessed).
+         */
+        @SuppressWarnings("unchecked")
+        public static <T> T getFieldValue(Object obj, String... fieldNames) throws IgniteException {
+            assert obj != null;
+            assert fieldNames != null;
+            assert fieldNames.length >= 1;
+
+            try {
+                // Walk the chain: the value of each field is the object the next field is read from.
+                for (String fieldName : fieldNames) {
+                    Class cls = obj instanceof Class ? (Class)obj : obj.getClass();
+
+                    obj = findField(cls, obj, fieldName);
+                }
+
+                return (T)obj;
+            }
+            catch (IllegalAccessException | NoSuchFieldException e) {
+                // A missing field is reported the same way as an inaccessible one, as the contract documents.
+                throw new IgniteException("Failed to get object field [obj=" + obj +
+                    ", fieldNames=" + Arrays.toString(fieldNames) + ']', e);
+            }
+        }
+
+        /**
+         * @param cls Class for searching.
+         * @param obj Target object.
+         * @param fieldName Field name for search.
+         * @return Field from object if it was found.
+         */
+        private static Object findField(Class cls, Object obj,
+            String fieldName) throws NoSuchFieldException, IllegalAccessException {
+            // Resolve inner field. Only fields declared directly in 'cls' are found, superclasses are not searched.
+            Field field = cls.getDeclaredField(fieldName);
+
+            boolean accessible = field.isAccessible();
+
+            if (!accessible)
+                field.setAccessible(true);
+
+            return field.get(obj);
+        }
+    }
+
+    /**
+     * Per-thread benchmark state: the entry infos handed to the demanders. A fresh set is generated before
+     * every iteration and dropped after it.
+     */
+    @State(Scope.Thread)
+    public static class Data {
+        /** Value size range, injected by JMH. */
+        @Param
+        private RANGE range;
+
+        /** Value sizes of the entries of one batch, generated once per trial. */
+        private int[] sizes;
+
+        /** Entries for the cache updated through the batch code path. */
+        Collection batchData = new ArrayList<>(BATCH_SIZE);
+
+        /** Entries for the cache updated through the single-entry code path. */
+        Collection singleData = new ArrayList<>(BATCH_SIZE);
+
+        /** Context of the cache updated through the batch code path. */
+        GridCacheContext cctxBatch = ((IgniteEx)Ignition.ignite(NODE_NAME)).cachex(CACHE_BATCH).context();
+
+        /** Context of the cache updated through the single-entry code path. */
+        GridCacheContext cctxSingle = ((IgniteEx)Ignition.ignite(NODE_NAME)).cachex(CACHE_SINGLE).context();
+
+        /** Generates the value sizes for the selected range. */
+        @Setup(Level.Trial)
+        public void setup() {
+            sizes = sizes(range.min, range.max, BATCH_SIZE);
+        }
+
+        /**
+         * Prepare collection. Every iteration gets its own key range starting at {@code iteration * BATCH_SIZE}.
+         */
+        @Setup(Level.Iteration)
+        public void prepare() {
+            int iter = iteration++;
+
+            int off = iter * BATCH_SIZE;
+
+            batchData = prepareBatch(cctxBatch, off, BATCH_SIZE, sizes);
+            singleData = prepareBatch(cctxSingle, off, BATCH_SIZE, sizes);
+        }
+
+        /**
+         * Clean collection after each test.
+         */
+        @TearDown(Level.Iteration)
+        public void cleanCollection() {
+            batchData = null;
+            singleData = null;
+        }
+
+        /**
+         * Generates random value sizes.
+         *
+         * @param minObjSize Minimum size, inclusive.
+         * @param maxObjSize Maximum size, exclusive. If it is not greater than {@code minObjSize}, every size
+         *      equals {@code minObjSize}.
+         * @param batchSize Number of sizes to generate.
+         * @return Array of {@code batchSize} sizes.
+         */
+        int[] sizes(int minObjSize, int maxObjSize, int batchSize) {
+            int[] res = new int[batchSize];
+
+            int delta = maxObjSize - minObjSize;
+
+            for (int i = 0; i < batchSize; i++)
+                res[i] = minObjSize + (delta > 0 ? ThreadLocalRandom.current().nextInt(delta) : 0);
+
+            return res;
+        }
+
+        /**
+         * Generates rebalance info objects.
+         *
+         * @param cctx Cache context.
+         * @param off Offset: the first key of the batch.
+         * @param cnt Count.
+         * @param sizes Object sizes, at least {@code cnt} elements.
+         * @return List of generated objects.
+         */
+        private List prepareBatch(GridCacheContext cctx, int off, int cnt, int[] sizes) {
+            // The final size is known up front, so the list is presized.
+            List infos = new ArrayList<>(cnt);
+
+            for (int i = off; i < off + cnt; i++) {
+                int size = sizes[i - off];
+
+                // Keys are consecutive integers, values are zero-filled byte arrays of the requested size.
+                KeyCacheObject key = cctx.toCacheKeyObject(i);
+                CacheObject val = cctx.toCacheObject(new byte[size]);
+
+                GridCacheEntryInfo info = new GridCacheEntryInfo();
+                info.key(key);
+                info.value(val);
+                info.cacheId(cctx.cacheId());
+                info.version(cctx.shared().versions().startVersion());
+
+                infos.add(info);
+            }
+
+            return infos;
+        }
+    }
+
+    /**
+     * Run benchmark.
+     *
+     * @param args Args.
+     * @throws RunnerException If the JMH runner failed.
+     */
+    public static void main(String[] args) throws RunnerException {
+        final Options options = new OptionsBuilder()
+            .include(JmhBatchUpdatesBenchmark.class.getSimpleName())
+            .build();
+
+        new Runner(options).run();
+    }
+}
diff --git a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
index 9f1e063a90d02..6f9536948a4e3 100644
--- a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
+++ b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
@@ -1095,6 +1095,9 @@ public final class IgniteSystemProperties {
      */
     public static final String IGNITE_DISCOVERY_DISABLE_CACHE_METRICS_UPDATE = "IGNITE_DISCOVERY_DISABLE_CACHE_METRICS_UPDATE";
 
+    /** */
+    public static final String IGNITE_DATA_STORAGE_BATCH_PAGE_WRITE = "IGNITE_DATA_STORAGE_BATCH_PAGE_WRITE";
+
     /**
      * Maximum number of different partitions to be extracted from between expression within sql query.
      * In case of limit exceeding all partitions will be used.
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/BatchedCacheEntries.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/BatchedCacheEntries.java new file mode 100644 index 0000000000000..b8b2e3590922f --- /dev/null +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/BatchedCacheEntries.java @@ -0,0 +1,479 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.internal.processors.cache; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Set; +import org.apache.ignite.IgniteCheckedException; +import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion; +import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheEntry; +import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtInvalidPartitionException; +import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition; +import org.apache.ignite.internal.processors.cache.persistence.CacheDataRow; +import org.apache.ignite.internal.processors.cache.persistence.CacheSearchRow; +import org.apache.ignite.internal.processors.cache.tree.DataRow; +import org.apache.ignite.internal.processors.cache.version.GridCacheVersion; +import org.apache.ignite.internal.processors.dr.GridDrType; +import org.apache.ignite.internal.util.IgniteTree; +import org.apache.ignite.internal.util.typedef.T2; +import org.apache.ignite.internal.util.typedef.T3; +import org.apache.ignite.internal.util.typedef.internal.CU; +import org.apache.ignite.internal.util.typedef.internal.U; +import org.jetbrains.annotations.Nullable; + +import static org.apache.ignite.internal.processors.cache.GridCacheMapEntry.ATOMIC_VER_COMPARATOR; +import static org.apache.ignite.internal.util.IgniteTree.OperationType.NOOP; +import static org.apache.ignite.internal.util.IgniteTree.OperationType.PUT; +import static org.apache.ignite.internal.util.IgniteTree.OperationType.REMOVE; + +/** + * Batch of cache entries to optimize page memory processing. 
+ */ +public class BatchedCacheEntries { + /** */ + private final GridDhtLocalPartition part; + + /** */ + private final GridCacheContext cctx; + + /** */ + private final LinkedHashMap infos = new LinkedHashMap<>(); + + /** */ + private final AffinityTopologyVersion topVer; + + /** */ + private final boolean preload; + + /** */ + private List entries; + + /** */ + private int skipped; + + /** */ + public BatchedCacheEntries(AffinityTopologyVersion topVer, int partId, GridCacheContext cctx, boolean preload) { + this.topVer = topVer; + this.cctx = cctx; + this.preload = preload; + this.part = cctx.topology().localPartition(partId, topVer, true, true); + } + + /** */ + public void addEntry(KeyCacheObject key, CacheObject val, long expTime, long ttl, GridCacheVersion ver, GridDrType drType) { + // todo remove `key` duplication (Map keys() { + return infos.keySet(); + } + + /** */ + public Collection values() { + return infos.values(); + } + + /** */ + public GridDhtLocalPartition part() { + return part; + } + + /** */ + public GridCacheContext context() { + return cctx; + } + + /** */ + public CacheMapEntryInfo get(KeyCacheObject key) { + return infos.get(key); + } + + /** */ + public boolean preload() { + return preload; + } + + /** */ + public void onRemove(KeyCacheObject key) { + // todo - remove from original collection + ++skipped; + } + + /** */ + public void onError(KeyCacheObject key, IgniteCheckedException e) { + // todo - remove from original collection + ++skipped; + } + + /** */ + public boolean skip(KeyCacheObject key) { + // todo + return false; + } + + /** */ + public List lock() { + return entries = lockEntries(infos.values(), topVer); + } + + /** */ + public void unlock() { + unlockEntries(infos.values(), topVer); + } + + /** */ + public int size() { + return infos.size() - skipped; + } + + /** */ + private List lockEntries(Collection list, AffinityTopologyVersion topVer) + throws GridDhtInvalidPartitionException { + List locked = new 
ArrayList<>(list.size());
+
+        while (true) {
+            // Resolve (or create) the cache entry of every info and remember it in the info.
+            for (CacheMapEntryInfo info : list) {
+                GridDhtCacheEntry entry = (GridDhtCacheEntry)cctx.cache().entryEx(info.key(), topVer);
+
+                locked.add(entry);
+
+                info.cacheEntry(entry);
+            }
+
+            boolean retry = false;
+
+            for (int i = 0; i < locked.size(); i++) {
+                GridCacheMapEntry entry = locked.get(i);
+
+                if (entry == null)
+                    continue;
+
+                // todo ensure free space
+                // todo check obsolete
+
+                entry.lockEntry();
+
+                if (entry.obsolete()) {
+                    // Unlock all locked.
+                    for (int j = 0; j <= i; j++) {
+                        if (locked.get(j) != null)
+                            locked.get(j).unlockEntry();
+                    }
+
+                    // Clear entries.
+                    locked.clear();
+
+                    // Retry.
+                    retry = true;
+
+                    break;
+                }
+            }
+
+            if (!retry)
+                return locked;
+        }
+    }
+
+    /**
+     * Releases java-level locks on cache entries
+     * todo carefully think about possible reorderings in locking/unlocking.
+     * <p>
+     * Before the locks are released every info gets a chance to finish its cache entry update. Entries that
+     * are deleted, or whose update failed, are not touched afterwards.
+     *
+     * @param locked Locked entries.
+     * @param topVer Topology version.
+     */
+    private void unlockEntries(Collection locked, AffinityTopologyVersion topVer) {
+        // Process deleted entries before locks release.
+        assert cctx.deferredDelete() : this;
+
+        // Entries to skip eviction manager notification for.
+        // Enqueue entries while holding locks.
+        // todo Common skip list.
+        Collection skip = null;
+
+        int size = locked.size();
+
+        try {
+            for (CacheMapEntryInfo info : locked) {
+                GridCacheMapEntry entry = info.cacheEntry();
+
+                if (entry != null && entry.deleted()) {
+                    if (skip == null)
+                        skip = U.newHashSet(size);
+
+                    skip.add(entry.key());
+                }
+
+                try {
+                    info.updateCacheEntry();
+                }
+                catch (IgniteCheckedException e) {
+                    // 'skip' is created lazily and is still null unless a deleted entry was seen before.
+                    // Without this check the failure would surface as a NullPointerException.
+                    // todo the failure itself is still dropped here.
+                    if (skip == null)
+                        skip = U.newHashSet(size);
+
+                    skip.add(entry.key());
+                }
+            }
+        }
+        finally {
+            // At least RuntimeException can be thrown by the code above when GridCacheContext is cleaned and there is
+            // an attempt to use cleaned resources.
+            // That's why releasing locks in the finally block..
+            for (CacheMapEntryInfo info : locked) {
+                GridCacheMapEntry entry = info.cacheEntry();
+                if (entry != null)
+                    entry.unlockEntry();
+            }
+        }
+
+        // Try evict partitions.
+        for (CacheMapEntryInfo info : locked) {
+            GridDhtCacheEntry entry = info.cacheEntry();
+            if (entry != null)
+                entry.onUnlock();
+        }
+
+        if (skip != null && skip.size() == size)
+            // Optimization.
+            return;
+
+        // Must touch all entries since update may have deleted entries.
+        // Eviction manager will remove empty entries.
+        for (CacheMapEntryInfo info : locked) {
+            GridCacheMapEntry entry = info.cacheEntry();
+            if (entry != null && (skip == null || !skip.contains(entry.key())))
+                entry.touch();
+        }
+    }
+
+    /**
+     * Closure that decides, row by row, which tree operation is required for the entries of this batch.
+     * NOTE(review): reads {@code entries}, which is only assigned by {@code lock()}, so an instance has to be
+     * created after the batch was locked.
+     */
+    public class BatchUpdateClosure implements IgniteCacheOffheapManager.OffheapInvokeAllClosure {
+        /** Resulting (operation, old row, new row) tuples. */
+        private final List> resBatch = new ArrayList<>(entries.size());
+
+        /** Cache ID to store in rows, or {@code CU.UNDEFINED_CACHE_ID} if the group does not store it in data pages. */
+        private final int cacheId = context().group().storeCacheIdInDataPage() ? context().cacheId() : CU.UNDEFINED_CACHE_ID;
+
+        /** ID of the partition of this batch. */
+        private final int partId = part().id();
+
+        /** {@inheritDoc} */
+        @Override public void call(@Nullable Collection> rows) throws IgniteCheckedException {
+            // The parameter is declared nullable: nothing to process in that case.
+            if (rows == null)
+                return;
+
+            List newRows = new ArrayList<>(16);
+
+            for (T2 t2 : rows) {
+                CacheDataRow oldRow = t2.get1();
+
+                KeyCacheObject key = t2.get2().key();
+
+                CacheMapEntryInfo newRowInfo = get(key);
+
+                try {
+                    if (newRowInfo.needUpdate(oldRow)) {
+                        CacheDataRow newRow;
+
+                        CacheObject val = newRowInfo.value();
+
+                        if (val != null) {
+                            if (oldRow != null) {
+                                // todo think about batch updates
+                                newRow = context().offheap().dataStore(part()).createRow(
+                                    context(),
+                                    key,
+                                    newRowInfo.value(),
+                                    newRowInfo.version(),
+                                    newRowInfo.expireTime(),
+                                    oldRow);
+                            }
+                            else {
+                                CacheObjectContext coCtx = context().cacheObjectContext();
+                                // todo why we need this
+                                val.valueBytes(coCtx);
+                                key.valueBytes(coCtx);
+
+                                if (key.partition() == -1)
+                                    key.partition(partId);
+
+                                newRow = new DataRow(key, val, newRowInfo.version(), partId,
newRowInfo.expireTime(), cacheId); + + newRows.add(newRow); + } + + IgniteTree.OperationType treeOp = oldRow != null && oldRow.link() == newRow.link() ? + NOOP : PUT; + + resBatch.add(new T3<>(treeOp, oldRow, newRow)); + } + else { + // todo we should pass key somehow to remove old row (because in particular case oldRow should not contain key) + newRow = new DataRow(key, null, null, 0, 0, 0); + + resBatch.add(new T3<>(oldRow != null ? REMOVE : NOOP, oldRow, newRow)); + } + } + } + catch (GridCacheEntryRemovedException e) { + onRemove(key); + } + } + + if (!newRows.isEmpty()) + context().offheap().dataStore(part()).rowStore().addRows(newRows, cctx.group().statisticsHolderData()); + } + + /** {@inheritDoc} */ + @Override public Collection> result() { + return resBatch; + } + + /** {@inheritDoc} */ + @Override public boolean apply(CacheDataRow row) { + return false; + } + } + + /** */ + public static class CacheMapEntryInfo { + /** todo think about remove */ + private final BatchedCacheEntries batch; + + /** */ + private final KeyCacheObject key; + + /** */ + private final CacheObject val; + + /** */ + private final long expTime; + + /** */ + private final long ttl; + + /** */ + private final GridCacheVersion ver; + + /** */ + private final GridDrType drType; + + /** */ + private GridDhtCacheEntry entry; + + /** */ + private boolean update; + + /** */ + public CacheMapEntryInfo( + BatchedCacheEntries batch, + KeyCacheObject key, + CacheObject val, + long expTime, + long ttl, + GridCacheVersion ver, + GridDrType drType + ) { + this.batch = batch; + this.key = key; + this.val = val; + this.expTime = expTime; + this.ver = ver; + this.drType = drType; + this.ttl = ttl; + } + + /** + * @return Key. + */ + public KeyCacheObject key() { + return key; + } + + /** + * @return Version. + */ + public GridCacheVersion version() { + return ver; + } + + /** + * @return Value. + */ + public CacheObject value() { + return val; + } + + /** + * @return Expire time. 
+ */ + public long expireTime() { + return expTime; + } + + /** + * @param entry Cache entry. + */ + public void cacheEntry(GridDhtCacheEntry entry) { + this.entry = entry; + } + + /** + * @return Cache entry. + */ + public GridDhtCacheEntry cacheEntry() { + return entry; + } + + /** */ + public void updateCacheEntry() throws IgniteCheckedException { + if (!update) + return; + + entry.finishInitialUpdate(val, expTime, ttl, ver, batch.topVer, drType, null, batch.preload); + } + + /** */ + public boolean needUpdate(CacheDataRow row) throws GridCacheEntryRemovedException { + GridCacheVersion currVer = row != null ? row.version() : entry.version(); + + GridCacheContext cctx = batch.context(); + + boolean isStartVer = cctx.versions().isStartVersion(currVer); + + boolean update0; + + if (cctx.group().persistenceEnabled()) { + if (!isStartVer) { + if (cctx.atomic()) + update0 = ATOMIC_VER_COMPARATOR.compare(currVer, version()) < 0; + else + update0 = currVer.compareTo(version()) < 0; + } + else + update0 = true; + } + else + update0 = (isStartVer && row == null); + + update0 |= (!batch.preload() && entry.deletedUnlocked()); + + update = update0; + + return update0; + } + } +} diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheMetricsImpl.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheMetricsImpl.java index 8ce21c59de3d1..aa88b2e1bb04e 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheMetricsImpl.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheMetricsImpl.java @@ -1163,6 +1163,15 @@ public void onRebalanceKeyReceived() { rebalancingKeysRate.onHit(); } + /** + * Rebalance entry store callback. + */ + public void onRebalanceKeysReceived(long batchSize) { + rebalancedKeys.addAndGet(batchSize); + + rebalancingKeysRate.onHits(batchSize); + } + /** * Rebalance supply message callback. 
* diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java index a3eda189b8518..b15da6b8db03f 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java @@ -3504,6 +3504,102 @@ else if (deletedUnlocked()) } } + /** + * todo explain this and remove code duplication + * @param val New value. + * @param expireTime Expiration time. + * @param ttl Time to live. + * @param ver Version to use. + * @param topVer Topology version. + * @param drType DR type. + * @param mvccVer Mvcc version. + * @param preload Flag indicating whether entry is being preloaded. + * @throws IgniteCheckedException In case of error. + */ + protected void finishInitialUpdate( + @Nullable CacheObject val, + long expireTime, + long ttl, + GridCacheVersion ver, + AffinityTopologyVersion topVer, + GridDrType drType, + MvccVersion mvccVer, + boolean preload + ) throws IgniteCheckedException { + boolean fromStore = false; + boolean walEnabled = !cctx.isNear() && cctx.group().persistenceEnabled() && cctx.group().walEnabled(); + + update(val, expireTime, ttl, ver, true); + + boolean skipQryNtf = false; + + if (val == null) { + skipQryNtf = true; + + if (cctx.deferredDelete() && !deletedUnlocked() && !isInternal()) + deletedUnlocked(true); + } + else if (deletedUnlocked()) + deletedUnlocked(false); + + long updateCntr = 0; + + if (!preload) + updateCntr = nextPartitionCounter(topVer, true, null); + + if (walEnabled) { + if (cctx.mvccEnabled()) { + cctx.shared().wal().log(new MvccDataRecord(new MvccDataEntry( + cctx.cacheId(), + key, + val, + val == null ? DELETE : GridCacheOperation.CREATE, + null, + ver, + expireTime, + partition(), + updateCntr, + mvccVer == null ? 
MvccUtils.INITIAL_VERSION : mvccVer + ))); + } else { + cctx.shared().wal().log(new DataRecord(new DataEntry( + cctx.cacheId(), + key, + val, + val == null ? DELETE : GridCacheOperation.CREATE, + null, + ver, + expireTime, + partition(), + updateCntr + ))); + } + } + + drReplicate(drType, val, ver, topVer); + + if (!skipQryNtf) { + cctx.continuousQueries().onEntryUpdated( + key, + val, + null, + this.isInternal() || !this.context().userCache(), + this.partition(), + true, + true, + updateCntr, + null, + topVer); + } + + onUpdateFinished(updateCntr); + + if (!fromStore && cctx.store().isLocal()) { + if (val != null) + cctx.store().put(null, key, val, ver); + } + } + /** * @param cntr Updated partition counter. */ diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManager.java index b7e8ec717fc38..c9dace3ae682d 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManager.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManager.java @@ -17,6 +17,7 @@ package org.apache.ignite.internal.processors.cache; +import java.util.Collection; import java.util.List; import java.util.Map; import javax.cache.Cache; @@ -47,6 +48,7 @@ import org.apache.ignite.internal.util.lang.GridIterator; import org.apache.ignite.internal.util.lang.IgniteInClosure2X; import org.apache.ignite.lang.IgniteBiTuple; +import org.apache.ignite.lang.IgnitePredicate; import org.jetbrains.annotations.Nullable; /** @@ -188,6 +190,20 @@ public boolean expire(GridCacheContext cctx, IgniteInClosure2X keys, + GridDhtLocalPartition part, + OffheapInvokeAllClosure c + ) throws IgniteCheckedException; + /** * @param cctx Cache context. * @param key Key. 
@@ -579,6 +595,13 @@ interface OffheapInvokeClosure extends IgniteTree.InvokeClosure { @Nullable public CacheDataRow oldRow(); } + /** + * + */ + interface OffheapInvokeAllClosure extends IgniteTree.InvokeAllClosure, IgnitePredicate { +// boolean preload(); + } + /** * */ @@ -861,6 +884,14 @@ MvccUpdateResult mvccLock( */ public void invoke(GridCacheContext cctx, KeyCacheObject key, OffheapInvokeClosure c) throws IgniteCheckedException; + /** + * @param cctx Cache context. + * @param keys Keys. + * @param c Closure. + * @throws IgniteCheckedException If failed. + */ + public void invokeAll(GridCacheContext cctx, Collection keys, OffheapInvokeAllClosure c) throws IgniteCheckedException; + /** * * @param cctx Cache context. diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManagerImpl.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManagerImpl.java index 30fcb7c7631ab..4bd50750c722f 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManagerImpl.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/IgniteCacheOffheapManagerImpl.java @@ -18,6 +18,7 @@ package org.apache.ignite.internal.processors.cache; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -103,6 +104,7 @@ import org.apache.ignite.internal.util.lang.GridIterator; import org.apache.ignite.internal.util.lang.IgniteInClosure2X; import org.apache.ignite.internal.util.typedef.F; +import org.apache.ignite.internal.util.typedef.T3; import org.apache.ignite.internal.util.typedef.X; import org.apache.ignite.internal.util.typedef.internal.CU; import org.apache.ignite.internal.util.typedef.internal.U; @@ -447,6 +449,16 @@ private Iterator cacheData(boolean primary, boolean backup, Affi dataStore(part).invoke(cctx, key, c); } + /** {@inheritDoc} */ + 
@Override public void invokeAll( + GridCacheContext cctx, + Collection keys, + GridDhtLocalPartition part, + OffheapInvokeAllClosure c) + throws IgniteCheckedException { + dataStore(part).invokeAll(cctx, keys, c); + } + /** {@inheritDoc} */ @Override public void update( GridCacheContext cctx, @@ -1627,6 +1639,20 @@ private boolean canUpdateOldRow(GridCacheContext cctx, @Nullable CacheDataRow ol } } + + /** {@inheritDoc} */ + @Override public void invokeAll(GridCacheContext cctx, Collection keys, OffheapInvokeAllClosure c) + throws IgniteCheckedException { + int cacheId = grp.sharedGroup() ? cctx.cacheId() : CU.UNDEFINED_CACHE_ID; + + List searchRows = new ArrayList<>(keys.size()); + + for (KeyCacheObject key : keys) + searchRows.add(new SearchRow(cacheId, key)); + + invokeAll0(cctx, searchRows, c); + } + /** * @param cctx Cache context. * @param row Search row. @@ -1666,6 +1692,57 @@ private void invoke0(GridCacheContext cctx, CacheSearchRow row, OffheapInvokeClo } } + /** + * @param cctx Cache context. + * @param rows Search rows. + * @param c Closure. + * @throws IgniteCheckedException If failed. 
+ */ + private void invokeAll0(GridCacheContext cctx, List rows, OffheapInvokeAllClosure c) + throws IgniteCheckedException { + if (!busyLock.enterBusy()) + throw new NodeStoppingException("Operation has been cancelled (node is stopping)."); + + try { + assert cctx.shared().database().checkpointLockIsHeldByThread(); + + dataTree.invokeAll(rows, CacheDataRowAdapter.RowData.NO_KEY, c); + + for (T3 tuple : c.result()) { + IgniteTree.OperationType opType = tuple.get1(); + CacheDataRow oldRow = tuple.get2(); + CacheDataRow newRow = tuple.get3(); + + switch (opType) { + case PUT: { + assert newRow != null : tuple; + + finishUpdate(cctx, newRow, oldRow); + + break; + } + + case REMOVE: { + finishRemove(cctx, newRow.key(), oldRow); + + break; + } + + case NOOP: + break; + + default: + assert false : opType; + } + } + + + } + finally { + busyLock.leaveBusy(); + } + } + /** {@inheritDoc} */ @Override public CacheDataRow createRow( GridCacheContext cctx, diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionDemander.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionDemander.java index 61f1e06f9dccb..95aae574a06be 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionDemander.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionDemander.java @@ -30,6 +30,7 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.IgniteLogger; +import org.apache.ignite.IgniteSystemProperties; import org.apache.ignite.cache.CacheRebalanceMode; import org.apache.ignite.cluster.ClusterNode; import org.apache.ignite.configuration.CacheConfiguration; @@ -40,6 +41,7 @@ import org.apache.ignite.internal.cluster.ClusterTopologyCheckedException; import 
org.apache.ignite.internal.processors.affinity.AffinityAssignment; import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion; +import org.apache.ignite.internal.processors.cache.BatchedCacheEntries; import org.apache.ignite.internal.processors.cache.CacheEntryInfoCollection; import org.apache.ignite.internal.processors.cache.CacheGroupContext; import org.apache.ignite.internal.processors.cache.CacheMetricsImpl; @@ -86,6 +88,16 @@ * Thread pool for requesting partitions from other nodes and populating local cache. */ public class GridDhtPartitionDemander { + /** todo explain the origin */ + private static final int BATCH_PRELOAD_THRESHOLD = 5; + + /** */ + private static final int CHECKPOINT_THRESHOLD = 200; + + /** */ + private static final boolean batchPageWriteEnabled = + IgniteSystemProperties.getBoolean(IgniteSystemProperties.IGNITE_DATA_STORAGE_BATCH_PAGE_WRITE, false); + /** */ private final GridCacheSharedContext ctx; @@ -766,10 +778,10 @@ public void handleSupplyMessage( part.lock(); try { - Iterator infos = e.getValue().infos().iterator(); + Collection infos = e.getValue().infos(); if (grp.mvccEnabled()) - mvccPreloadEntries(topVer, node, p, infos); + mvccPreloadEntries(topVer, node, p, infos.iterator()); else preloadEntries(topVer, node, p, infos); @@ -851,6 +863,116 @@ public void handleSupplyMessage( } } + /** + * todo should be removed (kept for benchmarking) + */ + public void preloadEntriesSingle(ClusterNode from, + int p, + Collection entries, + AffinityTopologyVersion topVer + ) throws IgniteCheckedException { + GridCacheContext cctx = null; + + // Loop through all received entries and try to preload them. + for (GridCacheEntryInfo entry : entries) { + if (cctx == null || (grp.sharedGroup() && entry.cacheId() != cctx.cacheId())) { + cctx = grp.sharedGroup() ? 
grp.shared().cacheContext(entry.cacheId()) : grp.singleCacheContext(); + + if (cctx == null) + continue; + else if (cctx.isNear()) + cctx = cctx.dhtCache().context(); + } + + if (!preloadEntry(from, p, entry, topVer, cctx)) { + if (log.isTraceEnabled()) + log.trace("Got entries for invalid partition during " + + "preloading (will skip) [p=" + p + ", entry=" + entry + ']'); + + break; + } + + for (GridCacheContext cctx0 : grp.caches()) { + if (cctx0.statisticsEnabled()) + cctx0.cache().metrics0().onRebalanceKeyReceived(); + } + } + } + + /** + * @param from Node which sent entry. + * @param p Partition id. + * @param entries Preloaded entries. + * @param topVer Topology version. + * + * @throws IgniteCheckedException If failed. + */ + public void preloadEntriesBatch(ClusterNode from, + int p, + Collection entries, + AffinityTopologyVersion topVer + ) throws IgniteCheckedException { + if (entries.isEmpty()) + return; + + Map cctxMap = new HashMap<>(); + + // Map by context. + for (GridCacheEntryInfo info : entries) { + try { + GridCacheContext cctx0 = grp.sharedGroup() ? 
ctx.cacheContext(info.cacheId()) : grp.singleCacheContext(); + + if (cctx0 == null) + return; + + if (cctx0.isNear()) + cctx0 = cctx0.dhtCache().context(); + + final GridCacheContext cctx = cctx0; + + if (log.isTraceEnabled()) + log.trace("Rebalancing key [key=" + info.key() + ", part=" + p + ", node=" + from.id() + ']'); + + BatchedCacheEntries batch = cctxMap.get(cctx.cacheId()); + + if (batch == null) { + // todo lock should be called for ALL group + cctx.group().listenerLock().readLock().lock(); + + cctxMap.put(cctx.cacheId(), batch = new BatchedCacheEntries(topVer, p, cctx, true)); + } + + batch.addEntry(info.key(), info.value(), info.expireTime(), info.ttl(), info.version(), DR_PRELOAD); + } + catch (GridDhtInvalidPartitionException ignored) { + if (log.isDebugEnabled()) + log.debug("Partition became invalid during rebalancing (will ignore): " + p); + } + } + + for (BatchedCacheEntries batch : cctxMap.values()) { + assert batch.size() > BATCH_PRELOAD_THRESHOLD : batch.size(); + + GridCacheContext cctx = batch.context(); + + batch.lock(); + + try { + cctx.offheap().invokeAll(cctx, batch.keys(), batch.part(), batch.new BatchUpdateClosure()); + } + finally { + batch.unlock(); + + cctx.group().listenerLock().readLock().unlock(); + + for (GridCacheContext cctx0 : grp.caches()) { + if (cctx0.statisticsEnabled()) + cctx0.cache().metrics0().onRebalanceKeysReceived(batch.size()); + } + } + } + } + /** * Adds mvcc entries with theirs history to partition p. * @@ -942,14 +1064,52 @@ private void mvccPreloadEntries(AffinityTopologyVersion topVer, ClusterNode node * * @param node Node which sent entry. * @param p Partition id. - * @param infos Entries info for preload. + * @param infosCol Entries info for preload. * @param topVer Topology version. * @throws IgniteInterruptedCheckedException If interrupted. 
*/ private void preloadEntries(AffinityTopologyVersion topVer, ClusterNode node, int p, - Iterator infos) throws IgniteCheckedException { + Collection infosCol) throws IgniteCheckedException { GridCacheContext cctx = null; + int size = infosCol.size(); + + boolean batchEnabled = + batchPageWriteEnabled && size > BATCH_PRELOAD_THRESHOLD; + + int nBatch = 0; + int total = size / CHECKPOINT_THRESHOLD; + + Iterator infos = infosCol.iterator(); + + // Loop through all received entries and try to preload them. + while (infos.hasNext()) { + ctx.database().checkpointReadLock(); + + boolean tail = (nBatch++ >= (total - 1)); + + try { + List infosBatch = new ArrayList<>(CHECKPOINT_THRESHOLD); + + for (int i = 0; i < (tail ? CHECKPOINT_THRESHOLD + (size % CHECKPOINT_THRESHOLD) : CHECKPOINT_THRESHOLD); i++) { + if (!infos.hasNext()) + break; + + GridCacheEntryInfo entry = infos.next(); + + infosBatch.add(entry); + } + + if (batchEnabled && infosBatch.size() > BATCH_PRELOAD_THRESHOLD) + preloadEntriesBatch(node, p, infosBatch, topVer); + else + preloadEntriesSingle(node, p, infosBatch, topVer); + } + finally { + ctx.database().checkpointReadUnlock(); + } + } + // Loop through all received entries and try to preload them. 
while (infos.hasNext()) { ctx.database().checkpointReadLock(); @@ -1390,7 +1550,8 @@ private void partitionDone(UUID nodeId, int p, boolean updateState) { "rebalancing [grp=" + grp.cacheOrGroupName() + ", supplier=" + nodeId + ", topVer=" + topologyVersion() + - ", progress=" + (routines - remainingRoutines) + "/" + routines + "]")); + ", progress=" + (routines - remainingRoutines) + "/" + routines + "," + + ", batch=" + batchPageWriteEnabled + "]")); remaining.remove(nodeId); } diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/DataStructure.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/DataStructure.java index 35dd3c46ee431..ab9bf86913bdd 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/DataStructure.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/DataStructure.java @@ -17,6 +17,7 @@ package org.apache.ignite.internal.processors.cache.persistence; +import java.util.Collection; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; import org.apache.ignite.IgniteCheckedException; @@ -307,6 +308,26 @@ protected final R write( return PageHandler.writePage(pageMem, grpId, pageId, this, h, init, wal, null, arg, intArg, lockFailed, statHolder); } + /** + * @param pageId Page ID. + * @param h Handler. + * @param init IO for new page initialization or {@code null} if it is an existing page. + * @param arg Argument. + * @param lockFailed Result in case of lock failure due to page recycling. + * @param statHolder Statistics holder to track IO operations. + * @return Handler result. + * @throws IgniteCheckedException If failed. 
+ */ + protected final R write( + long pageId, + PageHandler h, + PageIO init, + Collection arg, + R lockFailed, + IoStatisticsHolder statHolder) throws IgniteCheckedException { + return PageHandler.writePageBatch(pageMem, grpId, pageId, this, h, init, wal, null, arg, lockFailed, statHolder); + } + /** * @param pageId Page ID. * @param h Handler. diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java index 26a535ca17a84..41ece06324654 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java @@ -17,6 +17,7 @@ package org.apache.ignite.internal.processors.cache.persistence; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -1967,7 +1968,7 @@ private Metas getOrAllocatePartitionMetas() throws IgniteCheckedException { return delegate.mvccInitialValue(cctx, key, val, ver, expireTime, mvccVer, newMvccVer); } - + /** {@inheritDoc} */ @Override public boolean mvccApplyHistoryIfAbsent( GridCacheContext cctx, @@ -2107,6 +2108,16 @@ private Metas getOrAllocatePartitionMetas() throws IgniteCheckedException { delegate.invoke(cctx, key, c); } + /** {@inheritDoc} */ + @Override public void invokeAll(GridCacheContext cctx, Collection keys, OffheapInvokeAllClosure c) + throws IgniteCheckedException { + assert ctx.database().checkpointLockIsHeldByThread(); + + CacheDataStore delegate = init0(false); + + delegate.invokeAll(cctx, keys, c); + } + /** {@inheritDoc} */ @Override public void remove(GridCacheContext cctx, KeyCacheObject key, int partId) throws IgniteCheckedException { diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/IgniteCacheDatabaseSharedManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/IgniteCacheDatabaseSharedManager.java index 7fc70d0b8923d..d4db27c74bed4 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/IgniteCacheDatabaseSharedManager.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/IgniteCacheDatabaseSharedManager.java @@ -248,7 +248,7 @@ protected void initPageMemoryDataStructures(DataStorageConfiguration dbCfg) thro boolean persistenceEnabled = memPlcCfg.isPersistenceEnabled(); CacheFreeListImpl freeList = new CacheFreeListImpl(0, - cctx.igniteInstanceName(), + memPlc.config().getName(), memMetrics, memPlc, null, diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/RowStore.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/RowStore.java index 91fd2070cc048..2f2942d51ce1d 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/RowStore.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/RowStore.java @@ -17,6 +17,7 @@ package org.apache.ignite.internal.processors.cache.persistence; +import java.util.Collection; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.internal.pagemem.PageMemory; import org.apache.ignite.internal.processors.cache.CacheGroupContext; @@ -111,6 +112,25 @@ public void addRow(CacheDataRow row, IoStatisticsHolder statHolder) throws Ignit } } + /** + * @param rows Rows. + * @throws IgniteCheckedException If failed. 
+ */ + public void addRows(Collection rows, IoStatisticsHolder statHolder) throws IgniteCheckedException { + if (!persistenceEnabled) + freeList.insertDataRows(rows, statHolder); + else { + ctx.database().checkpointReadLock(); + + try { + freeList.insertDataRows(rows, statHolder); + } + finally { + ctx.database().checkpointReadUnlock(); + } + } + } + /** * @param link Row link. * @param row New row data. diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java index 60aefb927ce6f..7a3fefc6010f0 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java @@ -17,6 +17,9 @@ package org.apache.ignite.internal.processors.cache.persistence.freelist; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; import java.util.concurrent.atomic.AtomicReferenceArray; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.IgniteLogger; @@ -30,6 +33,7 @@ import org.apache.ignite.internal.pagemem.wal.record.delta.DataPageUpdateRecord; import org.apache.ignite.internal.processors.cache.persistence.DataRegion; import org.apache.ignite.internal.processors.cache.persistence.DataRegionMetricsImpl; +import org.apache.ignite.internal.processors.cache.persistence.IndexStorageImpl; import org.apache.ignite.internal.processors.cache.persistence.Storable; import org.apache.ignite.internal.processors.cache.persistence.evict.PageEvictionTracker; import org.apache.ignite.internal.processors.cache.persistence.tree.io.AbstractDataPageIO; @@ -133,12 +137,15 @@ private final class UpdateRowHandler extends PageHandler { /** */ private final PageHandler writeRow = new WriteRowHandler(); + 
/** */ + private final PageHandler writeRows = new WriteRowHandlerBatch(); + /** * */ - private final class WriteRowHandler extends PageHandler { - @Override public Integer run( - int cacheId, + private class WriteRowHandler extends PageHandler { + /** {@inheritDoc} */ + @Override public Integer run(int cacheId, long pageId, long page, long pageAddr, @@ -146,6 +153,33 @@ private final class WriteRowHandler extends PageHandler { Boolean walPlc, T row, int written, + IoStatisticsHolder statHolder + ) throws IgniteCheckedException { + written = run0(pageId, page, pageAddr, iox, row, written, statHolder); + + putPage((AbstractDataPageIO)iox, pageId, page, pageAddr, statHolder); + + return written; + } + + /** + * @param pageId Page ID. + * @param page Page absolute pointer. + * @param pageAddr Page address. + * @param iox IO. + * @param row Data row. + * @param written Count of bytes written. + * @param statHolder Statistics holder to track IO operations. + * @return Result. + * @throws IgniteCheckedException If failed. + */ + protected Integer run0( + long pageId, + long page, + long pageAddr, + PageIO iox, + T row, + int written, IoStatisticsHolder statHolder) throws IgniteCheckedException { AbstractDataPageIO io = (AbstractDataPageIO)iox; @@ -159,15 +193,6 @@ private final class WriteRowHandler extends PageHandler { written = (written == 0 && oldFreeSpace >= rowSize) ? addRow(pageId, page, pageAddr, io, row, rowSize) : addRowFragment(pageId, page, pageAddr, io, row, written, rowSize); - // Reread free space after update. - int newFreeSpace = io.getFreeSpace(pageAddr); - - if (newFreeSpace > MIN_PAGE_FREE_SPACE) { - int bucket = bucket(newFreeSpace, false); - - put(null, pageId, page, pageAddr, bucket, statHolder); - } - if (written == rowSize) evictionTracker.touchPage(pageId); @@ -185,7 +210,7 @@ private final class WriteRowHandler extends PageHandler { * @return Written size which is always equal to row size here. * @throws IgniteCheckedException If failed. 
+ */ - private int addRow( + protected int addRow( long pageId, long page, long pageAddr, @@ -225,7 +250,7 @@ private int addRow( * @return Updated written size. * @throws IgniteCheckedException If failed. */ - private int addRowFragment( + protected int addRowFragment( long pageId, long page, long pageAddr, @@ -254,6 +279,83 @@ private int addRowFragment( return written + payloadSize; } + + /** + * Put page to freelist if needed. + * + * @param iox IO. + * @param pageId Page ID. + * @param page Page pointer. + * @param pageAddr Page address. + * @param statHolder Statistics holder to track IO operations. + */ + protected void putPage( + AbstractDataPageIO iox, + long pageId, + long page, + long pageAddr, + IoStatisticsHolder statHolder + ) throws IgniteCheckedException { + // Reread free space after update. + int newFreeSpace = ((AbstractDataPageIO)iox).getFreeSpace(pageAddr); + + if (newFreeSpace > MIN_PAGE_FREE_SPACE) { + int bucket = bucket(newFreeSpace, false); + + put(null, pageId, page, pageAddr, bucket, statHolder); + } + } + } + + /** + * + */ + private class WriteRowHandlerBatch extends WriteRowHandler { + /** {@inheritDoc} */ + @Override public Integer runBatch( + int cacheId, + long pageId, + long page, + long pageAddr, + PageIO io, + Boolean walPlc, + Collection args, + IoStatisticsHolder statHolder + ) throws IgniteCheckedException { + int maxPayloadSize = pageSize() - AbstractDataPageIO.MIN_DATA_PAGE_OVERHEAD; + + AbstractDataPageIO iox = (AbstractDataPageIO)io; + + // todo !! DO NOT FORGET WAL DELTA !! + if (iox.getFreeSpace(pageAddr) == maxPayloadSize) { + // todo save links for WAL + + iox.addRows(pageMem, pageId, pageAddr, args, pageSize()); + + // todo update wal + } + else { + for (T row : args) { + assert iox.getFreeSpace(pageAddr) > 0 : iox.getFreeSpace(pageAddr); + + int size = row.size(); + + int written = size > maxPayloadSize ? 
+ addRowFragment(pageId, page, pageAddr, iox, row, size - (size % maxPayloadSize), size) : + addRow(pageId, page, pageAddr, iox, row, size); + + assert written == size : "The object is not fully written into page: " + + "pageId=" + pageId + ", written=" + written + ", size=" + row.size(); + + evictionTracker.touchPage(pageId); + } + } + + // return page to freelist if needed + putPage((AbstractDataPageIO)io, pageId, page, pageAddr, statHolder); + + return COMPLETE; + } } /** */ @@ -509,6 +611,155 @@ else if (PageIdUtils.tag(pageId) != PageIdAllocator.FLAG_DATA) while (written != COMPLETE); } + /** {@inheritDoc} */ + @Override public void insertDataRows(Collection rows, IoStatisticsHolder statHolder) throws IgniteCheckedException { + // 1. split into 3 bags + // A. Large objects. + // B1. Tails of large objects + // B2. small objects + + // Max bytes per data page. + int maxPayloadSize = pageSize() - AbstractDataPageIO.MIN_DATA_PAGE_OVERHEAD; + + int maxRowsPerPage = IndexStorageImpl.MAX_IDX_NAME_LEN; + + // Data rows <-> count of pages needed + List largeRows = new ArrayList<>(16); + + // other objects + List regularRows = new ArrayList<>(16); + + for (T dataRow : rows) { + if (dataRow.size() < maxPayloadSize) + regularRows.add(dataRow); + else { + largeRows.add(dataRow); + + int tailSize = dataRow.size() % maxPayloadSize; + + if (tailSize > 0) + regularRows.add(dataRow); + } + } + + // Writing large objects. 
+ for (T row : largeRows) { + int rowSize = row.size(); + + int written = 0; + + do { + if (written != 0) + memMetrics.incrementLargeEntriesPages(); + + int remaining = rowSize - written; + + long pageId; + + if (remaining >= MIN_SIZE_FOR_DATA_PAGE) + pageId = takeEmptyPage(REUSE_BUCKET, ioVersions(), statHolder); + else + break; + + AbstractDataPageIO initIo = null; + + if (pageId == 0L) { + pageId = allocateDataPage(row.partition()); + + initIo = ioVersions().latest(); + } + else if (PageIdUtils.tag(pageId) != PageIdAllocator.FLAG_DATA) + pageId = initReusedPage(pageId, row.partition(), statHolder); + else + pageId = PageIdUtils.changePartitionId(pageId, (row.partition())); + + written = write(pageId, writeRow, initIo, row, written, FAIL_I, statHolder); + + assert written != FAIL_I; // We can't fail here. + } + while (written != COMPLETE); + } + + List dataRows = new ArrayList<>(maxRowsPerPage); + + int remainPageSpace = 0; + + long pageId = 0; + + AbstractDataPageIO initIo = null; + + for (int i = 0; i < regularRows.size(); i++) { + T row = regularRows.get(i); + + boolean tail = i == (regularRows.size() - 1); + + boolean fragment = row.size() > maxPayloadSize; + + int payloadSize = fragment ? (row.size() % maxPayloadSize) + 12 : row.size() + 4; + + // There is no space left on this page. + if (((remainPageSpace - payloadSize) < 0 || dataRows.size() == maxRowsPerPage) && pageId != 0) { + int written = write(pageId, writeRows, initIo, dataRows, FAIL_I, statHolder); + + assert written == COMPLETE : written; + + initIo = null; + remainPageSpace = 0; + pageId = 0; + dataRows.clear(); + } + + dataRows.add(row); + + if (pageId == 0) { + int minBucket = bucket(payloadSize, false) + 1; + + if (payloadSize != MIN_SIZE_FOR_DATA_PAGE) { + for (int b = REUSE_BUCKET - 1; b >= minBucket; b--) { + pageId = takeEmptyPage(b, ioVersions(), statHolder); + + if (pageId != 0L) { + remainPageSpace = (b << shift); //todo + 4, wtf "+4"? 
+ + break; + } + } + } + + if (pageId == 0) + pageId = takeEmptyPage(REUSE_BUCKET, ioVersions(), statHolder); + + if (pageId == 0) { + pageId = allocateDataPage(row.partition()); + + initIo = ioVersions().latest(); + } + else if (PageIdUtils.tag(pageId) != PageIdAllocator.FLAG_DATA) + pageId = initReusedPage(pageId, row.partition(), statHolder); + else + pageId = PageIdUtils.changePartitionId(pageId, row.partition()); + + if (remainPageSpace == 0) + remainPageSpace = maxPayloadSize; + } + + remainPageSpace -= payloadSize; + + if (tail) { + int written; + + if (dataRows.size() == 1) { + written = fragment ? row.size() - (row.size() % maxPayloadSize) : 0; + + written = write(pageId, writeRows, initIo, row, written, FAIL_I, statHolder); + } else + written = write(pageId, writeRows, initIo, dataRows, FAIL_I, statHolder); + + assert written == COMPLETE : written; + } + } + } + /** * @param reusedPageId Reused page id. * @param partId Partition id. diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/FreeList.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/FreeList.java index e28d421bdf063..894c1aa64faca 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/FreeList.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/FreeList.java @@ -17,6 +17,7 @@ package org.apache.ignite.internal.processors.cache.persistence.freelist; +import java.util.Collection; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.IgniteLogger; import org.apache.ignite.internal.processors.cache.persistence.Storable; @@ -32,6 +33,12 @@ public interface FreeList { */ public void insertDataRow(T row, IoStatisticsHolder statHolder) throws IgniteCheckedException; + /** + * @param rows Rows. + * @throws IgniteCheckedException If failed. 
+ */ + public void insertDataRows(Collection rows, IoStatisticsHolder statHolder) throws IgniteCheckedException; + /** * @param link Row link. * @param row New row data. diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java index 54d9816bcb7e2..7ab4dbe5345aa 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java @@ -1822,6 +1822,11 @@ public final boolean removex(L row) throws IgniteCheckedException { } } + /** {@inheritDoc} */ + @Override public void invokeAll(List keys, Object z, InvokeAllClosure c) throws IgniteCheckedException { + throw new UnsupportedOperationException("Not implemented yet"); + } + /** * @param x Invoke operation. * @param pageId Page ID. 
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/AbstractDataPageIO.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/AbstractDataPageIO.java index 78752bbfefc84..4b2d4030fe581 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/AbstractDataPageIO.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/io/AbstractDataPageIO.java @@ -31,6 +31,7 @@ import org.apache.ignite.internal.processors.cache.persistence.tree.util.PageHandler; import org.apache.ignite.internal.util.GridStringBuilder; import org.apache.ignite.internal.util.typedef.internal.SB; +import org.apache.ignite.internal.util.typedef.internal.U; import org.jetbrains.annotations.Nullable; import static org.apache.ignite.internal.util.GridUnsafe.bufferAddress; @@ -810,7 +811,7 @@ public void addRow( final int rowSize, final int pageSize ) throws IgniteCheckedException { - assert rowSize <= getFreeSpace(pageAddr) : "can't call addRow if not enough space for the whole row"; + assert rowSize <= getFreeSpace(pageAddr) : "can't call addRow if not enough space for the whole row (free=" + getFreeSpace(pageAddr) + ", required=" + rowSize + ")"; int fullEntrySize = getPageEntrySize(rowSize, SHOW_PAYLOAD_LEN | SHOW_ITEM); @@ -977,6 +978,75 @@ public void addRowFragment( addRowFragment(null, pageId, pageAddr, 0, 0, lastLink, null, payload, pageSize); } + /** + * @param pageMem Page memory. + * @param pageId Page ID to use to construct a link. + * @param pageAddr Page address. + * @param rows Data rows. + * @param pageSize Page size. + * @throws IgniteCheckedException If failed. + */ + public void addRows( + final PageMemory pageMem, + final long pageId, + final long pageAddr, + final Collection rows, + final int pageSize + ) throws IgniteCheckedException { + // todo code duplication (3 times!) 
+ int maxPayloadSIze = pageSize - MIN_DATA_PAGE_OVERHEAD; + int dataOff = pageSize; + int cnt = 0; + int written = 0; + + for (T row : rows) { + boolean fragment = row.size() > maxPayloadSIze; + + int payloadSize = row.size() % maxPayloadSIze; + + assert payloadSize <= getFreeSpace(pageAddr) : "can't call addRow if not enough space for the whole row"; + + int sizeSetup = fragment ? SHOW_PAYLOAD_LEN | SHOW_LINK | SHOW_ITEM : SHOW_PAYLOAD_LEN | SHOW_ITEM; + + int fullEntrySize = getPageEntrySize(payloadSize, sizeSetup); + + written += fullEntrySize; + + dataOff -= (fullEntrySize - ITEM_SIZE); + + if (fragment) { + ByteBuffer buf = pageMem.pageBuffer(pageAddr); + + buf.position(dataOff); + + buf.putShort((short)(payloadSize | FRAGMENTED_FLAG)); + buf.putLong(row.link()); + + // todo is it 0? + writeFragmentData(row, buf, 0, payloadSize); + } + else + writeRowData(pageAddr, dataOff, payloadSize, row, true); + + setItem(pageAddr, cnt, directItemFromOffset(dataOff)); + + assert checkIndex(cnt) : cnt; + assert getIndirectCount(pageAddr) <= getDirectCount(pageAddr); + + setLinkByPageId(row, pageId, cnt); + + ++cnt; + } + + setDirectCount(pageAddr, cnt); + + setFirstEntryOffset(pageAddr, dataOff, pageSize); + + // Update free space. If number of indirect items changed, then we were able to reuse an item slot. + // + (getIndirectCount(pageAddr) != indirectCnt ? ITEM_SIZE : 0) + setRealFreeSpace(pageAddr, getRealFreeSpace(pageAddr) - written, pageSize); + } + /** * Adds maximum possible fragment of the given row to this data page and sets respective link to the row. * @@ -1112,6 +1182,7 @@ private int insertItem(long pageAddr, int dataOff, int directCnt, int indirectCn setItem(pageAddr, directCnt, directItemFromOffset(dataOff)); setDirectCount(pageAddr, directCnt + 1); + assert getDirectCount(pageAddr) == directCnt + 1; return directCnt; // Previous directCnt will be our itemId. 
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/util/PageHandler.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/util/PageHandler.java index 5ab1bf38dbc18..72302bf36ecad 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/util/PageHandler.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/util/PageHandler.java @@ -18,6 +18,7 @@ package org.apache.ignite.internal.processors.cache.persistence.tree.util; import java.nio.ByteBuffer; +import java.util.Collection; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.internal.pagemem.PageMemory; import org.apache.ignite.internal.pagemem.PageSupport; @@ -70,6 +71,32 @@ public abstract R run( ) throws IgniteCheckedException; + /** + * Handler callback that receives a collection of arguments for one page. + * This base implementation is a stub and always throws {@link UnsupportedOperationException}. + * + * @param cacheId Cache ID. + * @param pageId Page ID. + * @param page Page absolute pointer. + * @param pageAddr Page address. + * @param io IO. + * @param walPlc Full page WAL record policy. + * @param args Arguments. + * @param statHolder Statistics holder to track IO operations. + * @return Result. + * @throws IgniteCheckedException If failed. + */ + public R runBatch( + int cacheId, + long pageId, + long page, + long pageAddr, + PageIO io, + Boolean walPlc, + Collection args, + IoStatisticsHolder statHolder + ) throws IgniteCheckedException { + // TODO Not implemented yet. + throw new UnsupportedOperationException(); + } + /** * @param cacheId Cache ID. * @param pageId Page ID. @@ -308,6 +335,74 @@ public static R writePage( } } + /** + * Acquires the page, takes its write lock, initializes the page when {@code init} is given and then calls + * {@code h.runBatch(...)} with the whole argument collection; the lock and the page are released afterwards + * unless the handler's {@code releaseAfterWrite} returns {@code false}. + * + * @param pageMem Page memory. + * @param grpId Group ID. + * @param pageId Page ID. + * @param lsnr Lock listener. + * @param h Handler. + * @param init IO for new page initialization or {@code null} if it is an existing page. + * @param wal Write ahead log. + * @param walPlc Full page WAL record policy. + * @param args Arguments passed to the handler as one batch. 
+ * @param lockFailed Result in case of lock failure due to page recycling. + * @param statHolder Statistics holder to track IO operations. + * @return Handler result, or {@code lockFailed} if the write lock was not obtained. + * @throws IgniteCheckedException If failed. + */ + public static R writePageBatch( + PageMemory pageMem, + int grpId, + final long pageId, + PageLockListener lsnr, + PageHandler h, + PageIO init, + IgniteWriteAheadLogManager wal, + Boolean walPlc, + Collection args, + R lockFailed, + IoStatisticsHolder statHolder + ) throws IgniteCheckedException { + boolean releaseAfterWrite = true; + + long page = pageMem.acquirePage(grpId, pageId, statHolder); + + try { + long pageAddr = writeLock(pageMem, grpId, pageId, page, lsnr, false); + + // Zero address: the write lock was not obtained, so there is nothing to unlock. + if (pageAddr == 0L) + return lockFailed; + + boolean ok = false; + + try { + if (init != null) { + // It is a new page and we have to initialize it. + doInitPage(pageMem, grpId, pageId, page, pageAddr, init, wal); + walPlc = FALSE; + } + else + init = PageIO.getPageIO(pageAddr); + + R res = h.runBatch(grpId, pageId, page, pageAddr, init, walPlc, args, statHolder); + + ok = true; + + return res; + } + finally { + assert PageIO.getCrc(pageAddr) == 0; //TODO GG-11480 + + // Assignment is intentional: the outer finally block needs the handler's decision too. + if (releaseAfterWrite = h.releaseAfterWrite(grpId, pageId, page, pageAddr, null, 0)) + writeUnlock(pageMem, grpId, pageId, page, pageAddr, lsnr, walPlc, ok); + } + } + finally { + if (releaseAfterWrite) + pageMem.releasePage(grpId, pageId, page); + } + } + /** * @param pageMem Page memory. * @param grpId Group ID. 
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/tree/CacheDataTree.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/tree/CacheDataTree.java index b3c1c69e66319..7546f0569c3dc 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/tree/CacheDataTree.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/tree/CacheDataTree.java @@ -17,6 +17,9 @@ package org.apache.ignite.internal.processors.cache.tree; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.internal.pagemem.PageUtils; import org.apache.ignite.internal.pagemem.store.PageStore; @@ -45,6 +48,8 @@ import org.apache.ignite.internal.stat.IoStatisticsHolder; import org.apache.ignite.internal.util.GridUnsafe; import org.apache.ignite.internal.util.lang.GridCursor; +import org.apache.ignite.internal.util.typedef.T2; +import org.apache.ignite.internal.util.typedef.T3; import org.apache.ignite.internal.util.typedef.internal.CU; import static java.lang.Boolean.FALSE; @@ -327,6 +332,87 @@ public CacheDataRowStore rowStore() { return rowStore; } + /** + * TODO: fake implementation, only for checking that the closure works properly with the preloader. + * Pairs the given search rows with the rows found in the tree between the first and the last key, passes the + * pairs to the closure and then applies the PUT and REMOVE operations from the closure result one at a time. + * + * @param keys Keys; asserted to be non-empty, with the first key's hash code not greater than the last one's. + * @param x Implementation specific argument, {@code null} always means that we need a full detached data row. + * Not used by this implementation. + * @param c Closure. + * @throws IgniteCheckedException If failed. 
+ */ + @Override public void invokeAll(List keys, Object x, InvokeAllClosure c) throws IgniteCheckedException { + checkDestroyed(); + + int cnt = keys.size(); + + assert cnt > 0 : cnt; + + CacheSearchRow lower = keys.get(0); + CacheSearchRow upper = keys.get(cnt - 1); + + List> batch = new ArrayList<>(cnt); + + Iterator rowItr = keys.iterator(); + + assert lower.key().hashCode() <= upper.key().hashCode() : "Keys must be lower=" + lower.key().hashCode() + ", upper=" + upper.key().hashCode(); + + // Existing rows between the first and the last requested key. + GridCursor cur = find(lower, upper, CacheDataRowAdapter.RowData.FULL); + + CacheSearchRow lastSearchRow = null; + KeyCacheObject newKey = null; + + while (cur.next()) { + CacheDataRow oldRow = cur.get(); + KeyCacheObject oldKey = oldRow.key(); + + // NOTE(review): a search row fetched by the last rowItr.next() is not added to the batch when the + // iterator becomes empty right after it (e.g. a single key that already exists) - confirm. + while (newKey == null || newKey.hashCode() <= oldKey.hashCode()) { + if (newKey != null && newKey.hashCode() == oldKey.hashCode()) { + while (newKey.hashCode() == oldKey.hashCode()) { + if (newKey.equals(oldKey)) + batch.add(new T2<>(oldRow, lastSearchRow)); + else + batch.add(new T2<>(null, lastSearchRow)); + + if (!rowItr.hasNext()) + break; + + lastSearchRow = rowItr.next(); + newKey = lastSearchRow.key(); + } + } + else { + if (lastSearchRow != null) + batch.add(new T2<>(null, lastSearchRow)); + + if (!rowItr.hasNext()) + break; + + lastSearchRow = rowItr.next(); + newKey = lastSearchRow.key(); + } + + if (!rowItr.hasNext()) + break; + } + } + + // Search rows left after the cursor is exhausted are added without an old row. + while (rowItr.hasNext()) + batch.add(new T2<>(null, rowItr.next())); + + c.call(batch); + + // Apply the operations returned by the closure; anything other than PUT and REMOVE is skipped. + for (T3 t3 : c.result()) { + OperationType oper = t3.get1(); + CacheDataRow oldRow = t3.get2(); + CacheDataRow newRow = t3.get3(); + + if (oper == OperationType.PUT) + put(newRow); + else + if (oper == OperationType.REMOVE) + remove(oldRow); + } + } + /** {@inheritDoc} */ @Override protected int compare(BPlusIO iox, long pageAddr, int idx, CacheSearchRow row) throws IgniteCheckedException { diff --git a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteTree.java 
b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteTree.java index 9e854d28f6cb0..12d1a6d3918dc 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteTree.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteTree.java @@ -17,8 +17,12 @@ package org.apache.ignite.internal.util; +import java.util.Collection; +import java.util.List; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.internal.util.lang.GridCursor; +import org.apache.ignite.internal.util.typedef.T2; +import org.apache.ignite.internal.util.typedef.T3; import org.jetbrains.annotations.Nullable; /** @@ -42,6 +46,14 @@ public interface IgniteTree { */ public void invoke(L key, Object x, InvokeClosure c) throws IgniteCheckedException; + /** + * Variant of {@code invoke} that takes a list of keys and an {@link InvokeAllClosure}. + * + * @param keys Keys. + * @param x Implementation specific argument, {@code null} always means that we need a full detached data row. + * @param c Closure. + * @throws IgniteCheckedException If failed. + */ + public void invokeAll(List keys, Object x, InvokeAllClosure c) throws IgniteCheckedException; + /** * Returns the value to which the specified key is mapped, or {@code null} if this tree contains no mapping for the * key. @@ -130,6 +142,25 @@ interface InvokeClosure { OperationType operationType(); } + /** + * Closure accepted by {@code invokeAll}. + * T - found row, L - search row. + */ + interface InvokeAllClosure { + /** + * Receives the batch of rows to process. + * @param rows Pairs of found (old) row and search row. + * @throws IgniteCheckedException If failed. 
+ */ + void call(@Nullable Collection> rows) throws IgniteCheckedException; + + /** + * Returns the operations to apply to the tree. + * @return Triples of operation type, old row and new row. + */ + Collection> result(); + } + /** * */ diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/database/FreeListPreloadWithBatchUpdatesTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/database/FreeListPreloadWithBatchUpdatesTest.java new file mode 100644 index 0000000000000..2458cfd375d6e --- /dev/null +++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/database/FreeListPreloadWithBatchUpdatesTest.java @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ignite.internal.processors.database; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ThreadLocalRandom; +import org.apache.ignite.Ignite; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.IgniteDataStreamer; +import org.apache.ignite.cache.CacheAtomicityMode; +import org.apache.ignite.cache.CacheMode; +import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; +import org.apache.ignite.cluster.BaselineNode; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.DataRegionConfiguration; +import org.apache.ignite.configuration.DataStorageConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.configuration.WALMode; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.IgniteInterruptedCheckedException; +import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition; +import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState; +import org.apache.ignite.internal.util.typedef.PA; +import org.apache.ignite.internal.util.typedef.internal.U; +import org.apache.ignite.testframework.GridTestUtils; +import org.apache.ignite.testframework.junits.WithSystemProperty; +import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static org.apache.ignite.IgniteSystemProperties.IGNITE_DATA_STORAGE_BATCH_PAGE_WRITE; +import static org.apache.ignite.IgniteSystemProperties.IGNITE_PDS_WAL_REBALANCE_THRESHOLD; +import static org.junit.Assert.assertArrayEquals; + +/** + * Checks that cache entries are intact after rebalancing when batch page write is enabled. + */ +@RunWith(Parameterized.class) +public class FreeListPreloadWithBatchUpdatesTest 
extends GridCommonAbstractTest { + /** */ + private static final int HDR_SIZE = 8 + 32; + + /** Initial size of the default data region. */ + private static final long DEF_REG_SIZE_INIT = 3400 * 1024 * 1024L; + + /** Maximum size of the default data region. */ + private static final long DEF_REG_SIZE = 6144 * 1024 * 1024L; + + /** Name of the cache used by the tests. */ + private static final String DEF_CACHE_NAME = "some-cache"; + + /** Test parameters: combinations of cache atomicity mode and persistence flag. */ + @Parameterized.Parameters(name = "with atomicity={0} and persistence={1}") + public static Iterable setup() { + return Arrays.asList(new Object[][]{ + {CacheAtomicityMode.ATOMIC, false}, + {CacheAtomicityMode.ATOMIC, true}, + {CacheAtomicityMode.TRANSACTIONAL, false}, + {CacheAtomicityMode.TRANSACTIONAL, true}, + {CacheAtomicityMode.TRANSACTIONAL_SNAPSHOT, false}, + {CacheAtomicityMode.TRANSACTIONAL_SNAPSHOT, true} + }); + } + + /** Cache atomicity mode under test. */ + @Parameterized.Parameter() + public CacheAtomicityMode cacheAtomicityMode; + + /** Whether persistence is enabled for the default data region. */ + @Parameterized.Parameter(1) + public boolean persistence; + + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { + IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName); + + DataRegionConfiguration def = new DataRegionConfiguration(); + def.setInitialSize(DEF_REG_SIZE_INIT); + def.setMaxSize(DEF_REG_SIZE); + def.setPersistenceEnabled(persistence); + + DataStorageConfiguration storeCfg = new DataStorageConfiguration(); + + storeCfg.setDefaultDataRegionConfiguration(def); + + if (persistence) { + storeCfg.setWalMode(WALMode.LOG_ONLY); + storeCfg.setMaxWalArchiveSize(Integer.MAX_VALUE); + } + + cfg.setDataStorageConfiguration(storeCfg); + + return cfg; + } + + /** + * Cleans the persistence directory before each test. + */ + @Before + public void before() throws Exception { + cleanPersistenceDir(); + } + + /** + * Stops all grids and cleans the persistence directory after each test. + */ + @After + public void after() throws Exception { + stopAllGrids(); + + cleanPersistenceDir(); + } + + /** + * Checks entries on a newly started node after rebalance, and after its restart when persistence is enabled. + */ + @Test + @WithSystemProperty(key = IGNITE_DATA_STORAGE_BATCH_PAGE_WRITE, value = "true") + public void testBatchRebalance() throws Exception { + Ignite node 
= startGrid(0); + + node.cluster().active(true); + + node.cluster().baselineAutoAdjustEnabled(false); + + node.createCache(ccfg()); + + int cnt = 100_000; + + int minSize = 0; + int maxSize = 16384; + + Map srcMap = new HashMap<>(); + + for (int i = 0; i < cnt; i++) { + int size = maxSize == minSize ? maxSize : minSize + ThreadLocalRandom.current().nextInt(maxSize - minSize); + + byte[] obj = new byte[size]; + + srcMap.put(i, obj); + } + + try (IgniteDataStreamer streamer = node.dataStreamer(DEF_CACHE_NAME)) { + streamer.addData(srcMap); + } + + log.info("Data loaded."); + + if (persistence) + node.cluster().active(false); + + final IgniteEx node2 = startGrid(1); + + if (persistence) { + List list = new ArrayList<>(node.cluster().currentBaselineTopology()); + + list.add(node2.localNode()); + + node.cluster().active(true); + + node.cluster().setBaselineTopology(list); + } + + log.info("Await rebalance."); + + awaitRebalance(node2, DEF_CACHE_NAME); + + node.close(); + + validateCacheEntries(node2.cache(DEF_CACHE_NAME), srcMap); + + if (persistence) { + node2.close(); + + Ignite ignite = startGrid(1); + + ignite.cluster().active(true); + + log.info("Validate entries after restart"); + + validateCacheEntries(ignite.cache(DEF_CACHE_NAME), srcMap); + } + } + + /** + * Checks entries on a restarted node after updates and removals were made while it was stopped. + */ + @Test + @WithSystemProperty(key = IGNITE_PDS_WAL_REBALANCE_THRESHOLD, value = "100") + @WithSystemProperty(key = IGNITE_DATA_STORAGE_BATCH_PAGE_WRITE, value = "true") + public void testBatchHistoricalRebalance() throws Exception { + if (!persistence) + return; + + // TODO https://issues.apache.org/jira/browse/IGNITE-7384 + // TODO http://apache-ignite-developers.2346864.n4.nabble.com/Historical-rebalance-td38380.html + if (cacheAtomicityMode == CacheAtomicityMode.TRANSACTIONAL_SNAPSHOT) + return; + + Ignite node = startGrids(2); + + node.cluster().active(true); + + IgniteCache cache = node.createCache(ccfg()); + + int cnt = 10_000; + + log.info("Loading " + cnt + " random entries."); + + Map srcMap 
new HashMap<>(); + + for (int i = 0; i < cnt; i++) { + byte[] obj = new byte[ThreadLocalRandom.current().nextInt(16384)]; + + srcMap.put(i, obj); + } + + try (IgniteDataStreamer streamer = node.dataStreamer(DEF_CACHE_NAME)) { + streamer.addData(srcMap); + } + + forceCheckpoint(); + + log.info("Stopping node #2."); + + grid(1).close(); + + log.info("Updating values on node #1."); + + for (int i = 100; i < 1000; i++) { + if (i % 33 == 0) { + cache.remove(i); + + srcMap.remove(i); + } + else { + byte[] bytes = new byte[512]; + + Arrays.fill(bytes, (byte)1); + + srcMap.put(i, bytes); + cache.put(i, bytes); + } + } + + forceCheckpoint(); + + log.info("Starting node #2."); + + IgniteEx node2 = startGrid(1); + + log.info("Await rebalance on node #2."); + + awaitRebalance(node2, DEF_CACHE_NAME); + + log.info("Stop node #1."); + + node.close(); + + validateCacheEntries(node2.cache(DEF_CACHE_NAME), srcMap); + } + + /** + * Waits up to 60 seconds until every local partition of the cache is in {@code OWNING} state. + * + * @param node Ignite node. + * @param name Cache name. + */ + private void awaitRebalance(IgniteEx node, String name) throws IgniteInterruptedCheckedException { + boolean ok = GridTestUtils.waitForCondition(new PA() { + @Override public boolean apply() { + for (GridDhtLocalPartition part : node.context().cache().cache(name).context().group().topology().localPartitions()) { + if (part.state() != GridDhtPartitionState.OWNING) + return false; + } + + return true; + } + }, 60_000); + + // Fail fast: no reason to sleep if the wait has already timed out. + assertTrue(ok); + + U.sleep(3000); + } + + /** + * Checks that the cache holds exactly the entries of the map; values are compared byte by byte. + * + * @param cache Cache. + * @param map Map. + */ + @SuppressWarnings("unchecked") + private void validateCacheEntries(IgniteCache cache, Map map) { + int size = cache.size(); + + assertEquals("Cache size mismatch.", map.size(), size); + + log.info("Validation " + cache.getName() + ", size=" + size); + + for (Map.Entry e : map.entrySet()) { + String idx = "key=" + e.getKey(); + + // Compare contents, not only lengths: equally sized values may still differ. + assertArrayEquals(idx, e.getValue(), (byte[])cache.get(e.getKey())); + } + } + + /** + * @return Replicated cache configuration with a single partition. 
+ */ + private CacheConfiguration ccfg() { + return ccfg(1, CacheMode.REPLICATED); + } + + /** + * @param parts Number of partitions. + * @param mode Cache mode. + * @return Cache configuration. + */ + private CacheConfiguration ccfg(int parts, CacheMode mode) { + return new CacheConfiguration(DEF_CACHE_NAME) + .setAffinity(new RendezvousAffinityFunction(false, parts)) + .setCacheMode(mode) + .setAtomicityMode(cacheAtomicityMode); + } +} + +