diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java index 34d2e200d967..2189f4e8c4df 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java @@ -56,10 +56,17 @@ public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate this.delegate = delegate; } + /** + * @deprecated since 4.0.0. Use + * {@link #TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit)}. This + * constructor will be removed in a future release. + * @see <a href="https://issues.apache.org/jira/browse/HBASE-29272">HBASE-29272</a> + */ + @Deprecated public TableSnapshotRegionSplit(TableDescriptor htd, RegionInfo regionInfo, List locations, Scan scan, Path restoreDir) { - this.delegate = - new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir); + this.delegate = new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, + restoreDir, 1); } @Override diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java index c71a42aea5d1..126eb8f3881a 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java @@ -94,10 +94,17 @@ public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate this.delegate = delegate; } + /** + * @deprecated since 4.0.0. Use + * {@link #TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit)}. This + * constructor will be removed in a future release. 
+ * @see <a href="https://issues.apache.org/jira/browse/HBASE-29272">HBASE-29272</a> + */ + @Deprecated public TableSnapshotRegionSplit(TableDescriptor htd, RegionInfo regionInfo, List locations, Scan scan, Path restoreDir) { - this.delegate = - new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir); + this.delegate = new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, + restoreDir, 1); } @Override diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java index 501209f1c902..44c40e130a2e 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java @@ -24,6 +24,7 @@ import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.UUID; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -47,6 +48,7 @@ import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper; import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; import org.apache.hadoop.hbase.snapshot.SnapshotManifest; +import org.apache.hadoop.hbase.snapshot.SnapshotRegionSizeCalculator; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CommonFSUtils; import org.apache.hadoop.hbase.util.RegionSplitter; @@ -145,13 +147,14 @@ public static class InputSplit implements Writable { private String[] locations; private String scan; private String restoreDir; + private long length; // constructor for mapreduce framework / Writable public InputSplit() { } public InputSplit(TableDescriptor htd, RegionInfo regionInfo, List locations, Scan scan, - Path restoreDir) { + Path restoreDir, long length) { this.htd = htd; this.regionInfo = regionInfo; if (locations == null || 
locations.isEmpty()) { @@ -166,6 +169,7 @@ public InputSplit(TableDescriptor htd, RegionInfo regionInfo, List locat } this.restoreDir = restoreDir.toString(); + this.length = length; } public TableDescriptor getHtd() { @@ -181,8 +185,7 @@ public String getRestoreDir() { } public long getLength() { - // TODO: We can obtain the file sizes of the snapshot here. - return 0; + return length; } public String[] getLocations() { @@ -440,6 +443,8 @@ public static List getSplits(Scan scan, SnapshotManifest manifest, } } + Map regionSizes = + SnapshotRegionSizeCalculator.calculateRegionSizes(conf, manifest); List splits = new ArrayList<>(); for (RegionInfo hri : regionManifests) { // load region descriptor @@ -455,7 +460,7 @@ public static List getSplits(Scan scan, SnapshotManifest manifest, hosts = calculateLocationsForInputSplit(conf, htd, hri, tableDir); } } - + long snapshotRegionSize = regionSizes.getOrDefault(hri.getEncodedName(), 0L); if (numSplits > 1) { byte[][] sp = sa.split(hri.getStartKey(), hri.getEndKey(), numSplits, true); for (int i = 0; i < sp.length - 1; i++) { @@ -478,7 +483,8 @@ public static List getSplits(Scan scan, SnapshotManifest manifest, Bytes.compareTo(scan.getStopRow(), sp[i + 1]) < 0 ? 
scan.getStopRow() : sp[i + 1]); } - splits.add(new InputSplit(htd, hri, hosts, boundedScan, restoreDir)); + splits.add(new InputSplit(htd, hri, hosts, boundedScan, restoreDir, + snapshotRegionSize / numSplits)); } } } else { @@ -487,7 +493,7 @@ public static List getSplits(Scan scan, SnapshotManifest manifest, hri.getEndKey()) ) { - splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir)); + splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir, snapshotRegionSize)); } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java index 151319e165a2..f982b8652224 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java @@ -51,7 +51,6 @@ import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser; import org.apache.hbase.thirdparty.org.apache.commons.cli.DefaultParser; import org.apache.hbase.thirdparty.org.apache.commons.cli.Option; -import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; @@ -151,6 +150,7 @@ String getStateToString() { private AtomicInteger logsCount = new AtomicInteger(); private AtomicLong hfilesArchiveSize = new AtomicLong(); private AtomicLong hfilesSize = new AtomicLong(); + private Map regionSizeMap = new ConcurrentHashMap<>(); private AtomicLong hfilesMobSize = new AtomicLong(); private AtomicLong nonSharedHfilesArchiveSize = new AtomicLong(); private AtomicLong logSize = new AtomicLong(); @@ -176,6 +176,14 @@ String getStateToString() { this.fs = fs; } + /** + * Returns the map containing region sizes. + * @return A map where keys are region names and values are their corresponding sizes. 
+ */ + public Map getRegionSizeMap() { + return regionSizeMap; + } + /** Returns the snapshot descriptor */ public SnapshotDescription getSnapshotDescription() { return ProtobufUtil.createSnapshotDesc(this.snapshot); @@ -347,6 +355,12 @@ FileInfo addStoreFile(final RegionInfo region, final String family, return new FileInfo(inArchive, size, isCorrupted); } + void updateRegionSizeMap(final RegionInfo region, + final SnapshotRegionManifest.StoreFile storeFile) { + long currentSize = regionSizeMap.getOrDefault(region.getEncodedName(), 0L); + regionSizeMap.put(region.getEncodedName(), currentSize + storeFile.getFileSize()); + } + /** * Add the specified log file to the stats * @param server server name @@ -613,6 +627,7 @@ public void storeFile(final RegionInfo regionInfo, final String family, if (!storeFile.hasReference()) { stats.addStoreFile(regionInfo, family, storeFile, filesMap); } + stats.updateRegionSizeMap(regionInfo, storeFile); } }); return stats; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotRegionSizeCalculator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotRegionSizeCalculator.java new file mode 100644 index 000000000000..6c12e8587d09 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotRegionSizeCalculator.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import java.io.IOException; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hbase.snapshot.SnapshotInfo.SnapshotStats; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + +/** + * Utility class to calculate the size of each region in a snapshot. + */ +@InterfaceAudience.Private +public class SnapshotRegionSizeCalculator { + + /** + * Calculate the size of each region in the snapshot. + * @return A map of region encoded names to their total size in bytes. + * @throws IOException If an error occurs during calculation. 
+ */ + public static Map calculateRegionSizes(Configuration conf, + SnapshotManifest manifest) throws IOException { + FileSystem fs = FileSystem.get(CommonFSUtils.getRootDir(conf).toUri(), conf); + SnapshotProtos.SnapshotDescription snapshot = + SnapshotDescriptionUtils.readSnapshotInfo(fs, manifest.getSnapshotDir()); + SnapshotStats stats = SnapshotInfo.getSnapshotStats(conf, snapshot, null); + return stats.getRegionSizeMap(); + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotRegionSizeCalculator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotRegionSizeCalculator.java new file mode 100644 index 000000000000..62d56dd7b38f --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotRegionSizeCalculator.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + +@Category(SmallTests.class) +public class TestSnapshotRegionSizeCalculator { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSnapshotRegionSizeCalculator.class); + + private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + private static FileSystem fs; + private static Path rootDir; + private static Path snapshotDir; + private static SnapshotProtos.SnapshotDescription snapshotDesc; + private static SnapshotManifest manifest; + private static Admin admin; + private static Configuration conf; + + @BeforeClass + public static void setupCluster() throws Exception { + TEST_UTIL.startMiniCluster(3); + conf = TEST_UTIL.getConfiguration(); + fs = TEST_UTIL.getTestFileSystem(); + rootDir = TEST_UTIL.getDataTestDir("TestSnapshotRegionSizeCalculator"); + CommonFSUtils.setRootDir(conf, rootDir); + admin = TEST_UTIL.getAdmin(); + } + + @AfterClass + public static void tearDown() throws Exception { + fs.delete(rootDir, true); + TEST_UTIL.shutdownMiniCluster(); + } + + @Test + public void testCalculateRegionSize() throws IOException { + TableName tableName = 
TableName.valueOf("test_table"); + String snapshotName = "test_snapshot"; + + // table has no data + TEST_UTIL.createTable(tableName, Bytes.toBytes("info")); + admin = TEST_UTIL.getConnection().getAdmin(); + admin.snapshot(snapshotName, tableName); + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, + TEST_UTIL.getDefaultRootDirPath()); + SnapshotProtos.SnapshotDescription snapshotDesc = + SnapshotDescriptionUtils.readSnapshotInfo(TEST_UTIL.getTestFileSystem(), snapshotDir); + SnapshotManifest manifest = SnapshotManifest.open(TEST_UTIL.getConfiguration(), + TEST_UTIL.getTestFileSystem(), snapshotDir, snapshotDesc); + + Map regionSizes = + SnapshotRegionSizeCalculator.calculateRegionSizes(TEST_UTIL.getConfiguration(), manifest); + + for (Map.Entry entry : regionSizes.entrySet()) { + assertTrue("Region size should be 0.", entry.getValue() == 0); + } + + admin.deleteSnapshot(snapshotName); + + // table has some data + TEST_UTIL.loadTable(admin.getConnection().getTable(tableName), Bytes.toBytes("info")); + admin.snapshot(snapshotName, tableName); + snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, + TEST_UTIL.getDefaultRootDirPath()); + snapshotDesc = + SnapshotDescriptionUtils.readSnapshotInfo(TEST_UTIL.getTestFileSystem(), snapshotDir); + manifest = SnapshotManifest.open(TEST_UTIL.getConfiguration(), TEST_UTIL.getTestFileSystem(), + snapshotDir, snapshotDesc); + regionSizes = + SnapshotRegionSizeCalculator.calculateRegionSizes(TEST_UTIL.getConfiguration(), manifest); + for (Map.Entry entry : regionSizes.entrySet()) { + assertTrue("Region size should be greater than 0.", entry.getValue() > 0); + } + + TEST_UTIL.deleteTable(tableName); + admin.deleteSnapshot(snapshotName); + } +}