@@ -56,10 +56,17 @@ public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate
this.delegate = delegate;
}

/**
* @deprecated since 4.0.0. Use
* {@link #TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit)}. This
* constructor will be removed in a future release.
* @see <a href="https://issues.apache.org/jira/browse/HBASE-29272">HBASE-29272</a>
*/
@Deprecated
Contributor

Please add javadoc and a deprecated tag to specify the life cycle of these APIs. You can find examples in the current code base. Please also add some documentation explaining why they are deprecated.

public TableSnapshotRegionSplit(TableDescriptor htd, RegionInfo regionInfo,
List<String> locations, Scan scan, Path restoreDir) {
this.delegate =
new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
this.delegate = new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan,
restoreDir, 1);
}

@Override
@@ -94,10 +94,17 @@ public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate
this.delegate = delegate;
}

/**
* @deprecated since 4.0.0. Use
* {@link #TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit)}. This
* constructor will be removed in a future release.
* @see <a href="https://issues.apache.org/jira/browse/HBASE-29272">HBASE-29272</a>
*/
@Deprecated
public TableSnapshotRegionSplit(TableDescriptor htd, RegionInfo regionInfo,
Contributor

The class is marked as IA.Public, so you cannot delete a public method from it directly. You need to deprecate it for a whole major release cycle before deleting it.

List<String> locations, Scan scan, Path restoreDir) {
this.delegate =
new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
this.delegate = new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan,
restoreDir, 1);
}

@Override
@@ -24,6 +24,7 @@
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -47,6 +48,7 @@
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.snapshot.SnapshotRegionSizeCalculator;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.RegionSplitter;
@@ -145,13 +147,14 @@ public static class InputSplit implements Writable {
private String[] locations;
private String scan;
private String restoreDir;
private long length;

// constructor for mapreduce framework / Writable
public InputSplit() {
}

public InputSplit(TableDescriptor htd, RegionInfo regionInfo, List<String> locations, Scan scan,
Path restoreDir) {
Path restoreDir, long length) {
this.htd = htd;
this.regionInfo = regionInfo;
if (locations == null || locations.isEmpty()) {
@@ -166,6 +169,7 @@ public InputSplit(TableDescriptor htd, RegionInfo regionInfo, List<String> locat
}

this.restoreDir = restoreDir.toString();
this.length = length;
}

public TableDescriptor getHtd() {
@@ -181,8 +185,7 @@ public String getRestoreDir() {
}

public long getLength() {
// TODO: We can obtain the file sizes of the snapshot here.
return 0;
return length;
}

public String[] getLocations() {
@@ -440,6 +443,8 @@ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
}
}

Map<String, Long> regionSizes =
SnapshotRegionSizeCalculator.calculateRegionSizes(conf, manifest);
List<InputSplit> splits = new ArrayList<>();
for (RegionInfo hri : regionManifests) {
// load region descriptor
@@ -455,7 +460,7 @@ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
hosts = calculateLocationsForInputSplit(conf, htd, hri, tableDir);
}
}

long snapshotRegionSize = regionSizes.getOrDefault(hri.getEncodedName(), 0L);
if (numSplits > 1) {
byte[][] sp = sa.split(hri.getStartKey(), hri.getEndKey(), numSplits, true);
for (int i = 0; i < sp.length - 1; i++) {
@@ -478,7 +483,8 @@ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
Bytes.compareTo(scan.getStopRow(), sp[i + 1]) < 0 ? scan.getStopRow() : sp[i + 1]);
}

splits.add(new InputSplit(htd, hri, hosts, boundedScan, restoreDir));
splits.add(new InputSplit(htd, hri, hosts, boundedScan, restoreDir,
snapshotRegionSize / numSplits));
}
}
} else {
@@ -487,7 +493,7 @@ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
hri.getEndKey())
) {

splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir, snapshotRegionSize));
}
}
}
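For context, a rough sketch of how the new split lengths surface to a MapReduce job over a snapshot; the snapshot name, restore directory, and IdentityTableMapper wiring are illustrative assumptions, not part of this change:

// Hedged sketch, not part of this PR: submit-time view of snapshot splits.
void printSnapshotSplitSizes(Configuration conf) throws Exception {
  Job job = Job.getInstance(conf, "snapshot-split-sizes");
  // "test_snapshot" and the restore dir are placeholders for this example.
  TableMapReduceUtil.initTableSnapshotMapperJob("test_snapshot", new Scan(),
    IdentityTableMapper.class, ImmutableBytesWritable.class, Result.class, job, true,
    new Path("/tmp/snapshot_restore"));
  for (InputSplit split : new TableSnapshotInputFormat().getSplits(job)) {
    // Previously always 0; now the region's aggregated store file size
    // (divided by numSplits when the region is split further).
    System.out.println(split.getLength() + " bytes");
  }
}

Nonzero lengths let the framework take split size into account, for example when ordering splits by size at job submission.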
@@ -51,7 +51,6 @@
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.DefaultParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
@@ -151,6 +150,7 @@ String getStateToString() {
private AtomicInteger logsCount = new AtomicInteger();
private AtomicLong hfilesArchiveSize = new AtomicLong();
private AtomicLong hfilesSize = new AtomicLong();
private Map<String, Long> regionSizeMap = new ConcurrentHashMap<>();
private AtomicLong hfilesMobSize = new AtomicLong();
private AtomicLong nonSharedHfilesArchiveSize = new AtomicLong();
private AtomicLong logSize = new AtomicLong();
@@ -176,6 +176,14 @@ String getStateToString() {
this.fs = fs;
}

/**
* Returns the map of region sizes collected for this snapshot.
* @return A map where keys are region encoded names and values are their total sizes in bytes.
*/
public Map<String, Long> getRegionSizeMap() {
return regionSizeMap;
}

/** Returns the snapshot descriptor */
public SnapshotDescription getSnapshotDescription() {
return ProtobufUtil.createSnapshotDesc(this.snapshot);
@@ -347,6 +355,12 @@ FileInfo addStoreFile(final RegionInfo region, final String family,
return new FileInfo(inArchive, size, isCorrupted);
}

void updateRegionSizeMap(final RegionInfo region,
final SnapshotRegionManifest.StoreFile storeFile) {
long currentSize = regionSizeMap.getOrDefault(region.getEncodedName(), 0L);
regionSizeMap.put(region.getEncodedName(), currentSize + storeFile.getFileSize());
}

/**
* Add the specified log file to the stats
* @param server server name
@@ -613,6 +627,7 @@ public void storeFile(final RegionInfo regionInfo, final String family,
if (!storeFile.hasReference()) {
stats.addStoreFile(regionInfo, family, storeFile, filesMap);
}
stats.updateRegionSizeMap(regionInfo, storeFile);
}
});
return stats;
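A minimal sketch of reading the new per-region totals through the existing stats entry point; conf, snapshotDesc, and encodedRegionName are assumed to be in scope, as in current getSnapshotStats callers:

SnapshotInfo.SnapshotStats stats = SnapshotInfo.getSnapshotStats(conf, snapshotDesc, null);
// Per-region totals, keyed by encoded region name, filled by the storeFile visitor above.
long regionBytes = stats.getRegionSizeMap().getOrDefault(encodedRegionName, 0L);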
@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.snapshot;

import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hbase.snapshot.SnapshotInfo.SnapshotStats;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.yetus.audience.InterfaceAudience;

import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;

/**
* Utility class to calculate the size of each region in a snapshot.
*/
@InterfaceAudience.Private
public class SnapshotRegionSizeCalculator {

/**
* Calculate the size of each region in the snapshot.
* @return A map of region encoded names to their total size in bytes.
* @throws IOException If an error occurs during calculation.
*/
public static Map<String, Long> calculateRegionSizes(Configuration conf,
SnapshotManifest manifest) throws IOException {
FileSystem fs = FileSystem.get(CommonFSUtils.getRootDir(conf).toUri(), conf);
SnapshotProtos.SnapshotDescription snapshot =
SnapshotDescriptionUtils.readSnapshotInfo(fs, manifest.getSnapshotDir());
SnapshotStats stats = SnapshotInfo.getSnapshotStats(conf, snapshot, null);
return stats.getRegionSizeMap();
}

}
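A minimal standalone usage sketch for the new utility; the snapshot name "my_snapshot" is a placeholder, and the test below exercises the same path against a mini cluster:

Configuration conf = HBaseConfiguration.create();
FileSystem fs = FileSystem.get(CommonFSUtils.getRootDir(conf).toUri(), conf);
// Locate a completed snapshot under the cluster root dir ("my_snapshot" is assumed).
Path snapshotDir =
  SnapshotDescriptionUtils.getCompletedSnapshotDir("my_snapshot", CommonFSUtils.getRootDir(conf));
SnapshotProtos.SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, desc);
// Encoded region name -> total store file size in bytes.
Map<String, Long> regionSizes = SnapshotRegionSizeCalculator.calculateRegionSizes(conf, manifest);
regionSizes.forEach((encodedName, bytes) -> System.out.println(encodedName + " -> " + bytes));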
@@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.snapshot;

import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;

@Category(SmallTests.class)
public class TestSnapshotRegionSizeCalculator {

@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestSnapshotRegionSizeCalculator.class);

private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
private static FileSystem fs;
private static Path rootDir;
private static Path snapshotDir;
private static SnapshotProtos.SnapshotDescription snapshotDesc;
private static SnapshotManifest manifest;
private static Admin admin;
private static Configuration conf;

@BeforeClass
public static void setupCluster() throws Exception {
TEST_UTIL.startMiniCluster(3);
conf = TEST_UTIL.getConfiguration();
fs = TEST_UTIL.getTestFileSystem();
rootDir = TEST_UTIL.getDataTestDir("TestSnapshotRegionSizeCalculator");
CommonFSUtils.setRootDir(conf, rootDir);
admin = TEST_UTIL.getAdmin();
}

@AfterClass
public static void tearDown() throws Exception {
fs.delete(rootDir, true);
TEST_UTIL.shutdownMiniCluster();
}

@Test
public void testCalculateRegionSize() throws IOException {
TableName tableName = TableName.valueOf("test_table");
String snapshotName = "test_snapshot";

// table has no data
TEST_UTIL.createTable(tableName, Bytes.toBytes("info"));
admin = TEST_UTIL.getConnection().getAdmin();
admin.snapshot(snapshotName, tableName);
Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName,
TEST_UTIL.getDefaultRootDirPath());
SnapshotProtos.SnapshotDescription snapshotDesc =
SnapshotDescriptionUtils.readSnapshotInfo(TEST_UTIL.getTestFileSystem(), snapshotDir);
SnapshotManifest manifest = SnapshotManifest.open(TEST_UTIL.getConfiguration(),
TEST_UTIL.getTestFileSystem(), snapshotDir, snapshotDesc);

Map<String, Long> regionSizes =
SnapshotRegionSizeCalculator.calculateRegionSizes(TEST_UTIL.getConfiguration(), manifest);

for (Map.Entry<String, Long> entry : regionSizes.entrySet()) {
assertTrue("Region size should be 0.", entry.getValue() == 0);
}

admin.deleteSnapshot(snapshotName);

// table has some data
TEST_UTIL.loadTable(admin.getConnection().getTable(tableName), Bytes.toBytes("info"));
admin.snapshot(snapshotName, tableName);
snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName,
TEST_UTIL.getDefaultRootDirPath());
snapshotDesc =
SnapshotDescriptionUtils.readSnapshotInfo(TEST_UTIL.getTestFileSystem(), snapshotDir);
manifest = SnapshotManifest.open(TEST_UTIL.getConfiguration(), TEST_UTIL.getTestFileSystem(),
snapshotDir, snapshotDesc);
regionSizes =
SnapshotRegionSizeCalculator.calculateRegionSizes(TEST_UTIL.getConfiguration(), manifest);
for (Map.Entry<String, Long> entry : regionSizes.entrySet()) {
assertTrue("Region size should be greater than 0.", entry.getValue() > 0);
}

TEST_UTIL.deleteTable(tableName);
admin.deleteSnapshot(snapshotName);
}
}
Contributor Author

Hello @guluo2016 , I have seen your comments. I am not sure why the test case did not pass a few days ago and have been busy resolving it😭... It passes now. Please review the latest changes; the test case covers two scenarios:

  1. the table has no data, and the region size is 0
  2. the table has some data, and the region size is greater than 0