Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
afa8b6c
Recognizes expectedDataGeneration = -1L as a sentinel for "create if …
peterxcli Nov 20, 2025
7868a86
correct the new result code position
peterxcli Nov 24, 2025
d630699
Merge remote-tracking branch 'upstream/master' into HDDS-13963-atomic…
peterxcli Nov 26, 2025
f7ad505
Merge remote-tracking branch 'upstream/master' into HDDS-13963-atomic…
peterxcli Dec 3, 2025
2fdb6de
Merge remote-tracking branch 'upstream/master' into HDDS-13963-atomic…
peterxcli Dec 3, 2025
da492ce
HDDS-14070. Update key generation mismatch handling in Ozone RPC clie…
peterxcli Dec 3, 2025
0d4dfb0
Introduce EXPECTED_GEN_CREATE_IF_NOT_EXISTS constant
peterxcli Dec 3, 2025
f238ade
Merge remote-tracking branch 'upstream/master' into HDDS-13963-atomic…
peterxcli Dec 25, 2025
4e9665c
revert the new KEY_GENERATION_MISMATCH error code
peterxcli Jan 2, 2026
b5249f1
Add tests for atomic key creation and rewriting with expected generat…
peterxcli Jan 2, 2026
817e33f
Recognizes expectedDataGeneration = -1L as a sentinel for "create if …
peterxcli Nov 20, 2025
021747b
correct the new result code position
peterxcli Nov 24, 2025
0fd6730
HDDS-14070. Update key generation mismatch handling in Ozone RPC clie…
peterxcli Dec 3, 2025
c64beca
Introduce EXPECTED_GEN_CREATE_IF_NOT_EXISTS constant
peterxcli Dec 3, 2025
6b33766
revert the new KEY_GENERATION_MISMATCH error code
peterxcli Jan 2, 2026
83ef2ea
Add tests for atomic key creation and rewriting with expected generat…
peterxcli Jan 2, 2026
6c8a08e
Implement S3 conditional write support
peterxcli Feb 24, 2026
57d7788
Add aws kit tests for S3 conditional object put
peterxcli Mar 12, 2026
6d85ee1
Add tests for S3 object put with conditional headers
peterxcli Mar 25, 2026
c15cd6a
Merge branch 'master' into HDDS-13919-conditional-writes
peterxcli Mar 25, 2026
88aa08e
Merge branch 'master' into HDDS-13963-atomic-create-if-not-exists
peterxcli Mar 25, 2026
a1898c0
Add ATOMIC_CREATE_IF_NOT_EXISTS version gate
peterxcli Mar 25, 2026
e5f5e3d
suggestion
peterxcli Mar 25, 2026
86489b6
fix pmd
peterxcli Mar 25, 2026
9081e96
Merge branch 'HDDS-13963-atomic-create-if-not-exists' into HDDS-13919…
peterxcli Mar 25, 2026
d6395a9
fix pmd
peterxcli Mar 25, 2026
c10fb9e
fix new smoke test
peterxcli Mar 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,8 @@ public final class OzoneConsts {
public static final String TENANT = "tenant";
public static final String USER_PREFIX = "userPrefix";
public static final String REWRITE_GENERATION = "rewriteGeneration";
/** Sentinel generation used to request atomic create-if-not-exists(put if absent) semantics. */
public static final long EXPECTED_GEN_CREATE_IF_NOT_EXISTS = -1L;
public static final String FROM_SNAPSHOT = "fromSnapshot";
public static final String TO_SNAPSHOT = "toSnapshot";
public static final String TOKEN = "token";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ public enum OzoneManagerVersion implements ComponentVersion {

S3_LIST_MULTIPART_UPLOADS_PAGINATION(11,
"OzoneManager version that supports S3 list multipart uploads API with pagination"),

ATOMIC_CREATE_IF_NOT_EXISTS(12,
"OzoneManager version that supports explicit create-if-not-exists key semantics"),

FUTURE_VERSION(-1, "Used internally in the client when the server side is "
+ " newer and an unknown server version has arrived to the client.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ public OzoneOutputStream createKey(String key, long size,
*
* @param keyName Existing key to rewrite. This must exist in the bucket.
* @param size The size of the new key
* @param existingKeyGeneration The generation of the existing key which is checked for changes at key create
* @param existingKeyGeneration The positive generation of the existing key which is checked for changes at key create
* and commit time.
* @param replicationConfig The replication configuration for the key to be rewritten.
* @param metadata custom key value metadata
Expand All @@ -520,6 +520,44 @@ public OzoneOutputStream rewriteKey(String keyName, long size, long existingKeyG
return proxy.rewriteKey(volumeName, name, keyName, size, existingKeyGeneration, replicationConfig, metadata);
}

/**
* Creates a key only if it does not exist (S3 If-None-Match: * semantics).
*
* @param keyName Name of the key
* @param size Size of the data
* @param replicationConfig Replication configuration
* @param metadata custom key value metadata
* @param tags Tags used for S3 object tags
* @return OzoneOutputStream to which the data has to be written.
* @throws IOException
*/
public OzoneOutputStream createKeyIfNotExists(String keyName, long size,
ReplicationConfig replicationConfig, Map<String, String> metadata,
Map<String, String> tags) throws IOException {
return proxy.createKeyIfNotExists(volumeName, name, keyName, size,
replicationConfig, metadata, tags);
}

/**
* Rewrites a key only if its ETag matches (S3 If-Match semantics).
*
* @param keyName Name of the key
* @param size Size of the data
* @param expectedETag The ETag value the existing key must have
* @param replicationConfig Replication configuration
* @param metadata custom key value metadata
* @param tags Tags used for S3 object tags
* @return OzoneOutputStream to which the data has to be written.
* @throws IOException
*/
public OzoneOutputStream rewriteKeyIfMatch(String keyName, long size,
String expectedETag, ReplicationConfig replicationConfig,
Map<String, String> metadata, Map<String, String> tags)
throws IOException {
return proxy.rewriteKeyIfMatch(volumeName, name, keyName, size,
expectedETag, replicationConfig, metadata, tags);
}

/**
* Creates a new key in the bucket, with default replication type RATIS and
* with replication factor THREE.
Expand Down Expand Up @@ -1047,8 +1085,8 @@ public List<OzoneFileStatus> listStatus(String keyName, boolean recursive,
*
* @param prefix Optional string to filter for the selected keys.
*/
public OzoneMultipartUploadList listMultipartUploads(String prefix,
String keyMarker, String uploadIdMarker, int maxUploads)
public OzoneMultipartUploadList listMultipartUploads(String prefix,
String keyMarker, String uploadIdMarker, int maxUploads)
throws IOException {
return proxy.listMultipartUploads(volumeName, getName(), prefix, keyMarker, uploadIdMarker, maxUploads);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ OzoneOutputStream createKey(String volumeName, String bucketName,
* @param bucketName Name of the Bucket
* @param keyName Existing key to rewrite. This must exist in the bucket.
* @param size The size of the new key
* @param existingKeyGeneration The generation of the existing key which is checked for changes at key create
* @param existingKeyGeneration The positive generation of the existing key which is checked for changes at key create
* and commit time.
* @param replicationConfig The replication configuration for the key to be rewritten.
* @param metadata custom key value metadata
Expand All @@ -379,6 +379,44 @@ OzoneOutputStream rewriteKey(String volumeName, String bucketName, String keyNam
long size, long existingKeyGeneration, ReplicationConfig replicationConfig,
Map<String, String> metadata) throws IOException;

/**
* Creates a key only if it does not exist (S3 If-None-Match: * semantics).
*
* @param volumeName Name of the Volume
* @param bucketName Name of the Bucket
* @param keyName Name of the Key
* @param size Size of the data
* @param replicationConfig The replication configuration
* @param metadata custom key value metadata
* @param tags Tags used for S3 object tags
* @return {@link OzoneOutputStream}
* @throws OMException with KEY_ALREADY_EXISTS if key exists
*/
OzoneOutputStream createKeyIfNotExists(String volumeName, String bucketName,
String keyName, long size, ReplicationConfig replicationConfig,
Map<String, String> metadata, Map<String, String> tags)
throws IOException;

/**
* Rewrites a key only if its ETag matches (S3 If-Match semantics).
*
* @param volumeName Name of the Volume
* @param bucketName Name of the Bucket
* @param keyName Name of the Key
* @param size Size of the data
* @param expectedETag The ETag value the existing key must have
* @param replicationConfig The replication configuration
* @param metadata custom key value metadata
* @param tags Tags used for S3 object tags
* @return {@link OzoneOutputStream}
* @throws OMException with ETAG_MISMATCH, ETAG_NOT_AVAILABLE, or KEY_NOT_FOUND
*/
@SuppressWarnings("checkstyle:parameternumber")
OzoneOutputStream rewriteKeyIfMatch(String volumeName, String bucketName,
String keyName, long size, String expectedETag,
ReplicationConfig replicationConfig, Map<String, String> metadata,
Map<String, String> tags) throws IOException;

/**
* Writes a key in an existing bucket.
* @param volumeName Name of the Volume
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ public void createBucket(
builder.setDefaultReplicationConfig(defaultReplicationConfig);
}

String replicationType = defaultReplicationConfig == null
String replicationType = defaultReplicationConfig == null
? "server-side default replication type"
: defaultReplicationConfig.getType().toString();

Expand Down Expand Up @@ -1317,7 +1317,7 @@ public List<OzoneBucket> listBuckets(String volumeName, String bucketPrefix,
List<OmBucketInfo> buckets = ozoneManagerClient.listBuckets(
volumeName, prevBucket, bucketPrefix, maxListResult, hasSnapshot);

return buckets.stream().map(bucket ->
return buckets.stream().map(bucket ->
OzoneBucket.newBuilder(conf, this)
.setVolumeName(bucket.getVolumeName())
.setName(bucket.getBucketName())
Expand Down Expand Up @@ -1408,6 +1408,9 @@ public OzoneOutputStream rewriteKey(String volumeName, String bucketName, String
if (omVersion.compareTo(OzoneManagerVersion.ATOMIC_REWRITE_KEY) < 0) {
throw new IOException("OzoneManager does not support atomic key rewrite.");
}
Preconditions.checkArgument(existingKeyGeneration > 0,
"existingKeyGeneration must be positive, but was %s",
existingKeyGeneration);

createKeyPreChecks(volumeName, bucketName, keyName, replicationConfig);

Expand All @@ -1432,6 +1435,68 @@ public OzoneOutputStream rewriteKey(String volumeName, String bucketName, String
return createOutputStream(openKey);
}

@Override
public OzoneOutputStream createKeyIfNotExists(String volumeName,
String bucketName, String keyName, long size,
ReplicationConfig replicationConfig, Map<String, String> metadata,
Map<String, String> tags) throws IOException {
if (omVersion.compareTo(OzoneManagerVersion.ATOMIC_REWRITE_KEY) < 0) {
throw new IOException(
"OzoneManager does not support atomic key creation.");
}

createKeyPreChecks(volumeName, bucketName, keyName, replicationConfig);

OmKeyArgs.Builder builder = new OmKeyArgs.Builder()
.setVolumeName(volumeName)
.setBucketName(bucketName)
.setKeyName(keyName)
.setDataSize(size)
.setReplicationConfig(replicationConfig)
.addAllMetadataGdpr(metadata)
.addAllTags(tags)
.setLatestVersionLocation(getLatestVersionLocation)
.setExpectedDataGeneration(
OzoneConsts.EXPECTED_GEN_CREATE_IF_NOT_EXISTS);

OpenKeySession openKey = ozoneManagerClient.openKey(builder.build());
if (isS3GRequest.get() && size == 0) {
openKey.getKeyInfo().setDataSize(0);
}
return createOutputStream(openKey);
}

@Override
@SuppressWarnings("checkstyle:parameternumber")
public OzoneOutputStream rewriteKeyIfMatch(String volumeName,
String bucketName, String keyName, long size, String expectedETag,
ReplicationConfig replicationConfig, Map<String, String> metadata,
Map<String, String> tags) throws IOException {
if (omVersion.compareTo(OzoneManagerVersion.ATOMIC_REWRITE_KEY) < 0) {
throw new IOException(
"OzoneManager does not support conditional key rewrite.");
}

createKeyPreChecks(volumeName, bucketName, keyName, replicationConfig);

OmKeyArgs.Builder builder = new OmKeyArgs.Builder()
.setVolumeName(volumeName)
.setBucketName(bucketName)
.setKeyName(keyName)
.setDataSize(size)
.setReplicationConfig(replicationConfig)
.addAllMetadataGdpr(metadata)
.addAllTags(tags)
.setLatestVersionLocation(getLatestVersionLocation)
.setExpectedETag(expectedETag);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it set setExpectedDataGeneration here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, please take a look at the design doc:

#### If-Match Implementation
To optimize performance and reduce latency, we avoid a pre-flight check (GetS3KeyDetails) and instead validate the ETag during the OM Write operation.
This requires adding an optional `expectedETag` field to `KeyArgs`. This approach optimizes the "happy path" (successful match) by removing an extra network round trip.
For failing requests, they still incur the cost of a write RPC and Raft log entry, but this is acceptable under optimistic concurrency control assumptions.
##### S3 Gateway Layer
1. Parse `If-Match: "<etag>"` header.
2. Populate `KeyArgs` with the parsed `expectedETag`.
3. Send the write request (CreateKey) to OM.
##### OM Create Phase
Validation is performed within the `validateAndUpdateCache` method to ensure atomicity within the Ratis state machine application.
1. **Locking**: The OM acquires the write lock for the bucket/key.
2. **Key Lookup**: Retrieve the existing key from `KeyTable`.
3. **Validation**:
- **Key Not Found**: If the key does not exist, throw `KEY_NOT_FOUND` (maps to S3 412).
- **No ETag Metadata**: If the existing key (e.g., uploaded via OFS) does not have an ETag property, throw `ETAG_NOT_AVAILABLE` (maps to S3 412). The precondition cannot be evaluated, so we must fail rather than silently proceed.
- **ETag Mismatch**: Compare `existingKey.ETag` with `expectedETag`. If they do not match, throw `ETAG_MISMATCH` (maps to S3 412).
4. **Extract Generation**: If ETag matches, extract `existingKey.updateID`.
5. **Create Open Key**: Create open key entry with `expectedDataGeneration = existingKey.updateID`.


OpenKeySession openKey = ozoneManagerClient.openKey(builder.build());
if (isS3GRequest.get() && size == 0) {
openKey.getKeyInfo().setDataSize(0);
}
return createOutputStream(openKey);
}

private void createKeyPreChecks(String volumeName, String bucketName, String keyName,
ReplicationConfig replicationConfig) throws IOException {
verifyVolumeName(volumeName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ public enum ResultCodes {
USER_MISMATCH, // Error code when requested user name passed is different
// from remote user.

INVALID_PART, // When part name is not found or not matching with partname
INVALID_PART, // When part name is not found or not matching with partname
// in OM MPU partInfo.

INVALID_PART_ORDER, // When list of parts mentioned to complete MPU are not
Expand Down Expand Up @@ -267,13 +267,17 @@ public enum ResultCodes {
UNAUTHORIZED,

S3_SECRET_ALREADY_EXISTS,

INVALID_PATH,
TOO_MANY_BUCKETS,
KEY_UNDER_LEASE_RECOVERY,
KEY_ALREADY_CLOSED,
KEY_UNDER_LEASE_SOFT_LIMIT_PERIOD,

TOO_MANY_SNAPSHOTS,

ETAG_MISMATCH,

ETAG_NOT_AVAILABLE,
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ public final class OmKeyArgs extends WithMetadata implements Auditable {
// This allows a key to be created an committed atomically if the original has not
// been modified.
private Long expectedDataGeneration = null;
private final String expectedETag;

private OmKeyArgs(Builder b) {
super(b);
Expand All @@ -82,6 +83,7 @@ private OmKeyArgs(Builder b) {
this.ownerName = b.ownerName;
this.tags = b.tags.build();
this.expectedDataGeneration = b.expectedDataGeneration;
this.expectedETag = b.expectedETag;
}

public boolean getIsMultipartKey() {
Expand Down Expand Up @@ -164,6 +166,10 @@ public Long getExpectedDataGeneration() {
return expectedDataGeneration;
}

public String getExpectedETag() {
return expectedETag;
}

@Override
public Map<String, String> toAuditMap() {
Map<String, String> auditMap = new LinkedHashMap<>();
Expand Down Expand Up @@ -209,6 +215,9 @@ public KeyArgs toProtobuf() {
if (expectedDataGeneration != null) {
builder.setExpectedDataGeneration(expectedDataGeneration);
}
if (expectedETag != null) {
builder.setExpectedETag(expectedETag);
}
return builder.build();
}

Expand All @@ -234,6 +243,7 @@ public static class Builder extends WithMetadata.Builder {
private boolean forceUpdateContainerCacheFromSCM;
private final MapBuilder<String, String> tags;
private Long expectedDataGeneration = null;
private String expectedETag;

public Builder() {
this(AclListBuilder.empty());
Expand Down Expand Up @@ -263,6 +273,7 @@ public Builder(OmKeyArgs obj) {
this.forceUpdateContainerCacheFromSCM =
obj.forceUpdateContainerCacheFromSCM;
this.expectedDataGeneration = obj.expectedDataGeneration;
this.expectedETag = obj.expectedETag;
this.tags = MapBuilder.of(obj.tags);
this.acls = AclListBuilder.of(obj.acls);
}
Expand Down Expand Up @@ -398,6 +409,11 @@ public Builder setExpectedDataGeneration(long generation) {
return this;
}

public Builder setExpectedETag(String eTag) {
this.expectedETag = eTag;
return this;
}

public OmKeyArgs build() {
return new OmKeyArgs(this);
}
Expand Down
Loading