From be5a6ef2202c4b6161f412f871d73baa78c27d8d Mon Sep 17 00:00:00 2001 From: David Capwell Date: Tue, 20 Jan 2026 15:24:33 -0800 Subject: [PATCH 1/4] CASSANDRA-21127: Added user facing docs for accord --- doc/modules/cassandra/nav.adoc | 2 + .../cassandra/pages/developing/cql/dml.adoc | 93 ++ .../cassandra/pages/developing/cql/index.adoc | 2 + .../developing/cql/transactions-examples.adoc | 416 +++++++++ .../pages/developing/cql/transactions.adoc | 862 ++++++++++++++++++ 5 files changed, 1375 insertions(+) create mode 100644 doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc create mode 100644 doc/modules/cassandra/pages/developing/cql/transactions.adoc diff --git a/doc/modules/cassandra/nav.adoc b/doc/modules/cassandra/nav.adoc index 09fcdd884305..5968c0718a39 100644 --- a/doc/modules/cassandra/nav.adoc +++ b/doc/modules/cassandra/nav.adoc @@ -43,6 +43,8 @@ *** xref:cassandra:developing/cql/types.adoc[Data types] *** xref:cassandra:developing/cql/ddl.adoc[Data definition (DDL)] *** xref:cassandra:developing/cql/dml.adoc[Data manipulation (DML)] +*** xref:cassandra:developing/cql/transactions.adoc[Transactions] +*** xref:cassandra:developing/cql/transactions-examples.adoc[Transaction examples] *** xref:cassandra:developing/cql/dynamic-data-masking.adoc[] *** xref:cassandra:developing/cql/operators.adoc[Operators] *** xref:cassandra:developing/cql/indexing/indexing-concepts.adoc[] diff --git a/doc/modules/cassandra/pages/developing/cql/dml.adoc b/doc/modules/cassandra/pages/developing/cql/dml.adoc index 4f13ba2b0c52..e57ed59f9e8f 100644 --- a/doc/modules/cassandra/pages/developing/cql/dml.adoc +++ b/doc/modules/cassandra/pages/developing/cql/dml.adoc @@ -474,3 +474,96 @@ partly applied. Use the `COUNTER` option for batched counter updates. Unlike other updates in Cassandra, counter updates are not idempotent. 
+ +=== BATCH vs Transactions + +While BATCH statements provide atomicity for multiple operations, they have limitations compared to Accord transactions. Understanding the differences helps you choose the right approach for your use case. + +==== BATCH Statement Capabilities + +BATCH statements offer: + +* **Single-partition atomicity**: All operations within a single partition succeed or fail together +* **Single-partition isolation**: Operations on the same partition are isolated +* **Network efficiency**: Multiple operations in one round-trip +* **Automatic timestamps**: All operations use the same timestamp by default +* **Conditional updates**: Batches support `IF` clauses for lightweight transactions (LWT/CAS) on a single partition. If any condition fails, the entire batch is rejected. +* **Basic business rule enforcement**: `IF` clauses can validate conditions before applying changes, though limited to what CAS syntax supports + +==== BATCH Statement Limitations + +However, BATCH statements cannot provide: + +* **Cross-partition atomicity**: Multi-partition LOGGED batches use a batch log to ensure eventual completion, but are not truly atomic. There can be a window where only some operations are visible. 
+* **Read-before-write patterns**: Cannot read data within the batch to make decisions based on current values +* **Cross-partition conditional updates**: Conditional batches (using `IF` clauses) must operate on a single partition and table +* **Complex conditional logic**: `IF` clauses are limited to simple equality/inequality checks on existing column values + +==== When to Use Accord Transactions Instead + +Consider using xref:developing/cql/transactions.adoc[Accord transactions] when you need: + +**Read-Modify-Write Patterns:** +[source,cql] +---- +-- BATCH cannot check balance before transfer +BEGIN BATCH + UPDATE accounts SET balance = balance - 100 WHERE user_id = 1; + UPDATE accounts SET balance = balance + 100 WHERE user_id = 2; +APPLY BATCH; + +-- Transaction can validate before transfer +BEGIN TRANSACTION + LET sender = (SELECT balance FROM accounts WHERE user_id = 1); + + IF sender.balance >= 100 THEN + UPDATE accounts SET balance = balance - 100 WHERE user_id = 1; + UPDATE accounts SET balance = balance + 100 WHERE user_id = 2; + END IF +COMMIT TRANSACTION +---- + +**Complex Business Logic:** + +NOTE: Row reference arithmetic in SET clauses and comparing two row references are not currently supported. Pass values as parameters instead. + +[source,cql] +---- +-- Application code: +-- double productPrice = 50.00; // Retrieved from products table + +-- Transaction can enforce multi-step business rules +BEGIN TRANSACTION + LET user_account = (SELECT balance, status FROM accounts WHERE user_id = ?); + LET product_info = (SELECT quantity FROM products WHERE id = ?); + + IF user_account.status = 'active' + AND user_account.balance >= ? -- Pass product_price as parameter + AND product_info.quantity > 0 THEN + + UPDATE accounts SET balance = balance - ? 
WHERE user_id = ?; -- Pass product_price + UPDATE products SET quantity = quantity - 1 WHERE id = ?; + INSERT INTO orders (id, user_id, product_id, amount) VALUES (?, ?, ?, ?); -- Pass product_price + END IF +COMMIT TRANSACTION +---- + +**Error Prevention:** +Transactions prevent invalid operations like: + +* Overdrawing accounts +* Selling out-of-stock items +* Creating duplicate records +* Violating business constraints + +==== When to Continue Using BATCH + +BATCH statements remain appropriate for: + +* **Simple multi-table updates** without conditional logic +* **Same-partition operations** where isolation is sufficient +* **High-throughput scenarios** where transaction overhead isn't justified +* **Counter updates** (transactions don't support counters) +* **Backward compatibility** with existing applications + +For detailed transaction syntax and examples, see the xref:developing/cql/transactions.adoc[Transactions] and xref:developing/cql/transactions-examples.adoc[Transaction Examples] documentation. 
diff --git a/doc/modules/cassandra/pages/developing/cql/index.adoc b/doc/modules/cassandra/pages/developing/cql/index.adoc index 97204ce5687e..8e5e9a8afef8 100644 --- a/doc/modules/cassandra/pages/developing/cql/index.adoc +++ b/doc/modules/cassandra/pages/developing/cql/index.adoc @@ -13,6 +13,8 @@ For that reason, when used in this document, these terms (tables, rows and colum * xref:developing/cql/types.adoc[Data types] * xref:developing/cql/ddl.adoc[Data definition language] * xref:developing/cql/dml.adoc[Data manipulation language] +* xref:developing/cql/transactions.adoc[Transactions] +* xref:developing/cql/transactions-examples.adoc[Transaction examples] * xref:developing/cql/dynamic-data-masking.adoc[Dynamic data masking] * xref:developing/cql/operators.adoc[Operators] * xref:developing/cql/indexing/indexing-concepts.adoc[Indexing] diff --git a/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc b/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc new file mode 100644 index 000000000000..df1753137260 --- /dev/null +++ b/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc @@ -0,0 +1,416 @@ += Accord Transaction Design Patterns +:page-nav-title: Transaction Patterns + +This page provides advanced design patterns for Accord transactions. These patterns solve common distributed system challenges that were difficult or impossible to address with eventual consistency. + +For basic syntax, getting started, and migration guides, see xref:developing/cql/transactions.adoc[Accord Transactions]. + +== Pattern: Synchronous Unique Constraints + +Cassandra's primary key enforces uniqueness, but what if you need uniqueness on a non-primary-key column like `email` or `username`? This pattern uses sidecar index tables to enforce multiple unique constraints atomically. 
+ +=== The Challenge + +You have a `users` table keyed by `user_id`, but you also need: + +* Unique `email` addresses +* Unique `username` values +* The ability to **change** email or username while maintaining uniqueness + +=== Schema + +[source,cql] +---- +CREATE TABLE users ( + user_id uuid PRIMARY KEY, + username text, + email text, + display_name text, + created_at timestamp +) WITH transactional_mode = 'full'; + +-- Sidecar tables for uniqueness enforcement +CREATE TABLE username_index ( + username text PRIMARY KEY, + user_id uuid +) WITH transactional_mode = 'full'; + +CREATE TABLE email_index ( + email text PRIMARY KEY, + user_id uuid +) WITH transactional_mode = 'full'; +---- + +=== Creating a User with Unique Constraints + +[source,cql] +---- +BEGIN TRANSACTION + LET existing_username = (SELECT user_id FROM username_index WHERE username = ? LIMIT 1); + LET existing_email = (SELECT user_id FROM email_index WHERE email = ? LIMIT 1); + + IF existing_username IS NULL AND existing_email IS NULL THEN + INSERT INTO users (user_id, username, email, display_name, created_at) + VALUES (?, ?, ?, ?, toTimestamp(now())); + + INSERT INTO username_index (username, user_id) VALUES (?, ?); + INSERT INTO email_index (email, user_id) VALUES (?, ?); + END IF +COMMIT TRANSACTION +---- + +=== Changing a Username (The Hard Part) + +Renaming requires atomically: (1) verifying the new name is available, (2) deleting the old index entry, and (3) inserting the new one. Without transactions, a crash between steps could leave orphaned index entries or allow duplicates. + +[source,cql] +---- +-- Application provides: user_id, old_username, new_username +BEGIN TRANSACTION + LET current_user = (SELECT username FROM users WHERE user_id = ?); + LET new_name_owner = (SELECT user_id FROM username_index WHERE username = ? LIMIT 1); + + -- Verify: user exists, old username matches, new username is available + IF current_user IS NOT NULL + AND current_user.username = ? 
-- old_username parameter + AND new_name_owner IS NULL THEN + + -- Update the user record + UPDATE users SET username = ? WHERE user_id = ?; -- new_username + + -- Atomically swap index entries + DELETE FROM username_index WHERE username = ?; -- old_username + INSERT INTO username_index (username, user_id) VALUES (?, ?); -- new_username, user_id + END IF +COMMIT TRANSACTION +---- + +This pattern ensures the index tables are always consistent with the `users` table, even under concurrent modifications or partial failures. + +== Pattern: Distributed State Machine + +Many business objects follow a lifecycle with strict state transitions. An order might be `PENDING` -> `PAID` -> `SHIPPED` -> `DELIVERED`. Without transactions, concurrent operations (e.g., "cancel" and "ship") could both succeed, leaving the system in an invalid state. + +=== The Challenge + +* Ensure state transitions follow valid paths +* Prevent race conditions between competing operations +* Maintain audit trail of transitions + +NOTE: The `IN` and `OR` operators are not currently supported in transaction `IF` conditions. To check multiple valid states, use **numeric status codes** with range comparisons (e.g., `status_code < 30` to mean "any state before SHIPPED"). 
+ +=== Schema + +[source,cql] +---- +CREATE TABLE orders ( + order_id uuid PRIMARY KEY, + customer_id uuid, + status_code int, -- 10=PENDING, 20=PAID, 30=SHIPPED, 40=DELIVERED, 99=CANCELLED + status_name text, + total_amount decimal, + updated_at timestamp +) WITH transactional_mode = 'full'; + +CREATE TABLE order_status_history ( + order_id uuid, + transition_time timestamp, + from_status int, + to_status int, + actor_id uuid, + PRIMARY KEY (order_id, transition_time) +) WITH transactional_mode = 'full'; +---- + +=== Valid Transition: PAID -> SHIPPED + +[source,cql] +---- +-- Application provides: order_id, actor_id +BEGIN TRANSACTION + LET current_order = (SELECT status_code FROM orders WHERE order_id = ?); + + -- Only allow transition from PAID (20) state + IF current_order IS NOT NULL AND current_order.status_code = 20 THEN + UPDATE orders + SET status_code = 30, status_name = 'SHIPPED', updated_at = toTimestamp(now()) + WHERE order_id = ?; + + INSERT INTO order_status_history (order_id, transition_time, from_status, to_status, actor_id) + VALUES (?, toTimestamp(now()), 20, 30, ?); + END IF +COMMIT TRANSACTION +---- + +=== Handling Cancellation (Competing Transition) + +Cancellation is only valid from certain states. If a "ship" and "cancel" operation race, exactly one will succeed. + +[source,cql] +---- +-- Application provides: order_id, actor_id +BEGIN TRANSACTION + LET current_order = (SELECT status_code, customer_id, total_amount FROM orders WHERE order_id = ?); + + -- Cancellation allowed only from PENDING (10) or PAID (20) states. + -- Use range comparison (status_code <= 20) since IN/OR are not supported. 
+ IF current_order IS NOT NULL + AND current_order.status_code <= 20 THEN + + UPDATE orders + SET status_code = 99, status_name = 'CANCELLED', updated_at = toTimestamp(now()) + WHERE order_id = ?; + + INSERT INTO order_status_history (order_id, transition_time, from_status, to_status, actor_id) + VALUES (?, toTimestamp(now()), current_order.status_code, 99, ?); + END IF +COMMIT TRANSACTION +---- + +If a concurrent "ship" operation already moved the order to `SHIPPED` (30), the `status_code <= 20` condition fails and the cancellation is rejected. The application can check the result and inform the user. + +== Pattern: Synchronous Denormalization + +Cassandra best practices often involve denormalizing data for read performance. Keeping summary tables in sync with detail tables has traditionally been eventually consistent. Accord enables **synchronous denormalization** where aggregates are always accurate. + +=== The Challenge + +You have a `posts` table and want to maintain accurate counts per author without using counters (which aren't supported in transactions) and without eventual consistency lag. + +=== Schema + +[source,cql] +---- +CREATE TABLE posts ( + post_id uuid PRIMARY KEY, + author_id uuid, + title text, + content text, + status text, -- draft, published, archived + created_at timestamp +) WITH transactional_mode = 'full'; + +CREATE TABLE author_stats ( + author_id uuid PRIMARY KEY, + draft_count bigint, + published_count bigint, + archived_count bigint, + last_post_at timestamp +) WITH transactional_mode = 'full'; +---- + +=== Publishing a New Post (Increment Count) + +This example demonstrates **idempotent creation**: if the application retries a failed publish request, the post won't be duplicated and the counter won't be incremented twice. 
+ +[source,cql] +---- +-- Application provides: post_id, author_id, title, content +BEGIN TRANSACTION + -- Check if this post already exists (idempotency guard) + LET existing_post = (SELECT status FROM posts WHERE post_id = ?); + + IF existing_post IS NULL THEN + -- Create the post + INSERT INTO posts (post_id, author_id, title, content, status, created_at) + VALUES (?, ?, ?, ?, 'published', toTimestamp(now())); + + -- Synchronously update the count exactly once + UPDATE author_stats + SET published_count = published_count + 1, + last_post_at = toTimestamp(now()) + WHERE author_id = ?; + END IF +COMMIT TRANSACTION +---- + +=== Changing Post Status (Transfer Between Counts) + +When a post moves from `published` to `archived`, both counts must update atomically. + +[source,cql] +---- +-- Application provides: post_id, author_id +BEGIN TRANSACTION + LET current_post = (SELECT status, author_id FROM posts WHERE post_id = ?); + + IF current_post IS NOT NULL AND current_post.status = 'published' THEN + -- Update post status + UPDATE posts SET status = 'archived' WHERE post_id = ?; + + -- Atomically transfer between counts + UPDATE author_stats + SET published_count = published_count - 1, + archived_count = archived_count + 1 + WHERE author_id = ?; -- author_id passed as parameter + END IF +COMMIT TRANSACTION +---- + +=== Deleting a Post (Decrement Count) + +[source,cql] +---- +-- Application provides: post_id, author_id, current_status +-- Application must query the post first to get status and author_id +BEGIN TRANSACTION + LET current_post = (SELECT status FROM posts WHERE post_id = ?); + + IF current_post IS NOT NULL AND current_post.status = ? THEN -- current_status parameter + -- Delete the post + DELETE FROM posts WHERE post_id = ?; + + -- Decrement the appropriate counter based on status + -- Application passes which counter to decrement based on current_status + UPDATE author_stats + SET published_count = published_count - ? 
-- pass 1 if published, 0 otherwise + WHERE author_id = ?; + END IF +COMMIT TRANSACTION +---- + +== Pattern: Multi-Entity Referential Integrity + +Relational databases use foreign keys to prevent orphaned records. In Cassandra, you can achieve similar guarantees with transactions. + +=== The Challenge + +* A `Task` must belong to an existing `Project` +* When a `Project` is deleted, handle its `Tasks` appropriately +* Prevent creating tasks for non-existent projects + +=== Schema + +[source,cql] +---- +CREATE TABLE projects ( + project_id uuid PRIMARY KEY, + name text, + owner_id uuid, + status text, -- active, completed, deleted + task_count bigint, + created_at timestamp +) WITH transactional_mode = 'full'; + +CREATE TABLE tasks ( + task_id uuid PRIMARY KEY, + project_id uuid, + title text, + status text, -- open, in_progress, done + assignee_id uuid, + created_at timestamp +) WITH transactional_mode = 'full'; + +-- Index for finding tasks by project (for cleanup operations) +CREATE TABLE tasks_by_project ( + project_id uuid, + task_id uuid, + title text, + status text, + PRIMARY KEY (project_id, task_id) +) WITH transactional_mode = 'full'; +---- + +=== Creating a Task (Enforce Parent Exists) + +[source,cql] +---- +-- Application provides: task_id, project_id, title, assignee_id +BEGIN TRANSACTION + LET project = (SELECT status FROM projects WHERE project_id = ?); + + -- Only create task if project exists and is active + IF project IS NOT NULL AND project.status = 'active' THEN + INSERT INTO tasks (task_id, project_id, title, status, assignee_id, created_at) + VALUES (?, ?, ?, 'open', ?, toTimestamp(now())); + + INSERT INTO tasks_by_project (project_id, task_id, title, status) + VALUES (?, ?, ?, 'open'); + + UPDATE projects SET task_count = task_count + 1 WHERE project_id = ?; + END IF +COMMIT TRANSACTION +---- + +=== Soft-Deleting a Project + +Rather than cascading deletes (which would require iterating over all tasks), mark the project as deleted. 
Tasks can be cleaned up asynchronously or remain for audit purposes. + +[source,cql] +---- +-- Application provides: project_id +BEGIN TRANSACTION + LET project = (SELECT status FROM projects WHERE project_id = ?); + + IF project IS NOT NULL AND project.status = 'active' THEN + UPDATE projects + SET status = 'deleted' + WHERE project_id = ?; + END IF +COMMIT TRANSACTION +---- + +Future task operations will fail the `project.status = 'active'` check, preventing modifications to a deleted project's tasks. + +=== Moving a Task Between Projects + +This pattern ensures both projects exist and are active, and maintains accurate task counts. + +[source,cql] +---- +-- Application provides: task_id, old_project_id, new_project_id, task_title, task_status +BEGIN TRANSACTION + LET task = (SELECT project_id, title, status FROM tasks WHERE task_id = ?); + LET old_project = (SELECT status FROM projects WHERE project_id = ?); -- old_project_id + LET new_project = (SELECT status FROM projects WHERE project_id = ?); -- new_project_id + + IF task IS NOT NULL + AND task.project_id = ? -- verify task belongs to old_project_id + AND old_project.status = 'active' + AND new_project.status = 'active' THEN + + -- Update task's project reference + UPDATE tasks SET project_id = ? WHERE task_id = ?; -- new_project_id + + -- Update denormalized index: remove from old, add to new + DELETE FROM tasks_by_project WHERE project_id = ? 
AND task_id = ?; -- old_project_id + INSERT INTO tasks_by_project (project_id, task_id, title, status) + VALUES (?, ?, ?, ?); -- new_project_id, task_id, task_title, task_status + + -- Update counts on both projects + UPDATE projects SET task_count = task_count - 1 WHERE project_id = ?; -- old_project_id + UPDATE projects SET task_count = task_count + 1 WHERE project_id = ?; -- new_project_id + END IF +COMMIT TRANSACTION +---- + +== Summary + +These patterns demonstrate how Accord transactions solve problems that were previously difficult in Cassandra: + +|=== +| Pattern | Problem Solved | Key Technique + +| Synchronous Unique Constraints +| Non-primary-key uniqueness +| Sidecar index tables with atomic swap + +| Distributed State Machine +| Race conditions in status changes +| IF condition guards valid transitions + +| Synchronous Denormalization +| Stale aggregate counts, duplicate increments on retry +| Idempotent creation with atomic detail + summary updates + +| Multi-Entity Referential Integrity +| Orphaned child records +| Parent existence check before child operations +|=== + +All patterns share common principles: + +* **Read what you need**: Use LET to capture current state +* **Guard with IF**: Validate preconditions before modifications +* **Atomic updates**: All changes succeed or fail together +* **Pass computed values as parameters**: Row-reference arithmetic in SET/VALUES is not supported diff --git a/doc/modules/cassandra/pages/developing/cql/transactions.adoc b/doc/modules/cassandra/pages/developing/cql/transactions.adoc new file mode 100644 index 000000000000..b4ebc80df0ee --- /dev/null +++ b/doc/modules/cassandra/pages/developing/cql/transactions.adoc @@ -0,0 +1,862 @@ += Accord Transactions +:page-nav-title: Transactions + +Accord provides strong consistency and ACID guarantees for Cassandra operations. +When enabled on a table, **all CQL operations automatically execute through Accord** - no code changes required. 
+For complex multi-step operations, explicit transaction syntax (`BEGIN TRANSACTION ... COMMIT TRANSACTION`) allows you to read, apply conditions, and write atomically across multiple partitions and tables. + +== Overview + +=== Key Benefits + +* **Automatic Strong Consistency**: Normal CQL reads and writes become linearizable when `transactional_mode='full'` +* **ACID Guarantees**: Atomicity, Consistency, Isolation, and Durability across multiple operations +* **Multi-Partition Consistency**: Coordinate updates across different partition keys +* **Multi-Table Support**: Update multiple tables atomically within a single transaction +* **Complex Business Logic**: Support for conditional operations with multiple steps + +=== When to Use Explicit Transactions + +While normal CQL operations are automatically transactional with `transactional_mode='full'`, use explicit `BEGIN TRANSACTION ... COMMIT TRANSACTION` syntax when you need: + +* **Read-Modify-Write Patterns**: Check a condition before making changes +* **Complex Business Logic**: Multi-step operations that must be atomic +* **Cross-Partition Operations**: Updates that span multiple partition keys +* **Multi-Table Atomicity**: Ensure related changes across tables succeed or fail together + +=== Safety & Consistency + +Accord ensures data integrity through: + +* **Strict Serializable Isolation**: Transactions appear to execute one at a time, in an order consistent with real time +* **Conflict Detection**: Automatic handling of concurrent access to the same data +* **Atomic Commitment**: All changes commit together or none at all +* **Durable Writes**: Committed transactions survive node failures + +== Getting Started + +=== Prerequisites + +Before using transactions: + +. **Enable Accord globally** in `cassandra.yaml`: ++ +[source,yaml] +---- +accord: + enabled: true +---- + +. 
**Enable transactional mode on tables**: ++ +[source,cql] +---- +CREATE TABLE users ( + id UUID PRIMARY KEY, + email text, + balance decimal +) WITH transactional_mode = 'full'; +---- + +See <<transactional-modes>> for detailed mode explanations. + +=== Normal CQL Operations Are Transactional + +When a table has `transactional_mode='full'`, your existing CQL statements are automatically executed through Accord. **You do not need to rewrite your application code.** + +[source,cql] +---- +-- These normal CQL operations are automatically transactional: + +-- Reads are executed through Accord +SELECT id, email, balance FROM users WHERE id = 123e4567-e89b-12d3-a456-426614174000; + +-- Writes are executed through Accord +INSERT INTO users (id, email, balance) VALUES (123e4567-e89b-12d3-a456-426614174000, 'user@example.com', 100.00); + +UPDATE users SET balance = 50.00 WHERE id = 123e4567-e89b-12d3-a456-426614174000; + +DELETE FROM users WHERE id = 123e4567-e89b-12d3-a456-426614174000; +---- + +Each statement executes as an individual Accord transaction, providing linearizability, consistency, and durability. Migrating to Accord can be as simple as enabling `transactional_mode='full'` on your tables. + +=== Your First Explicit Transaction + +[source,cql] +---- +BEGIN TRANSACTION + SELECT id, email, balance FROM users WHERE id = 123e4567-e89b-12d3-a456-426614174000; +COMMIT TRANSACTION +---- + +This simple transaction reads a single row with full ACID guarantees. 
+ +== Transaction Syntax + +=== Basic Structure + +All transactions follow this pattern: + +[source,cql] +---- +BEGIN TRANSACTION + [LET assignments] + [SELECT statements] + [IF conditions THEN] + [modification statements] + [END IF] +COMMIT TRANSACTION +---- + +=== LET Assignments + +LET statements read data and bind it to variables for use later in the transaction: + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT id, balance FROM users WHERE id = ?); + LET account_data = (SELECT account_type FROM accounts WHERE user_id = ?); + + IF user_data.balance > 100 AND account_data.account_type = 'premium' THEN + UPDATE users SET balance = balance - 50 WHERE id = ?; + END IF +COMMIT TRANSACTION +---- + +**LET Requirements:** + +* Each LET must specify a unique variable name +* SELECT must return at most one row (use `LIMIT 1` if needed); when no row matches, the variable is `NULL` +* All partition key columns must be specified with equality operators +* Cannot use `ORDER BY`, `GROUP BY`, or aggregation functions +* Cannot use range queries or multi-partition operations + +**Valid LET Examples:** +[source,cql] +---- +LET user_data = (SELECT balance, status FROM users WHERE id = ?); +LET order_info = (SELECT total, shipping_fee FROM orders WHERE id = ? LIMIT 1); +LET static_config = (SELECT max_attempts FROM config WHERE setting_type = 'retry'); +---- + +**Invalid LET Examples:** +[source,cql] +---- +-- Missing LIMIT 1 with potential multiple results +LET users = (SELECT * FROM users WHERE status = 'active'); + +-- Range query not allowed +LET recent = (SELECT * FROM events WHERE id > ? 
AND id < ?); + +-- Aggregation not supported +LET total = (SELECT COUNT(*) FROM orders WHERE user_id = ?); +---- + +=== Row References + +Access fields from LET variables using dot notation: + +[source,cql] +---- +BEGIN TRANSACTION + LET current_user = (SELECT balance, status FROM users WHERE id = ?); + + -- Access fields with dot notation + SELECT current_user.balance, current_user.status; + + -- Use in conditions + IF current_user.balance > 0 AND current_user.status = 'active' THEN + UPDATE users SET balance = balance - 25 WHERE id = ?; + END IF +COMMIT TRANSACTION +---- + +[[row-reference-limitations]] +**Row Reference Limitations:** + +Row reference arithmetic in SET clauses and row references in VALUES are not currently supported. Pass values as parameters instead: + +[source,cql] +---- +-- Application code computes values, passes as parameters +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + + IF user_data.balance >= ? THEN -- Pass order_total as parameter + UPDATE users SET balance = balance - ? 
WHERE id = ?; -- Pass order_total + END IF +COMMIT TRANSACTION +---- + +=== Conditional Logic + +Add conditional logic to transactions with IF blocks: + +[source,cql] +---- +BEGIN TRANSACTION + LET sender = (SELECT balance FROM accounts WHERE user_id = ?); + + IF sender.balance >= 100 THEN + UPDATE accounts SET balance = balance - 100 WHERE user_id = ?; + UPDATE accounts SET balance = balance + 100 WHERE user_id = ?; + END IF +COMMIT TRANSACTION +---- + +**Supported Operators:** + +* Comparison: `=`, `<`, `<=`, `>`, `>=`, `!=` +* Null checks: `IS NULL`, `IS NOT NULL` +* Logical: `AND` (only - `OR` is not supported) + +**Complex Condition Examples:** +[source,cql] +---- +-- Multiple conditions with AND +IF user_data.balance >= 100 AND user_data.status = 'active' + AND user_data.credit_limit > 150 THEN + -- statements +END IF + +-- Null checking +IF account_info IS NOT NULL AND account_info.balance > 0 THEN + -- statements +END IF +---- + +**Important Notes:** + +* Only `AND` is supported, not `OR` +* Null handling is strict (any null comparison returns false) +* All modification statements must be inside the IF block when using conditions + +=== Returning Results + +Return data from transactions using SELECT statements that appear before any modifications. 
You can use either row reference values or a normal single-partition SELECT: + +==== Using Row References + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance, status FROM users WHERE id = ?); + + -- Return row reference values (must come before UPDATE) + SELECT user_data.balance, user_data.status; + + UPDATE users SET balance = balance - 50 WHERE id = ?; +COMMIT TRANSACTION +---- + +==== Using a Normal SELECT + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + + -- Return data directly from table (must come before UPDATE) + SELECT balance, status, email FROM users WHERE id = ?; + + IF user_data.balance >= 50 THEN + UPDATE users SET balance = balance - 50 WHERE id = ?; + END IF +COMMIT TRANSACTION +---- + +**Important:** SELECT statements must appear before any UPDATE, INSERT, or DELETE statements. To retrieve updated values, query outside the transaction after it commits. + +== Common Patterns + +=== Read-Modify-Write + +Check a condition before making changes: + +[source,cql] +---- +BEGIN TRANSACTION + LET sender_account = (SELECT balance FROM accounts WHERE id = ?); + + IF sender_account.balance >= ? THEN + UPDATE accounts SET balance = balance - ? WHERE id = ?; + UPDATE accounts SET balance = balance + ? WHERE id = ?; + + INSERT INTO transactions (id, from_account, to_account, amount, timestamp) + VALUES (?, ?, ?, ?, toTimestamp(now())); + END IF +COMMIT TRANSACTION +---- + +=== Conditional Insert + +Prevent duplicate records: + +[source,cql] +---- +BEGIN TRANSACTION + LET existing_user = (SELECT user_id FROM email_index WHERE email = ? 
LIMIT 1); + + IF existing_user IS NULL THEN + INSERT INTO users (id, email, created_at, status) + VALUES (?, ?, toTimestamp(now()), 'active'); + + INSERT INTO email_index (email, user_id) VALUES (?, ?); + + INSERT INTO user_profiles (user_id, display_name) + VALUES (?, ?); + END IF +COMMIT TRANSACTION +---- + +=== Multi-Table Updates + +Maintain referential integrity across tables: + +[source,cql] +---- +BEGIN TRANSACTION + UPDATE users SET status = 'suspended', suspended_at = toTimestamp(now()) + WHERE id = ?; + + UPDATE orders SET status = 'cancelled' + WHERE order_id = ?; + + INSERT INTO audit_log (id, user_id, action, timestamp) + VALUES (?, ?, 'user_suspended', toTimestamp(now())); +COMMIT TRANSACTION +---- + +=== Cross-Partition Transactions + +Coordinate updates across different partitions (see <<row-reference-limitations>> for parameter passing): + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance, status FROM users WHERE id = ?); + + IF user_data.status = 'active' AND user_data.balance >= ? THEN + UPDATE users SET balance = balance - ? WHERE id = ?; + + INSERT INTO orders (id, user_id, total, status, created_at) + VALUES (?, ?, ?, 'confirmed', toTimestamp(now())); + + UPDATE inventory SET quantity = quantity - ? WHERE product_id = ?; + END IF +COMMIT TRANSACTION +---- + +[[transactional-modes]] +== Transactional Modes + +Tables must be configured with one of these transactional modes: + +=== transactional_mode='off' (Default) + +* No Accord transaction support +* Uses traditional Cassandra behavior +* Lightweight transactions use the Paxos protocol +* Cannot participate in Accord transactions + +**When to Use:** + +* Tables not ready for transaction migration +* High-throughput tables where transaction overhead isn't justified +* Tables with existing Paxos-based logic that works well + +[source,cql] +---- +CREATE TABLE tbl (...) 
+WITH transactional_mode = 'off'; +---- + +[[transactional_mode_mixed_reads]] +=== transactional_mode='mixed_reads' + +NOTE: Most users should migrate directly from `off` to `full`. This mode is only needed for specific scenarios where you must mix transactional and non-transactional access on the same table during a transition period. + +* Non-SERIAL writes are routed through Accord but committed at the supplied consistency level +* Allows non-SERIAL reads to see transactionally written data +* Blocking read repair is routed through Accord to avoid exposing uncommitted data + +**When to Use:** + +* Applications that **require** mixed transactional and non-transactional access on the same table simultaneously + +**Trade-offs:** + +* **Slower transactions**: Accord cannot perform single-replica read optimization because it must ensure data is readable at the non-SERIAL consistency level +* **Read repair overhead**: Accord must repair stale replicas during reads +* **Not recommended for most users**: Direct migration from `off` to `full` is simpler and provides better performance + +[source,cql] +---- +ALTER TABLE tbl WITH transactional_mode = 'mixed_reads'; +---- + +=== transactional_mode='full' (Recommended) + +* All reads and writes must occur through Accord transactions +* Enables single-replica reads since non-transactional readers don't exist +* Best transaction performance + +**When to Use:** + +* New tables that will use transactions +* Tables migrating from `off` mode (recommended for most users) +* Maximum transaction performance required + +**Why this mode is faster:** + +* **Single-replica reads**: Accord can read from a single replica and still provide correct results +* **No read repair overhead**: Accord doesn't need to repair data for non-transactional readers + +[source,cql] +---- +CREATE TABLE tbl (...) 
+WITH transactional_mode = 'full'; +---- + +== Migration Guide + +=== From Lightweight Transactions (LWT) + +==== IF EXISTS + +**Before (LWT):** +[source,cql] +---- +UPDATE accounts SET balance = balance - 100 +WHERE user_id = 12345 +IF EXISTS; +---- + +**After (Accord):** +[source,cql] +---- +BEGIN TRANSACTION + LET account_data = (SELECT balance FROM accounts WHERE user_id = 12345); + + IF account_data IS NOT NULL THEN + UPDATE accounts SET balance = balance - 100 WHERE user_id = 12345; + END IF +COMMIT TRANSACTION +---- + +==== IF NOT EXISTS + +**Before (LWT):** +[source,cql] +---- +INSERT INTO users (id, email, status) +VALUES (?, ?, 'active') +IF NOT EXISTS; +---- + +**After (Accord):** +[source,cql] +---- +BEGIN TRANSACTION + LET existing_user = (SELECT id FROM users WHERE id = ? LIMIT 1); + + IF existing_user IS NULL THEN + INSERT INTO users (id, email, status) VALUES (?, ?, 'active'); + END IF +COMMIT TRANSACTION +---- + +==== Complex CAS with Multiple Operations + +**Before (Multiple LWT operations with race condition risk):** +[source,cql] +---- +UPDATE accounts SET balance = balance - 50 +WHERE user_id = 12345 +IF balance >= 50; + +-- Separate operation (not atomic with above) +INSERT INTO transaction_log (id, user_id, amount, timestamp) +VALUES (?, 12345, -50, toTimestamp(now())); +---- + +**After (Single atomic transaction):** +[source,cql] +---- +BEGIN TRANSACTION + LET account = (SELECT balance FROM accounts WHERE user_id = 12345); + + IF account.balance >= 50 THEN + UPDATE accounts SET balance = balance - 50 WHERE user_id = 12345; + + INSERT INTO transaction_log (id, user_id, amount, timestamp) + VALUES (?, 12345, -50, toTimestamp(now())); + END IF +COMMIT TRANSACTION +---- + +=== From BATCH Statements + +BATCH provides atomicity but not consistency checks. 
Transactions add conditional logic: + +**Before (BATCH - no condition checking):** +[source,cql] +---- +BEGIN BATCH + UPDATE users SET balance = balance - 100 WHERE id = ?; + INSERT INTO orders (id, user_id, amount) VALUES (?, ?, 100); +APPLY BATCH; +-- Problem: Can't check if balance is sufficient! +---- + +**After (Transaction with condition):** +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + + IF user_data.balance >= 100 THEN + UPDATE users SET balance = balance - 100 WHERE id = ?; + INSERT INTO orders (id, user_id, amount) VALUES (?, ?, 100); + END IF +COMMIT TRANSACTION +---- + +For simple LOGGED BATCH without conditions, transactions provide stronger guarantees: + +[source,cql] +---- +BEGIN TRANSACTION + UPDATE user_profiles SET last_active = toTimestamp(now()) WHERE user_id = ?; + INSERT INTO user_activity (user_id, activity, timestamp) VALUES (?, 'login', toTimestamp(now())); + UPDATE user_stats SET login_count = login_count + 1 WHERE user_id = ?; +COMMIT TRANSACTION +---- + +=== From Multiple Separate Statements + +**Before (Race condition risk):** +[source,cql] +---- +-- Step 1: Check inventory +SELECT quantity FROM inventory WHERE product_id = ?; +-- Step 2: Application checks quantity +-- Step 3: Update (but quantity might have changed!) 
+UPDATE inventory SET quantity = quantity - 1 WHERE product_id = ?; +UPDATE orders SET status = 'confirmed' WHERE id = ?; +---- + +**After (Atomic with condition):** +[source,cql] +---- +BEGIN TRANSACTION + LET inventory_check = (SELECT quantity FROM inventory WHERE product_id = ?); + + IF inventory_check.quantity > 0 THEN + UPDATE inventory SET quantity = quantity - 1 WHERE product_id = ?; + UPDATE orders SET status = 'confirmed' WHERE id = ?; + END IF +COMMIT TRANSACTION +---- + +== Advanced Features + +=== Null Handling + +Transactions use strict null semantics, different from Paxos CAS: + +[source,cql] +---- +-- CAS behavior (Paxos): balance = null returns true when balance IS null +UPDATE users SET balance = 100 WHERE id = ? IF balance = null; +---- + +In transactions, `balance = null` is always FALSE. Use `IS NULL` instead: + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + + -- Check for null, then check value + IF user_data IS NOT NULL AND user_data.balance IS NULL THEN + UPDATE users SET balance = 100 WHERE id = ?; + END IF +COMMIT TRANSACTION +---- + +**Null Propagation Rules:** + +* Any comparison with null returns false (except IS NULL/IS NOT NULL) +* Arithmetic operations with null return null +* Null columns in row references return null (not errors) + +=== Multi-Partition Coordination + +Cross-partition transactions add coordination overhead: + +[source,cql] +---- +BEGIN TRANSACTION + LET user1 = (SELECT balance FROM accounts WHERE user_id = ?); + LET user2 = (SELECT balance FROM accounts WHERE user_id = ?); + + IF user1.balance >= 100 THEN + UPDATE accounts SET balance = balance - 100 WHERE user_id = ?; + UPDATE accounts SET balance = balance + 100 WHERE user_id = ?; + END IF +COMMIT TRANSACTION +---- + +**Performance Impact:** + +* Same-partition transactions: Low overhead +* Cross-partition transactions: Moderate overhead +* More partitions = higher coordination cost + +**Optimization:** Minimize 
cross-partition operations; consider data model changes to reduce cross-partition transactions. + +=== Automatic Read Generation + +When UPDATE statements reference current column values, Accord automatically reads the current state: + +[source,cql] +---- +BEGIN TRANSACTION + -- This UPDATE automatically reads current balance + UPDATE users SET balance = balance + ? WHERE id = ?; +COMMIT TRANSACTION +---- + +**Triggers:** + +* SET clauses that reference current column values + +== Syntax Reference + +=== Complete Grammar + +[source,cql] +---- +BEGIN TRANSACTION + [LET letStatements] + [SELECT selectStatement | SELECT rowDataReferences] + [IF conditionalExpression THEN] + [modificationStatements] + [END IF] +COMMIT TRANSACTION +---- + +=== Modification Statements + +**INSERT:** +[source,cql] +---- +INSERT INTO table (columns) VALUES (values); +---- + +**UPDATE:** +[source,cql] +---- +UPDATE table SET assignments WHERE conditions; +---- + +**DELETE:** +[source,cql] +---- +DELETE [columns] FROM table WHERE conditions; +---- + +**Restrictions:** + +* Cannot specify IF conditions (use transaction IF blocks) +* Cannot specify custom TTL +* Cannot use USING TIMESTAMP +* Must target single partitions (no range operations) + +== Restrictions & Limitations for BEGIN TRANSACTION + +=== Schema Restrictions + +* `Accord transactions are disabled`: Enable in cassandra.yaml +* `Accord transactions are disabled on table`: Set transactional_mode on table +* `table is being dropped`: Wait for drop to complete + +=== Syntax Restrictions + +* `Updates may not specify their own conditions`: Use transaction IF blocks +* `Updates may not specify custom timestamps`: Transaction manages timestamps +* `Updates may not specify custom ttls`: TTL not supported in transactions +* `Counter columns cannot be accessed`: Use regular columns instead +* `No aggregation functions allowed`: COUNT, SUM, AVG not supported +* `No ORDER BY clause allowed`: Remove ORDER BY from SELECTs +* `No GROUP BY 
clause allowed`: Remove GROUP BY from SELECTs + +=== Query Restrictions + +* `SELECT must specify all partition key elements`: Use equality operators for all partition key columns +* `Range queries are not allowed`: Use equality operators only +* `Partition key in IN clause with LIMIT not supported`: Avoid this combination + +=== Feature Incompatibilities + +**Cannot Be Used in Transactions:** + +* Counter tables and counter operations +* Aggregation functions (COUNT, SUM, AVG, etc.) +* ORDER BY and GROUP BY clauses +* Custom TTL and timestamp specifications +* Range DELETE operations +* Non-equality partition key restrictions + +== Best Practices + +=== Keep Transactions Focused + +**Do:** Single business operation +[source,cql] +---- +BEGIN TRANSACTION + LET account = (SELECT balance FROM accounts WHERE user_id = ?); + + IF account.balance >= ? THEN + UPDATE accounts SET balance = balance - ? WHERE user_id = ?; + INSERT INTO transactions (id, user_id, amount) VALUES (?, ?, ?); + END IF +COMMIT TRANSACTION +---- + +**Don't:** Combine unrelated operations in one transaction. + +=== Minimize Cross-Partition Operations + +**Do:** Same partition when possible +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance, status FROM users WHERE id = ?); + + IF user_data.balance > 100 AND user_data.status = 'active' THEN + UPDATE users SET balance = balance - 50 WHERE id = ?; + INSERT INTO user_history (user_id, action, amount) VALUES (?, 'debit', 50); + END IF +COMMIT TRANSACTION +---- + +**Don't:** Read from partitions you don't use. 
+ +=== Transaction Sizing Guidelines + +* 1-5 LET statements per transaction +* 1-10 modification statements per transaction +* Target 2-3 partitions maximum + +=== Anti-Patterns to Avoid + +**Unnecessary transaction overhead:** +[source,cql] +---- +-- Bad: Simple insert doesn't need explicit transaction +BEGIN TRANSACTION + INSERT INTO simple_log (id, message, timestamp) VALUES (?, ?, ?); +COMMIT TRANSACTION + +-- Good: Use regular CQL (still transactional with transactional_mode='full') +INSERT INTO simple_log (id, message, timestamp) VALUES (?, ?, ?); +---- + +**Reading unused data:** +[source,cql] +---- +-- Bad: account_data is never used +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + LET account_data = (SELECT balance FROM accounts WHERE user_id = ?); -- unused + + IF user_data.balance > 100 THEN + UPDATE users SET balance = balance - 50 WHERE id = ?; + END IF +COMMIT TRANSACTION +---- + +== Troubleshooting + +=== Configuration Errors + +**`Accord transactions are disabled`** + +Enable in cassandra.yaml: +[source,yaml] +---- +accord: + enabled: true +---- + +**`Accord transactions are disabled on table`** + +Enable on table: +[source,cql] +---- +ALTER TABLE tbl WITH transactional_mode = 'full'; +---- + +=== Syntax Errors + +**`The name 'variable_name' has already been used`** + +Use unique variable names: +[source,cql] +---- +-- Bad +LET user_data = (SELECT balance FROM accounts WHERE user_id = 1); +LET user_data = (SELECT balance FROM accounts WHERE user_id = 2); + +-- Good +LET sender_data = (SELECT balance FROM accounts WHERE user_id = 1); +LET receiver_data = (SELECT balance FROM accounts WHERE user_id = 2); +---- + +=== Performance Issues + +**High latency investigation:** + +. Check transaction complexity (LET count <= 5, partitions <= 3) +. Monitor cross-datacenter operations +. 
Check for high contention on specific keys + +**Large result sets:** + +Select only needed columns: +[source,cql] +---- +-- Bad: Reading entire row +LET user_history = (SELECT * FROM large_history_table WHERE user_id = ? LIMIT 1); + +-- Good: Only read necessary data +LET user_status = (SELECT status, last_login FROM users WHERE id = ?); +---- + +=== Counter Table Alternative + +Counters cannot be used in transactions. Use regular columns: + +[source,cql] +---- +-- Bad: Counter in transaction +CREATE TABLE page_views ( + page_id uuid PRIMARY KEY, + views counter +) WITH transactional_mode = 'full'; + +-- Good: Regular column +CREATE TABLE page_views ( + page_id uuid PRIMARY KEY, + views bigint +) WITH transactional_mode = 'full'; + +BEGIN TRANSACTION + UPDATE page_views SET views = views + 1 WHERE page_id = ?; +COMMIT TRANSACTION +---- From 5a632241b5737da197b7975a852759331acc4efe Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 26 Jan 2026 16:55:02 -0800 Subject: [PATCH 2/4] null feedback --- .../pages/developing/cql/transactions.adoc | 135 +++++++++++++++++- 1 file changed, 128 insertions(+), 7 deletions(-) diff --git a/doc/modules/cassandra/pages/developing/cql/transactions.adoc b/doc/modules/cassandra/pages/developing/cql/transactions.adoc index b4ebc80df0ee..08a25eccf7f9 100644 --- a/doc/modules/cassandra/pages/developing/cql/transactions.adoc +++ b/doc/modules/cassandra/pages/developing/cql/transactions.adoc @@ -468,6 +468,37 @@ BEGIN TRANSACTION COMMIT TRANSACTION ---- +==== IF column = null (NULL Comparison) + +[IMPORTANT] +==== +LWT and Accord handle `IF column = null` differently. See <<_null_handling,Null Handling>> for complete details. +==== + +LWT treats `column = null` as "column is null," but Accord follows SQL semantics where `column = null` is always FALSE. + +**Before (LWT):** +[source,cql] +---- +-- LWT: Matches when email is null/missing +UPDATE users SET email = 'default@example.com' +WHERE id = ? 
+IF email = null; +---- + +**After (Accord):** +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT email FROM users WHERE id = ?); + + -- Must use IS NULL, not = null + IF user_data IS NOT NULL AND user_data.email IS NULL THEN + UPDATE users SET email = 'default@example.com' WHERE id = ?; + END IF +COMMIT TRANSACTION +---- + ==== Complex CAS with Multiple Operations **Before (Multiple LWT operations with race condition risk):** @@ -565,33 +596,123 @@ COMMIT TRANSACTION === Null Handling -Transactions use strict null semantics, different from Paxos CAS: +[IMPORTANT] +==== +Accord transactions follow SQL standard null semantics, which differ from LWT (Paxos) behavior. This is a critical difference when migrating from LWT to Accord. +==== + +==== LWT vs Accord: NULL Comparison Behavior + +|=== +| Condition | Column Value | LWT (Paxos) Result | Accord Result + +| `IF column = null` +| Column is NULL/missing +| **TRUE** (matches) +| **FALSE** (never matches) + +| `IF column = null` +| Column has a value +| FALSE +| FALSE + +| `IF column IS NULL` +| Column is NULL/missing +| TRUE +| TRUE + +| `IF column != null` +| Column is NULL/missing +| FALSE +| **FALSE** (null comparisons always false) +| `IF column != null` +| Column has a value +| TRUE +| **FALSE** (null comparisons always false; use `IS NOT NULL`) +|=== + +==== Why the Difference? + +**LWT (Paxos) behavior:** LWT treats `column = null` as a special case meaning "column is null or missing." This is a Cassandra-specific convenience that deviates from SQL standards. + +**Accord behavior:** Accord follows SQL standard semantics where `NULL` represents an unknown value. Comparing anything to `NULL` using `=`, `!=`, `<`, `>`, etc. always returns `FALSE` (or more precisely, `UNKNOWN`, which is treated as `FALSE` in conditionals). This is because you cannot know if an unknown value equals another value. 
+ +==== Migration from LWT + +If your LWT code uses `IF column = null`, you must update it when migrating to Accord: + +**Before (LWT):** [source,cql] ---- --- CAS behavior (Paxos): balance = null returns true when balance IS null +-- LWT: This returns TRUE when balance is null/missing UPDATE users SET balance = 100 WHERE id = ? IF balance = null; ---- -In transactions, `balance = null` is always FALSE. Use `IS NULL` instead: - +**After (Accord):** [source,cql] ---- BEGIN TRANSACTION LET user_data = (SELECT balance FROM users WHERE id = ?); - -- Check for null, then check value + -- Use IS NULL for null checks IF user_data IS NOT NULL AND user_data.balance IS NULL THEN UPDATE users SET balance = 100 WHERE id = ?; END IF COMMIT TRANSACTION ---- -**Null Propagation Rules:** +TIP: Null checks are recursive. If `user_data` is null (row doesn't exist), then `user_data.balance` is also null. So `IF user_data.balance IS NULL` will be true both when the row doesn't exist AND when the row exists but the column is null. Use `user_data IS NOT NULL` first only if you need to distinguish between these cases. 
+ +==== Common Patterns + +**Check if a column is null (row missing OR column unset):** +[source,cql] +---- +IF user_data.balance IS NULL THEN + -- Either row doesn't exist, OR row exists but balance is null +END IF +---- + +**Check if a row exists (regardless of column value):** +[source,cql] +---- +IF user_data IS NOT NULL THEN + -- Row exists (balance could be null or have a value) +END IF +---- + +**Check if a column is null but row exists:** +[source,cql] +---- +IF user_data IS NOT NULL AND user_data.balance IS NULL THEN + -- Row exists AND balance is null (distinguishes from missing row) +END IF +---- + +**Check if a column has a specific value:** +[source,cql] +---- +IF user_data IS NOT NULL AND user_data.balance = 100 THEN + -- Row exists AND balance equals 100 +END IF +---- + +**Check if a row does not exist:** +[source,cql] +---- +IF user_data IS NULL THEN + -- Row does not exist +END IF +---- + +==== Null Propagation Rules -* Any comparison with null returns false (except IS NULL/IS NOT NULL) +* Any comparison with null using `=`, `!=`, `<`, `>`, `<=`, `>=` returns **FALSE** +* Only `IS NULL` and `IS NOT NULL` can meaningfully test for null * Arithmetic operations with null return null * Null columns in row references return null (not errors) +* Accessing a field on a null row reference (e.g., `missing_row.column`) returns null === Multi-Partition Coordination From 0422b685e009cfa6510a8c527229c10311b62c4b Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 26 Jan 2026 17:05:38 -0800 Subject: [PATCH 3/4] added new doc to show limitations and their workaround --- doc/modules/cassandra/nav.adoc | 1 + .../developing/cql/transactions-examples.adoc | 4 +- .../cql/transactions-limitations.adoc | 312 ++++++++++++++++++ .../pages/developing/cql/transactions.adoc | 4 +- 4 files changed, 319 insertions(+), 2 deletions(-) create mode 100644 doc/modules/cassandra/pages/developing/cql/transactions-limitations.adoc diff --git a/doc/modules/cassandra/nav.adoc 
b/doc/modules/cassandra/nav.adoc index 5968c0718a39..1ab8784b34c0 100644 --- a/doc/modules/cassandra/nav.adoc +++ b/doc/modules/cassandra/nav.adoc @@ -45,6 +45,7 @@ *** xref:cassandra:developing/cql/dml.adoc[Data manipulation (DML)] *** xref:cassandra:developing/cql/transactions.adoc[Transactions] *** xref:cassandra:developing/cql/transactions-examples.adoc[Transaction examples] +*** xref:cassandra:developing/cql/transactions-limitations.adoc[Transaction limitations] *** xref:cassandra:developing/cql/dynamic-data-masking.adoc[] *** xref:cassandra:developing/cql/operators.adoc[Operators] *** xref:cassandra:developing/cql/indexing/indexing-concepts.adoc[] diff --git a/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc b/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc index df1753137260..cdbee0cc4bf3 100644 --- a/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc +++ b/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc @@ -97,7 +97,7 @@ Many business objects follow a lifecycle with strict state transitions. An order * Prevent race conditions between competing operations * Maintain audit trail of transitions -NOTE: The `IN` and `OR` operators are not currently supported in transaction `IF` conditions. To check multiple valid states, use **numeric status codes** with range comparisons (e.g., `status_code < 30` to mean "any state before SHIPPED"). +NOTE: The `IN` and `OR` operators are not currently supported in transaction `IF` conditions. To check multiple valid states, use **numeric status codes** with range comparisons (e.g., `status_code < 30` to mean "any state before SHIPPED"). See xref:developing/cql/transactions-limitations.adoc#_in_and_or_in_if_conditions[Transaction Limitations] for details. 
=== Schema @@ -414,3 +414,5 @@ All patterns share common principles: * **Guard with IF**: Validate preconditions before modifications * **Atomic updates**: All changes succeed or fail together * **Pass computed values as parameters**: Row-reference arithmetic in SET/VALUES is not supported + +For syntax that is not yet supported and how to work around it, see xref:developing/cql/transactions-limitations.adoc[Transaction Limitations]. diff --git a/doc/modules/cassandra/pages/developing/cql/transactions-limitations.adoc b/doc/modules/cassandra/pages/developing/cql/transactions-limitations.adoc new file mode 100644 index 000000000000..8d49e0e0be6d --- /dev/null +++ b/doc/modules/cassandra/pages/developing/cql/transactions-limitations.adoc @@ -0,0 +1,312 @@ += Transaction Syntax Limitations +:page-nav-title: Transaction Limitations + +This page documents CQL transaction syntax that users commonly expect to work but is not currently supported. Each section explains the limitation, shows the error you'll encounter, and provides a working alternative. + +For transaction syntax and usage, see xref:developing/cql/transactions.adoc[Accord Transactions]. For design patterns, see xref:developing/cql/transactions-examples.adoc[Transaction Patterns]. + +== Arithmetic with Row References + +Row references (LET variable fields) cannot be used in arithmetic expressions within UPDATE SET clauses or SELECT statements. + +=== What Doesn't Work + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + UPDATE users SET balance = user_data.balance - 50 WHERE id = ?; -- Error +COMMIT TRANSACTION +---- + +**Error:** `no viable alternative at input '-'` + +Similarly, arithmetic in SELECT statements fails: + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + SELECT user_data.balance - 50; -- Error +COMMIT TRANSACTION +---- + +=== What Works Instead + +Use column self-reference in UPDATE statements. 
The transaction reads the current value automatically: + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + IF user_data.balance >= 50 THEN + UPDATE users SET balance = balance - 50 WHERE id = ?; -- Column self-reference works + END IF +COMMIT TRANSACTION +---- + +For computed values needed in INSERT or cross-table scenarios, compute in your application and pass as a parameter: + +[source,cql] +---- +-- Application computes: new_balance = current_balance - 50 + +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + IF user_data.balance >= 50 THEN + UPDATE users SET balance = ? WHERE id = ?; -- Pass computed value as parameter + END IF +COMMIT TRANSACTION +---- + +== Comparing Two Row References + +Comparing values from two different LET variables in an IF condition is not supported. + +=== What Doesn't Work + +[source,cql] +---- +BEGIN TRANSACTION + LET account = (SELECT balance FROM accounts WHERE id = ?); + LET account_limit = (SELECT max_balance FROM limits WHERE type = 'standard'); + + IF account.balance < account_limit.max_balance THEN -- Error + UPDATE accounts SET balance = balance + 10 WHERE id = ?; + END IF +COMMIT TRANSACTION +---- + +**Error:** `IllegalArgumentException` + +=== What Works Instead + +Read the comparison value in your application and pass it as a parameter: + +[source,cql] +---- +-- Application code: +-- 1. Read: SELECT max_balance FROM limits WHERE type = 'standard' +-- 2. Pass max_balance as parameter to transaction + +BEGIN TRANSACTION + LET account = (SELECT balance FROM accounts WHERE id = ?); + + IF account.balance < ? THEN -- Pass max_balance as parameter + UPDATE accounts SET balance = balance + 10 WHERE id = ?; + END IF +COMMIT TRANSACTION +---- + +== Range Updates and Deletes + +UPDATE and DELETE operations must specify the complete primary key. Range operations that would affect multiple rows are not supported. 
+ +=== What Doesn't Work + +[source,cql] +---- +-- Table: user_sessions (user_id uuid, session_id uuid, status text, PRIMARY KEY (user_id, session_id)) + +BEGIN TRANSACTION + UPDATE user_sessions SET status = 'invalidated' WHERE user_id = ?; -- Error: missing session_id +COMMIT TRANSACTION +---- + +**Error:** `Some clustering keys are missing` + +=== What Works Instead + +Query the specific rows first (outside the transaction or in your application), then update each row individually: + +[source,cql] +---- +-- Step 1: Application queries session IDs +-- SELECT session_id FROM user_sessions WHERE user_id = ?; + +-- Step 2: Transaction updates specific rows +BEGIN TRANSACTION + UPDATE user_sessions SET status = 'invalidated' WHERE user_id = ? AND session_id = ?; + UPDATE user_sessions SET status = 'invalidated' WHERE user_id = ? AND session_id = ?; + UPDATE user_sessions SET status = 'invalidated' WHERE user_id = ? AND session_id = ?; +COMMIT TRANSACTION +---- + +For large numbers of rows, consider whether you need transactional guarantees for all updates, or whether updating in batches outside transactions is acceptable. + +== IN and OR in IF Conditions + +The `IN` operator and `OR` logical operator are not supported in transaction IF conditions. Only `AND` is available for combining conditions. 
+ +=== What Doesn't Work + +[source,cql] +---- +BEGIN TRANSACTION + LET order_data = (SELECT status FROM orders WHERE order_id = ?); + + IF order_data.status IN ('PENDING', 'PAID') THEN -- Error + UPDATE orders SET status = 'CANCELLED' WHERE order_id = ?; + END IF +COMMIT TRANSACTION +---- + +**Error:** `no viable alternative at input 'IN'` + +[source,cql] +---- +BEGIN TRANSACTION + LET order_data = (SELECT status FROM orders WHERE order_id = ?); + + IF order_data.status = 'PENDING' OR order_data.status = 'PAID' THEN -- Error + UPDATE orders SET status = 'CANCELLED' WHERE order_id = ?; + END IF +COMMIT TRANSACTION +---- + +**Error:** `no viable alternative at input 'OR'` + +=== What Works Instead + +Use numeric status codes and range comparisons: + +[source,cql] +---- +-- Schema uses integer status codes: +-- 10 = PENDING, 20 = PAID, 30 = SHIPPED, 40 = DELIVERED, 99 = CANCELLED + +BEGIN TRANSACTION + LET order_data = (SELECT status_code FROM orders WHERE order_id = ?); + + -- status_code <= 20 means PENDING or PAID + IF order_data.status_code <= 20 THEN + UPDATE orders SET status_code = 99 WHERE order_id = ?; + END IF +COMMIT TRANSACTION +---- + +Design your status codes so that valid transition states form contiguous ranges. See xref:developing/cql/transactions-examples.adoc#_pattern_distributed_state_machine[Distributed State Machine] for a complete example. + +== SELECT After Modifications + +SELECT statements must appear before any UPDATE, INSERT, or DELETE statements. The transaction grammar enforces this ordering. 
+ +=== What Doesn't Work + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + UPDATE users SET balance = balance - 50 WHERE id = ?; + SELECT user_data.balance; -- Error: SELECT after UPDATE +COMMIT TRANSACTION +---- + +**Error:** `no viable alternative at input 'SELECT'` + +=== What Works Instead + +**Option 1:** SELECT before modifications (returns pre-update values): + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + SELECT user_data.balance; -- Returns value before update + UPDATE users SET balance = balance - 50 WHERE id = ?; +COMMIT TRANSACTION +---- + +**Option 2:** Query after the transaction commits: + +[source,cql] +---- +BEGIN TRANSACTION + UPDATE users SET balance = balance - 50 WHERE id = ?; +COMMIT TRANSACTION + +-- Then query the updated value: +SELECT balance FROM users WHERE id = ?; +---- + +== Computed LET Assignments + +LET statements can only assign the result of a SELECT query. They cannot assign computed values, arithmetic expressions, or CASE expressions. 
+ +=== What Doesn't Work + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + LET new_balance = user_data.balance - 50; -- Error: computed assignment + UPDATE users SET balance = new_balance WHERE id = ?; +COMMIT TRANSACTION +---- + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance, tier FROM users WHERE id = ?); + LET discount = CASE user_data.tier + WHEN 'gold' THEN 0.10 + ELSE 0.0 END; -- Error: CASE expression +COMMIT TRANSACTION +---- + +=== What Works Instead + +Perform computations directly in UPDATE SET clauses using column self-reference, or compute values in your application: + +[source,cql] +---- +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + + IF user_data.balance >= 50 THEN + UPDATE users SET balance = balance - 50 WHERE id = ?; -- Compute in SET clause + END IF +COMMIT TRANSACTION +---- + +For complex calculations like tier-based discounts: + +[source,cql] +---- +-- Application code: +-- 1. Read user tier +-- 2. Calculate discount based on tier +-- 3. Pass final amount as parameter + +BEGIN TRANSACTION + LET user_data = (SELECT balance FROM users WHERE id = ?); + + IF user_data.balance >= ? THEN -- Pass discounted_amount + UPDATE users SET balance = balance - ? 
WHERE id = ?; -- Pass discounted_amount + END IF +COMMIT TRANSACTION +---- + +== Summary + +|=== +| Limitation | Workaround + +| Arithmetic with row references +| Use column self-reference (`balance = balance - 50`) or pass computed values as parameters + +| Comparing two row references +| Read one value in application, pass as parameter + +| Range updates/deletes +| Query row keys first, update each row individually + +| IN and OR in conditions +| Use numeric codes with range comparisons (`status_code <= 20`) + +| SELECT after modifications +| SELECT before modifications, or query after commit + +| Computed LET assignments +| Compute in SET clauses or application layer +|=== diff --git a/doc/modules/cassandra/pages/developing/cql/transactions.adoc b/doc/modules/cassandra/pages/developing/cql/transactions.adoc index 08a25eccf7f9..990fee634c24 100644 --- a/doc/modules/cassandra/pages/developing/cql/transactions.adoc +++ b/doc/modules/cassandra/pages/developing/cql/transactions.adoc @@ -176,7 +176,7 @@ COMMIT TRANSACTION [[row-reference-limitations]] **Row Reference Limitations:** -Row reference arithmetic in SET clauses and row references in VALUES are not currently supported. Pass values as parameters instead: +Row reference arithmetic in SET clauses and row references in VALUES are not currently supported. Pass values as parameters instead. See xref:developing/cql/transactions-limitations.adoc[Transaction Limitations] for complete details on unsupported syntax and workarounds. [source,cql] ---- @@ -799,6 +799,8 @@ DELETE [columns] FROM table WHERE conditions; == Restrictions & Limitations for BEGIN TRANSACTION +NOTE: For detailed information about common syntax that doesn't work and practical workarounds, see xref:developing/cql/transactions-limitations.adoc[Transaction Limitations]. 
+ === Schema Restrictions * `Accord transactions are disabled`: Enable in cassandra.yaml From 1105f999051304b70b718106fe1b6d9897a2a8ab Mon Sep 17 00:00:00 2001 From: David Capwell Date: Wed, 28 Jan 2026 13:40:18 -0800 Subject: [PATCH 4/4] feedback from benedict --- doc/modules/cassandra/pages/developing/cql/dml.adoc | 5 +++-- .../pages/developing/cql/transactions-examples.adoc | 2 +- .../cassandra/pages/developing/cql/transactions.adoc | 8 +++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/modules/cassandra/pages/developing/cql/dml.adoc b/doc/modules/cassandra/pages/developing/cql/dml.adoc index e57ed59f9e8f..e1b751d70883 100644 --- a/doc/modules/cassandra/pages/developing/cql/dml.adoc +++ b/doc/modules/cassandra/pages/developing/cql/dml.adoc @@ -477,13 +477,14 @@ updates in Cassandra, counter updates are not idempotent. === BATCH vs Transactions -While BATCH statements provide atomicity for multiple operations, they have limitations compared to Accord transactions. Understanding the differences helps you choose the right approach for your use case. +BATCH statements have limitations compared to Accord transactions. Understanding the differences helps you choose the right approach for your use case. ==== BATCH Statement Capabilities BATCH statements offer: * **Single-partition atomicity**: All operations within a single partition succeed or fail together atomically +* **Multi-partition batching**: LOGGED batches can span multiple partitions, but are not atomic. The batch log ensures all operations eventually complete, but there can be a window where only some operations are visible. 
 * **Single partition isolation**: Operations on the same partition are isolated * **Network efficiency**: Multiple operations in one round-trip * **Automatic timestamps**: All operations use the same timestamp by default @@ -494,7 +495,7 @@ BATCH statements offer: However, BATCH statements cannot provide: -* **Cross-partition atomicity**: Multi-partition LOGGED batches use a batch log to ensure eventual completion, but are not truly atomic. There can be a window where only some operations are visible. +* **Cross-partition atomicity**: Multi-partition LOGGED batches are not truly atomic (see the Multi-partition batching entry under BATCH Statement Capabilities) * **Read-before-write patterns**: Cannot read data within the batch to make decisions based on current values * **Cross-partition conditional updates**: Conditional batches (using `IF` clauses) must operate on a single partition and table * **Complex conditional logic**: `IF` clauses are limited to simple equality/inequality checks on existing column values diff --git a/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc b/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc index cdbee0cc4bf3..faca1a8a5622 100644 --- a/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc +++ b/doc/modules/cassandra/pages/developing/cql/transactions-examples.adoc @@ -7,7 +7,7 @@ For basic syntax, getting started, and migration guides, see xref:developing/cql == Pattern: Synchronous Unique Constraints -Cassandra's primary key enforces uniqueness, but what if you need uniqueness on a non-primary-key column like `email` or `username`? This pattern uses sidecar index tables to enforce multiple unique constraints atomically. +Cassandra's primary key enforces uniqueness, but what if you need uniqueness on a non-primary-key column like `email` or `username`? This pattern uses user-maintained lookup tables to enforce multiple unique constraints atomically. 
=== The Challenge diff --git a/doc/modules/cassandra/pages/developing/cql/transactions.adoc b/doc/modules/cassandra/pages/developing/cql/transactions.adoc index 990fee634c24..60b74cf5e442 100644 --- a/doc/modules/cassandra/pages/developing/cql/transactions.adoc +++ b/doc/modules/cassandra/pages/developing/cql/transactions.adoc @@ -28,7 +28,7 @@ While normal CQL operations are automatically transactional with `transactional_ Accord ensures data integrity through: -* **Snapshot Isolation**: Each transaction sees a consistent snapshot of data +* **Strict Serializability**: Transactions execute as if in a single, total order that respects real-time ordering * **Conflict Detection**: Automatic handling of concurrent access to the same data * **Atomic Commitment**: All changes commit together or none at all * **Durable Writes**: Committed transactions survive node failures @@ -640,7 +640,9 @@ Accord transactions follow SQL standard null semantics, which differ from LWT (P ==== Migration from LWT -If your LWT code uses `IF column = null`, you must update it when migrating to Accord: +If your LWT code uses `IF column = null`, you must update it when migrating to Accord transaction syntax. + +NOTE: This does not affect LWT statements executed on Accord-enabled tables. Those statements retain their original LWT null semantics. **Before (LWT):** [source,cql] @@ -945,7 +947,7 @@ LET receiver_data = (SELECT balance FROM accounts WHERE user_id = 2); **High latency investigation:** -. Check transaction complexity (LET count < 5, partitions < 3) +. Check transaction complexity (LET count < 5, partitions < 3)footnote:[These are suggested starting points, not empirically derived limits.] . Monitor cross-datacenter operations . Check for high contention on specific keys