From c9ba6c36b68927e8f6d740c0a82ab1960b19a59c Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Mon, 2 Mar 2026 20:26:46 +0800 Subject: [PATCH] Support matview status maintenance for multi-table JOIN queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, GetViewBaseRelids() rejected any query with more than one base table, so materialized views defined with JOINs were never registered in gp_matview_aux/gp_matview_tables. This meant no status tracking and no staleness propagation for join matviews. Add a recursive helper extract_base_relids_from_jointree() that walks RangeTblRef, JoinExpr, and FromExpr nodes to collect all base relation OIDs. This is the only C function changed -- the existing downstream infrastructure (InsertMatviewTablesEntries, SetRelativeMatviewAuxStatus, MaintainMaterializedViewStatus, reference counting) already supports N base tables per matview. This is a first step toward AQUMV support for join queries. Users can also inspect a join matview's freshness status manually via gp_matview_aux. Key behaviors: - Self-joins (t1 JOIN t1) are deduplicated to one catalog entry - All join types supported: INNER, LEFT, RIGHT, FULL, implicit cross - Subquery/function RTEs in FROM are still rejected - Partitioned tables in joins propagate DML status correctly - Status escalation across multiple base tables works (i→e on delete) - Transaction rollback correctly reverts status changes Includes regression tests for: two/three-table joins, implicit joins, self-joins, all outer join types, mixed join types, join with GROUP BY, shared base tables across multiple MVs, multi-DML transactions, transaction rollback, cross joins, partitioned tables in joins, VACUUM FULL, TRUNCATE, WITH NO DATA, and DROP CASCADE. --- src/backend/catalog/gp_matview_aux.c | 137 +++-- src/test/regress/expected/matview_data.out | 555 +++++++++++++++++++-- src/test/regress/sql/matview_data.sql | 221 ++++++++ 3 files changed, 834 insertions(+), 79 deletions(-) diff --git a/src/backend/catalog/gp_matview_aux.c b/src/backend/catalog/gp_matview_aux.c index a119f86b6e5..8a32d6d115e 100644 --- a/src/backend/catalog/gp_matview_aux.c +++ b/src/backend/catalog/gp_matview_aux.c @@ -52,6 +52,9 @@ #include "optimizer/transform.h" #include "parser/parsetree.h" +static bool extract_base_relids_from_jointree(Node *jtnode, List *rtable, + List **relids, bool *has_foreign); + static void InsertMatviewTablesEntries(Oid mvoid, List *relids); static void RemoveMatviewTablesEntries(Oid mvoid); @@ -60,20 +63,106 @@ static void SetMatviewAuxStatus_guts(Oid mvoid, char status); static void addRelationMVRefCount(Oid relid, int32 mvrefcount); +/* + * extract_base_relids_from_jointree + * Recursively walk a join tree node and collect base relation OIDs. + * + * Handles RangeTblRef (leaf), JoinExpr (explicit JOIN), and FromExpr + * (implicit cross-join / comma-separated FROM list). + * + * Returns false if any unsupported RTE kind is found (subquery, function, + * CTE, etc.). Self-joins are deduplicated via list_append_unique_oid. + */ +static bool +extract_base_relids_from_jointree(Node *jtnode, List *rtable, + List **relids, bool *has_foreign) +{ + if (jtnode == NULL) + return false; + + if (IsA(jtnode, RangeTblRef)) + { + int rtindex = ((RangeTblRef *) jtnode)->rtindex; + RangeTblEntry *rte = rt_fetch(rtindex, rtable); + char relkind; + bool can_be_partition; + + if (rte->rtekind != RTE_RELATION) + return false; + + relkind = get_rel_relkind(rte->relid); + + /* + * Allow foreign table here, however we don't know if the data is + * up to date or not of the view. + * But if users want to query matview instead of query foreign tables + * outside CBDB, let them decide with aqumv_allow_foreign_table. + */ + if (relkind != RELKIND_RELATION && + relkind != RELKIND_PARTITIONED_TABLE && + relkind != RELKIND_FOREIGN_TABLE) + return false; + + if (has_foreign && relkind == RELKIND_FOREIGN_TABLE) + *has_foreign = true; + + /* + * Inherit tables are not supported. + */ + can_be_partition = (relkind == RELKIND_PARTITIONED_TABLE) || + get_rel_relispartition(rte->relid); + + if (!can_be_partition && + (has_superclass(rte->relid) || has_subclass(rte->relid))) + return false; + + /* Deduplicate for self-joins (t1 JOIN t1). */ + *relids = list_append_unique_oid(*relids, rte->relid); + + return true; + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + if (!extract_base_relids_from_jointree(j->larg, rtable, relids, has_foreign)) + return false; + if (!extract_base_relids_from_jointree(j->rarg, rtable, relids, has_foreign)) + return false; + + return true; + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *lc; + + foreach(lc, f->fromlist) + { + if (!extract_base_relids_from_jointree((Node *) lfirst(lc), + rtable, relids, has_foreign)) + return false; + } + + return true; + } + + /* Unsupported node type */ + return false; +} + /* * GetViewBaseRelids * Get all base tables's oid of a query tree. - * Currently there is only one base table, but there should be - * distinct func on it later. Self join tables: t1 join t1, will - * get only one oid. - * + * Supports single-table and multi-table (JOIN) queries. + * Self join tables: t1 join t1, will get only one oid. + * * Return NIL if the query we think it's useless. */ List* GetViewBaseRelids(const Query *viewQuery, bool *has_foreign) { List *relids = NIL; - Node *mvjtnode; if ((viewQuery->commandType != CMD_SELECT) || (viewQuery->rowMarks != NIL) || @@ -109,44 +198,14 @@ GetViewBaseRelids(const Query *viewQuery, bool *has_foreign) if (contain_mutable_functions((Node*)viewQuery)) return NIL; - if (list_length(viewQuery->jointree->fromlist) != 1) - return NIL; - - mvjtnode = (Node *) linitial(viewQuery->jointree->fromlist); - if (!IsA(mvjtnode, RangeTblRef)) - return NIL; - - RangeTblEntry *rte = rt_fetch(1, viewQuery->rtable); - if (rte->rtekind != RTE_RELATION) - return NIL; - - char relkind = get_rel_relkind(rte->relid); - - /* - * Allow foreign table here, however we don't know if the data is - * up to date or not of the view. - * But if users want to query matview instead of query foreign tables - * outside CBDB, let them decide with aqumv_allow_foreign_table. - */ - if (relkind != RELKIND_RELATION && - relkind != RELKIND_PARTITIONED_TABLE && - relkind != RELKIND_FOREIGN_TABLE) - return NIL; - if (has_foreign) - *has_foreign = relkind == RELKIND_FOREIGN_TABLE; + *has_foreign = false; - /* - * inherit tables are not supported. - */ - bool can_be_partition = (relkind == RELKIND_PARTITIONED_TABLE) || get_rel_relispartition(rte->relid); - - if (!can_be_partition && - (has_superclass(rte->relid) || has_subclass(rte->relid))) + if (!extract_base_relids_from_jointree((Node *) viewQuery->jointree, + viewQuery->rtable, + &relids, has_foreign)) return NIL; - relids = list_make1_oid(rte->relid); - return relids; } diff --git a/src/test/regress/expected/matview_data.out b/src/test/regress/expected/matview_data.out index 91a06d6cbf9..85697e90072 100644 --- a/src/test/regress/expected/matview_data.out +++ b/src/test/regress/expected/matview_data.out @@ -3,7 +3,11 @@ SET optimizer TO off; create schema matview_data_schema; set search_path to matview_data_schema; create table t1(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 5) i; insert into t1 select i, i+1 from generate_series(1, 3) i; create materialized view mv0 as select * from t1; @@ -27,6 +31,8 @@ select mvname, datastatus from gp_matview_aux where mvname in ('mv0','mv1', 'mv2 -- truncate in self transaction begin; create table t3(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create materialized view mv3 as select * from t3; NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. @@ -326,6 +332,465 @@ select mvname, datastatus from gp_matview_aux where mvname = 'mv_tp_issue_582'; (0 rows) abort; +-- +-- Test multi-table JOIN materialized views +-- +create table jt1(id int, val int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table jt2(id int, val int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table jt3(id int, val int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into jt1 select i, i*10 from generate_series(1,5) i; +insert into jt2 select i, i*100 from generate_series(1,5) i; +insert into jt3 select i, i*1000 from generate_series(1,5) i; +-- Two-table INNER JOIN: verify registration +create materialized view mv_join2 as + select jt1.id, jt1.val as v1, jt2.val as v2 + from jt1 join jt2 on jt1.id = jt2.id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | u +(1 row) + +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_join2'::regclass; + count +------- + 2 +(1 row) + +-- INSERT on table A: status -> 'i' +insert into jt1 values(6, 60); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | i +(1 row) + +-- REFRESH: status -> 'u' +refresh materialized view mv_join2; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | u +(1 row) + +-- INSERT on table B: status -> 'i' +insert into jt2 values(7, 700); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | i +(1 row) + +-- UPDATE on table A: status -> 'e' +refresh materialized view mv_join2; +update jt1 set val = 99 where id = 1; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | e +(1 row) + +-- DELETE on table B: status -> 'e' +refresh materialized view mv_join2; +delete from jt2 where id = 7; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | e +(1 row) + +-- Implicit join (FROM t1, t2 WHERE ...): verify registration +refresh materialized view mv_join2; +create materialized view mv_implicit_join as + select jt1.id, jt1.val as v1, jt2.val as v2 + from jt1, jt2 where jt1.id = jt2.id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_implicit_join'; + mvname | datastatus +------------------+------------ + mv_implicit_join | u +(1 row) + +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_implicit_join'::regclass; + count +------- + 2 +(1 row) + +-- Three-table join: verify 3 entries in gp_matview_tables +create materialized view mv_join3 as + select jt1.id, jt1.val as v1, jt2.val as v2, jt3.val as v3 + from jt1 join jt2 on jt1.id = jt2.id join jt3 on jt2.id = jt3.id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + mvname | datastatus +----------+------------ + mv_join3 | u +(1 row) + +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_join3'::regclass; + count +------- + 3 +(1 row) + +-- DML on middle table expires mv_join3 +insert into jt2 values(8, 800); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + mvname | datastatus +----------+------------ + mv_join3 | i +(1 row) + +-- Self-join: verify only 1 entry in gp_matview_tables +create materialized view mv_selfjoin as + select a.id as aid, b.id as bid + from jt1 a join jt1 b on a.id = b.id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'aid' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_selfjoin'; + mvname | datastatus +-------------+------------ + mv_selfjoin | u +(1 row) + +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_selfjoin'::regclass; + count +------- + 1 +(1 row) + +-- LEFT/RIGHT/FULL OUTER JOIN: verify all register correctly +create materialized view mv_left_join as + select jt1.id, jt1.val as v1, jt2.val as v2 + from jt1 left join jt2 on jt1.id = jt2.id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_left_join'; + mvname | datastatus +--------------+------------ + mv_left_join | u +(1 row) + +create materialized view mv_right_join as + select jt1.id, jt2.val as v2 + from jt1 right join jt2 on jt1.id = jt2.id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_right_join'; + mvname | datastatus +---------------+------------ + mv_right_join | u +(1 row) + +create materialized view mv_full_join as + select jt1.id as id1, jt2.id as id2 + from jt1 full join jt2 on jt1.id = jt2.id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_full_join'; + mvname | datastatus +--------------+------------ + mv_full_join | u +(1 row) + +-- Partitioned table in join: verify partition DML propagates +create table jt_par(a int, b int) partition by range(a) + (start(1) end(3) every(1)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into jt_par values(1, 10), (2, 20); +create materialized view mv_join_par as + select jt1.id, jt1.val as v1, jt_par.a, jt_par.b + from jt1 join jt_par on jt1.id = jt_par.a; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_par'; + mvname | datastatus +-------------+------------ + mv_join_par | u +(1 row) + +insert into jt_par values(1, 11); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_par'; + mvname | datastatus +-------------+------------ + mv_join_par | i +(1 row) + +refresh materialized view mv_join_par; +insert into jt1 values(9, 90); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_par'; + mvname | datastatus +-------------+------------ + mv_join_par | i +(1 row) + +-- VACUUM FULL on one base table of a join MV: status -> 'r' +refresh materialized view mv_join2; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | u +(1 row) + +vacuum full jt1; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | r +(1 row) + +-- TRUNCATE on one base table of a join MV: status -> 'e' +refresh materialized view mv_join2; +truncate jt2; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | e +(1 row) + +-- CREATE WITH NO DATA: status -> 'e' +create materialized view mv_join_nodata as + select jt1.id, jt3.val from jt1 join jt3 on jt1.id = jt3.id + with no data; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_nodata'; + mvname | datastatus +----------------+------------ + mv_join_nodata | e +(1 row) + +-- DROP CASCADE: matview and aux entries removed +drop materialized view mv_join_nodata; +select count(*) from gp_matview_aux where mvname = 'mv_join_nodata'; + count +------- + 0 +(1 row) + +-- Mixed join types in one view (INNER + LEFT) +create materialized view mv_mixed_join as + select jt1.id, jt2.val as v2, jt3.val as v3 + from jt1 join jt2 on jt1.id = jt2.id left join jt3 on jt2.id = jt3.id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_mixed_join'; + mvname | datastatus +---------------+------------ + mv_mixed_join | u +(1 row) + +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_mixed_join'::regclass; + count +------- + 3 +(1 row) + +-- Join with GROUP BY and aggregates +create materialized view mv_join_agg as + select jt1.id, count(*) as cnt, sum(jt2.val) as total + from jt1 join jt2 on jt1.id = jt2.id group by jt1.id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_agg'; + mvname | datastatus +-------------+------------ + mv_join_agg | u +(1 row) + +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_join_agg'::regclass; + count +------- + 2 +(1 row) + +-- Multiple MVs sharing base tables: DML on one table affects all dependent MVs +refresh materialized view mv_join2; +refresh materialized view mv_join3; +refresh materialized view mv_mixed_join; +refresh materialized view mv_join_agg; +select mvname, datastatus from gp_matview_aux + where mvname in ('mv_join2', 'mv_join3', 'mv_mixed_join', 'mv_join_agg') + order by mvname; + mvname | datastatus +---------------+------------ + mv_join2 | u + mv_join3 | u + mv_join_agg | u + mv_mixed_join | u +(4 rows) + +insert into jt2 values(10, 1000); +-- all four share jt2 as a base table +select mvname, datastatus from gp_matview_aux + where mvname in ('mv_join2', 'mv_join3', 'mv_mixed_join', 'mv_join_agg') + order by mvname; + mvname | datastatus +---------------+------------ + mv_join2 | i + mv_join3 | i + mv_join_agg | i + mv_mixed_join | i +(4 rows) + +-- Transaction: multiple DML on different base tables +refresh materialized view mv_join3; +begin; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + mvname | datastatus +----------+------------ + mv_join3 | u +(1 row) + +insert into jt1 values(20, 200); +-- after insert: 'i' (insert-only) +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + mvname | datastatus +----------+------------ + mv_join3 | i +(1 row) + +delete from jt2 where id = 10; +-- after delete: escalates to 'e' (expired) +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + mvname | datastatus +----------+------------ + mv_join3 | e +(1 row) + +end; +-- committed: status persists as 'e' +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + mvname | datastatus +----------+------------ + mv_join3 | e +(1 row) + +-- Transaction rollback: status should revert +refresh materialized view mv_join3; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + mvname | datastatus +----------+------------ + mv_join3 | u +(1 row) + +begin; +update jt1 set val = 999 where id = 1; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + mvname | datastatus +----------+------------ + mv_join3 | e +(1 row) + +abort; +-- after rollback: back to 'u' +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + mvname | datastatus +----------+------------ + mv_join3 | u +(1 row) + +-- Transaction: insert then insert on different tables stays 'i' +refresh materialized view mv_join2; +begin; +insert into jt1 values(30, 300); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | i +(1 row) + +insert into jt2 values(31, 3100); +-- still 'i' since both are inserts +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + mvname | datastatus +----------+------------ + mv_join2 | i +(1 row) + +abort; +-- Cross join (FROM t1, t2 with no WHERE): verify registration +create materialized view mv_cross_join as + select jt1.id as id1, jt2.id as id2 from jt1, jt2; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id2' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_cross_join'; + mvname | datastatus +---------------+------------ + mv_cross_join | u +(1 row) + +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_cross_join'::regclass; + count +------- + 2 +(1 row) + +-- Drop base table CASCADE removes dependent join MVs and aux entries +create table jt_drop(id int, val int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into jt_drop values(1, 10); +create materialized view mv_join_drop as + select jt1.id, jt_drop.val from jt1 join jt_drop on jt1.id = jt_drop.id; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_drop'; + mvname | datastatus +--------------+------------ + mv_join_drop | u +(1 row) + +drop table jt_drop cascade; +NOTICE: drop cascades to materialized view mv_join_drop +select count(*) from gp_matview_aux where mvname = 'mv_join_drop'; + count +------- + 0 +(1 row) + +-- Clean up join test objects +drop materialized view mv_cross_join; +drop materialized view mv_join_agg; +drop materialized view mv_mixed_join; +drop materialized view mv_join_par; +drop table jt_par cascade; +drop materialized view mv_full_join; +drop materialized view mv_right_join; +drop materialized view mv_left_join; +drop materialized view mv_selfjoin; +drop materialized view mv_join3; +drop materialized view mv_implicit_join; +drop materialized view mv_join2; +drop table jt3; +drop table jt2; +drop table jt1; -- test drop table select mvname, datastatus from gp_matview_aux where mvname in ('mv0','mv1', 'mv2', 'mv3'); mvname | datastatus @@ -460,12 +925,12 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | u + mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2 | u - mv_par2_1 | u (6 rows) insert into par_1_prt_1 values (1, 1, 1); @@ -473,9 +938,9 @@ insert into par_1_prt_1 values (1, 1, 1); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_2 | u mv_par2 | u mv_par2_1 | u + mv_par1_2 | u mv_par1_1 | i mv_par1 | i mv_par | i @@ -486,12 +951,12 @@ insert into par values (1, 2, 2); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_2 | u + mv_par2 | i mv_par2_1 | u + mv_par1_2 | u mv_par1_1 | i mv_par1 | i mv_par | i - mv_par2 | i (6 rows) refresh materialized view mv_par; @@ -506,11 +971,11 @@ insert into par_1_prt_2_2_prt_1 values (1, 2, 1); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | i + mv_par2_1 | i + mv_par1_2 | u mv_par1 | u mv_par1_1 | u - mv_par1_2 | u - mv_par2_1 | i - mv_par2 | i mv_par | i (6 rows) @@ -521,11 +986,11 @@ truncate par_1_prt_2; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | e + mv_par2_1 | e + mv_par1_2 | u mv_par1 | u mv_par1_1 | u - mv_par1_2 | u - mv_par2_1 | e - mv_par2 | e mv_par | e (6 rows) @@ -535,11 +1000,11 @@ truncate par_1_prt_2; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | e + mv_par2_1 | e + mv_par1_2 | u mv_par1 | u mv_par1_1 | u - mv_par1_2 | u - mv_par2_1 | e - mv_par2 | e mv_par | e (6 rows) @@ -553,9 +1018,9 @@ vacuum full par_1_prt_1_2_prt_1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_2 | u mv_par2 | u mv_par2_1 | u + mv_par1_2 | u mv_par1_1 | r mv_par1 | r mv_par | r @@ -573,8 +1038,8 @@ select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ mv_par2 | r - mv_par | r mv_par2_1 | r + mv_par | r mv_par1_2 | r mv_par1 | r mv_par1_1 | r @@ -593,10 +1058,10 @@ NOTICE: table has parent, setting distribution columns to match parent table select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_1 | u - mv_par1_2 | u mv_par2 | u mv_par2_1 | u + mv_par1_1 | u + mv_par1_2 | u mv_par1 | e mv_par | e (6 rows) @@ -624,10 +1089,10 @@ alter table par_1_prt_1 detach partition par_1_prt_1_2_prt_1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_1 | u - mv_par1_2 | u mv_par2 | u mv_par2_1 | u + mv_par1_1 | u + mv_par1_2 | u mv_par1 | e mv_par | e (6 rows) @@ -642,10 +1107,10 @@ alter table par_1_prt_1 attach partition new_par for values from (4) to (5); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_1 | u - mv_par1_2 | u mv_par2 | u mv_par2_1 | u + mv_par1_1 | u + mv_par1_2 | u mv_par1 | e mv_par | e (6 rows) @@ -665,12 +1130,12 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | u + mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2 | u - mv_par2_1 | u (6 rows) insert into par values(1, 1, 1), (1, 1, 2); @@ -690,23 +1155,23 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | u + mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2 | u - mv_par2_1 | u (6 rows) insert into par_1_prt_2_2_prt_1 values(2, 2, 1); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | i + mv_par2_1 | i mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2_1 | i - mv_par2 | i mv_par | i (6 rows) @@ -715,23 +1180,23 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | u + mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2 | u - mv_par2_1 | u (6 rows) delete from par where b = 2 and c = 1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | e + mv_par2_1 | e mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2_1 | e - mv_par2 | e mv_par | e (6 rows) @@ -740,21 +1205,21 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | u + mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2 | u - mv_par2_1 | u (6 rows) delete from par_1_prt_1_2_prt_2; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_1 | u mv_par2 | u mv_par2_1 | u + mv_par1_1 | u mv_par1_2 | e mv_par1 | e mv_par | e @@ -766,12 +1231,12 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | u + mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2 | u - mv_par2_1 | u (6 rows) update par set c = 2 where b = 1 and c = 1; @@ -792,12 +1257,12 @@ begin; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par2 | u + mv_par2_1 | u mv_par | u mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2 | u - mv_par2_1 | u (6 rows) update par set c = 2, a = 2 where b = 1 and c = 1; @@ -937,10 +1402,20 @@ select mvname, datastatus from gp_matview_aux where mvname = 'mv_par_normal_oid' --start_ignore drop schema matview_data_schema cascade; -NOTICE: drop cascades to 3 other objects +NOTICE: drop cascades to 13 other objects DETAIL: drop cascades to table t2 drop cascades to table t3 drop cascades to materialized view mv3 +drop cascades to table par +drop cascades to materialized view mv_par +drop cascades to materialized view mv_par1 +drop cascades to materialized view mv_par1_1 +drop cascades to materialized view mv_par1_2 +drop cascades to materialized view mv_par2 +drop cascades to materialized view mv_par2_1 +drop cascades to extension gp_inject_fault +drop cascades to table par_normal_oid +drop cascades to materialized view mv_par_normal_oid --end_ignore reset enable_answer_query_using_materialized_views; reset optimizer; diff --git a/src/test/regress/sql/matview_data.sql b/src/test/regress/sql/matview_data.sql index d1d7af4c7fc..059a5a97bf4 100644 --- a/src/test/regress/sql/matview_data.sql +++ b/src/test/regress/sql/matview_data.sql @@ -122,6 +122,227 @@ create materialized view mv_tp_issue_582 as select * from tp_issue_582; select mvname, datastatus from gp_matview_aux where mvname = 'mv_tp_issue_582'; abort; +-- +-- Test multi-table JOIN materialized views +-- +create table jt1(id int, val int); +create table jt2(id int, val int); +create table jt3(id int, val int); +insert into jt1 select i, i*10 from generate_series(1,5) i; +insert into jt2 select i, i*100 from generate_series(1,5) i; +insert into jt3 select i, i*1000 from generate_series(1,5) i; + +-- Two-table INNER JOIN: verify registration +create materialized view mv_join2 as + select jt1.id, jt1.val as v1, jt2.val as v2 + from jt1 join jt2 on jt1.id = jt2.id; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_join2'::regclass; + +-- INSERT on table A: status -> 'i' +insert into jt1 values(6, 60); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + +-- REFRESH: status -> 'u' +refresh materialized view mv_join2; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + +-- INSERT on table B: status -> 'i' +insert into jt2 values(7, 700); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + +-- UPDATE on table A: status -> 'e' +refresh materialized view mv_join2; +update jt1 set val = 99 where id = 1; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + +-- DELETE on table B: status -> 'e' +refresh materialized view mv_join2; +delete from jt2 where id = 7; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + +-- Implicit join (FROM t1, t2 WHERE ...): verify registration +refresh materialized view mv_join2; +create materialized view mv_implicit_join as + select jt1.id, jt1.val as v1, jt2.val as v2 + from jt1, jt2 where jt1.id = jt2.id; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_implicit_join'; +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_implicit_join'::regclass; + +-- Three-table join: verify 3 entries in gp_matview_tables +create materialized view mv_join3 as + select jt1.id, jt1.val as v1, jt2.val as v2, jt3.val as v3 + from jt1 join jt2 on jt1.id = jt2.id join jt3 on jt2.id = jt3.id; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_join3'::regclass; +-- DML on middle table expires mv_join3 +insert into jt2 values(8, 800); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + +-- Self-join: verify only 1 entry in gp_matview_tables +create materialized view mv_selfjoin as + select a.id as aid, b.id as bid + from jt1 a join jt1 b on a.id = b.id; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_selfjoin'; +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_selfjoin'::regclass; + +-- LEFT/RIGHT/FULL OUTER JOIN: verify all register correctly +create materialized view mv_left_join as + select jt1.id, jt1.val as v1, jt2.val as v2 + from jt1 left join jt2 on jt1.id = jt2.id; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_left_join'; +create materialized view mv_right_join as + select jt1.id, jt2.val as v2 + from jt1 right join jt2 on jt1.id = jt2.id; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_right_join'; +create materialized view mv_full_join as + select jt1.id as id1, jt2.id as id2 + from jt1 full join jt2 on jt1.id = jt2.id; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_full_join'; + +-- Partitioned table in join: verify partition DML propagates +create table jt_par(a int, b int) partition by range(a) + (start(1) end(3) every(1)); +insert into jt_par values(1, 10), (2, 20); +create materialized view mv_join_par as + select jt1.id, jt1.val as v1, jt_par.a, jt_par.b + from jt1 join jt_par on jt1.id = jt_par.a; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_par'; +insert into jt_par values(1, 11); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_par'; +refresh materialized view mv_join_par; +insert into jt1 values(9, 90); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_par'; + +-- VACUUM FULL on one base table of a join MV: status -> 'r' +refresh materialized view mv_join2; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; +vacuum full jt1; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + +-- TRUNCATE on one base table of a join MV: status -> 'e' +refresh materialized view mv_join2; +truncate jt2; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; + +-- CREATE WITH NO DATA: status -> 'e' +create materialized view mv_join_nodata as + select jt1.id, jt3.val from jt1 join jt3 on jt1.id = jt3.id + with no data; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_nodata'; + +-- DROP CASCADE: matview and aux entries removed +drop materialized view mv_join_nodata; +select count(*) from gp_matview_aux where mvname = 'mv_join_nodata'; + +-- Mixed join types in one view (INNER + LEFT) +create materialized view mv_mixed_join as + select jt1.id, jt2.val as v2, jt3.val as v3 + from jt1 join jt2 on jt1.id = jt2.id left join jt3 on jt2.id = jt3.id; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_mixed_join'; +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_mixed_join'::regclass; + +-- Join with GROUP BY and aggregates +create materialized view mv_join_agg as + select jt1.id, count(*) as cnt, sum(jt2.val) as total + from jt1 join jt2 on jt1.id = jt2.id group by jt1.id; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_agg'; +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_join_agg'::regclass; + +-- Multiple MVs sharing base tables: DML on one table affects all dependent MVs +refresh materialized view mv_join2; +refresh materialized view mv_join3; +refresh materialized view mv_mixed_join; +refresh materialized view mv_join_agg; +select mvname, datastatus from gp_matview_aux + where mvname in ('mv_join2', 'mv_join3', 'mv_mixed_join', 'mv_join_agg') + order by mvname; +insert into jt2 values(10, 1000); +-- all four share jt2 as a base table +select mvname, datastatus from gp_matview_aux + where mvname in ('mv_join2', 'mv_join3', 'mv_mixed_join', 'mv_join_agg') + order by mvname; + +-- Transaction: multiple DML on different base tables +refresh materialized view mv_join3; +begin; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; +insert into jt1 values(20, 200); +-- after insert: 'i' (insert-only) +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; +delete from jt2 where id = 10; +-- after delete: escalates to 'e' (expired) +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; +end; +-- committed: status persists as 'e' +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + +-- Transaction rollback: status should revert +refresh materialized view mv_join3; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; +begin; +update jt1 set val = 999 where id = 1; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; +abort; +-- after rollback: back to 'u' +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; + +-- Transaction: insert then insert on different tables stays 'i' +refresh materialized view mv_join2; +begin; +insert into jt1 values(30, 300); +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; +insert into jt2 values(31, 3100); +-- still 'i' since both are inserts +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; +abort; + +-- Cross join (FROM t1, t2 with no WHERE): verify registration +create materialized view mv_cross_join as + select jt1.id as id1, jt2.id as id2 from jt1, jt2; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_cross_join'; +select count(*) from gp_matview_tables mt + join pg_class c on mt.relid = c.oid + where mt.mvoid = 'mv_cross_join'::regclass; + +-- Drop base table CASCADE removes dependent join MVs and aux entries +create table jt_drop(id int, val int); +insert into jt_drop values(1, 10); +create materialized view mv_join_drop as + select jt1.id, jt_drop.val from jt1 join jt_drop on jt1.id = jt_drop.id; +select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_drop'; +drop table jt_drop cascade; +select count(*) from gp_matview_aux where mvname = 'mv_join_drop'; + +-- Clean up join test objects +drop materialized view mv_cross_join; +drop materialized view mv_join_agg; +drop materialized view mv_mixed_join; +drop materialized view mv_join_par; +drop table jt_par cascade; +drop materialized view mv_full_join; +drop materialized view mv_right_join; +drop materialized view mv_left_join; +drop materialized view mv_selfjoin; +drop materialized view mv_join3; +drop materialized view mv_implicit_join; +drop materialized view mv_join2; +drop table jt3; +drop table jt2; +drop table jt1; + -- test drop table select mvname, datastatus from gp_matview_aux where mvname in ('mv0','mv1', 'mv2', 'mv3'); drop materialized view mv2;