diff --git a/.github/workflows/build-gpdb.yml b/.github/workflows/build-gpdb.yml index af336600a3f..e3706ad4cb1 100644 --- a/.github/workflows/build-gpdb.yml +++ b/.github/workflows/build-gpdb.yml @@ -156,7 +156,8 @@ jobs: "contrib/xml2:installcheck"] }, {"test":"ic-gpcontrib", - "make_configs":["gpcontrib/gp_array_agg:installcheck", + "make_configs":["gpcontrib/access_log:installcheck", + "gpcontrib/gp_array_agg:installcheck", "gpcontrib/gp_debug_numsegments:installcheck", "gpcontrib/gp_distribution_policy:installcheck", "gpcontrib/gp_error_handling:installcheck", diff --git a/gpcontrib/Makefile b/gpcontrib/Makefile index 1af39741c56..b9c4eeb043f 100644 --- a/gpcontrib/Makefile +++ b/gpcontrib/Makefile @@ -12,7 +12,8 @@ include $(top_builddir)/src/Makefile.global recurse_targets = "" ifeq "$(enable_debug_extensions)" "yes" - recurse_targets = gp_sparse_vector \ + recurse_targets = access_log \ + gp_sparse_vector \ gp_distribution_policy \ gp_parallel_retrieve_cursor \ gp_internal_tools \ @@ -30,7 +31,8 @@ ifeq "$(enable_debug_extensions)" "yes" gp_interconnect_stats \ temp_tables_stat else - recurse_targets = gp_sparse_vector \ + recurse_targets = access_log \ + gp_sparse_vector \ gp_distribution_policy \ gp_parallel_retrieve_cursor \ gp_internal_tools \ @@ -130,6 +132,7 @@ distclean: if [ "${enable_orafce}" = "yes" ]; then $(MAKE) -C orafce NO_PGXS=true distclean; fi installcheck: + $(MAKE) -C access_log installcheck $(MAKE) -C gp_internal_tools installcheck $(MAKE) -C gp_array_agg installcheck $(MAKE) -C gp_aux_catalog installcheck diff --git a/gpcontrib/access_log/Makefile b/gpcontrib/access_log/Makefile new file mode 100644 index 00000000000..e1cafa85af1 --- /dev/null +++ b/gpcontrib/access_log/Makefile @@ -0,0 +1,17 @@ +# gpcontrib/access_log/Makefile + +MODULE_big = access_log +OBJS = access_log.o + +REGRESS = access_log + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = gpcontrib/access_log 
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/gpcontrib/access_log/README.md b/gpcontrib/access_log/README.md
new file mode 100644
index 00000000000..9f607ac0f7a
--- /dev/null
+++ b/gpcontrib/access_log/README.md
@@ -0,0 +1,12 @@
+# access_log
+
+The extension logs when and which user initializes Seq Scan on which partition
+or table. The log file name is pg_log/access.log. To activate the extension you
+can use the shared_preload_libraries GUC or the LOAD command.
+
+If you want to register Seq Scans on segments only you should load access_log
+on segments and don't load on master:
+
+```
+gpconfig -c shared_preload_libraries -v 'access_log' -m ''
+```
diff --git a/gpcontrib/access_log/access_log.c b/gpcontrib/access_log/access_log.c
new file mode 100644
index 00000000000..424e33fbb6f
--- /dev/null
+++ b/gpcontrib/access_log/access_log.c
@@ -0,0 +1,157 @@
+/*
+ * access_log.c
+ *	  Log every sequential-scan initialization to pg_log/access.log.
+ *
+ * Each record is one line of the form
+ *		YYYY-MM-DD HH:MM:SS.uuuuuu TZ,user,schema.table
+ * Fields are written verbatim, without quoting.
+ */
+#include "postgres.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "catalog/pg_namespace.h"
+#include "executor/nodeSeqscan.h"
+#include "libpq/auth.h"
+#include "utils/syscache.h"
+
+
+PG_MODULE_MAGIC;
+
+#define LOG_FILE_NAME "pg_log/access.log"
+
+void _PG_init(void);
+
+
+/* Hook that was installed before ours; chained to after logging. */
+static init_scan_hook_type next_init_scan_hook = NULL;
+
+/*
+ * Append one already formatted record to LOG_FILE_NAME.
+ *
+ * Failure to open or write the file is reported as a WARNING and is
+ * otherwise ignored, so the scan being logged still proceeds.
+ */
+static void
+write_to_log(const char *str)
+{
+	size_t		len = strlen(str);
+	ssize_t		written;
+	int			fd;
+
+	/*
+	 * Synchronization is not necessary because `man write` says:
+	 * "If the file was open(2)ed with O_APPEND, the file offset is first set to
+	 * the end of the file before writing. The adjustment of the file offset and
+	 * the write operation are performed as an atomic step."
+	 */
+	fd = open(LOG_FILE_NAME, O_WRONLY | O_APPEND | O_CREAT, S_IRUSR | S_IWUSR);
+	if (fd < 0)
+	{
+		ereport(WARNING,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m", LOG_FILE_NAME)));
+		return;
+	}
+
+	/* Emit the whole record with a single write() call. */
+	errno = 0;
+	written = write(fd, str, len);
+	if (written < 0 || (size_t) written != len)
+	{
+		/* a short write that left errno alone: assume out of disk space */
+		if (errno == 0)
+			errno = ENOSPC;
+		ereport(WARNING,
+				(errcode_for_file_access(),
+				 errmsg("could not write to file \"%s\": %m", LOG_FILE_NAME)));
+	}
+
+	close(fd);
+}
+
+/*
+ * init_scan_hook implementation.
+ *
+ * Builds a "timestamp,user,schema.table" record for currentRelation, appends
+ * it to the log file and then calls the previously installed hook, if any.
+ * The user field is left empty when MyProcPort or its user_name is NULL, and
+ * the "schema." prefix is omitted when the namespace lookup finds nothing.
+ */
+static void
+access_log_init_scan_hook(Relation currentRelation)
+{
+	char		buf[512];
+	char		stamp[32];
+	char		zone[128];
+	struct timeval tv;
+	pg_time_t	stamp_time;
+	struct pg_tm *tm;
+	HeapTuple	tp;
+
+	gettimeofday(&tv, NULL);
+
+	/*
+	 * Copy the seconds by value instead of casting &tv.tv_sec: time_t and
+	 * pg_time_t are distinct types that need not have the same width.
+	 */
+	stamp_time = (pg_time_t) tv.tv_sec;
+	tm = pg_localtime(&stamp_time, log_timezone);
+
+	/*
+	 * Format date/time and zone name separately and put the microseconds
+	 * between them, rather than patching digits into a formatted string at a
+	 * hard-coded offset.  pg_strftime() returns 0 when the result does not
+	 * fit (or is empty); fall back to an empty field in that case.
+	 */
+	if (pg_strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", tm) == 0)
+		stamp[0] = '\0';
+	if (pg_strftime(zone, sizeof(zone), "%Z", tm) == 0)
+		zone[0] = '\0';
+	snprintf(buf, sizeof(buf), "%s.%06d %s,", stamp, (int) tv.tv_usec, zone);
+
+	if (MyProcPort != NULL && MyProcPort->user_name != NULL)
+		strlcat(buf, MyProcPort->user_name, sizeof(buf));
+	strlcat(buf, ",", sizeof(buf));
+
+	tp = SearchSysCache1(NAMESPACEOID,
+						 ObjectIdGetDatum(currentRelation->rd_rel->relnamespace));
+	if (HeapTupleIsValid(tp))
+	{
+		Form_pg_namespace nsptup = (Form_pg_namespace) GETSTRUCT(tp);
+
+		strlcat(buf, NameStr(nsptup->nspname), sizeof(buf));
+		strlcat(buf, ".", sizeof(buf));
+		ReleaseSysCache(tp);
+	}
+
+	strlcat(buf, NameStr(currentRelation->rd_rel->relname), sizeof(buf));
+	strlcat(buf, "\n", sizeof(buf));
+
+	write_to_log(buf);
+
+	if (next_init_scan_hook)
+		next_init_scan_hook(currentRelation);
+}
+
+/*
+ * Module load callback: install access_log_init_scan_hook as init_scan_hook,
+ * remembering the hook that was there before so it can be chained to.
+ */
+void
+_PG_init(void)
+{
+	/* Be sure we do initialization only once */
+	static bool inited = false;
+
+	if (inited)
+		return;
+
+	next_init_scan_hook = init_scan_hook;
+	init_scan_hook = access_log_init_scan_hook;
+
+	inited = true;
+}
diff --git a/gpcontrib/access_log/expected/access_log.out b/gpcontrib/access_log/expected/access_log.out
new file mode 100644
index 00000000000..ea3b3709987
--- /dev/null
+++ b/gpcontrib/access_log/expected/access_log.out
@@ -0,0 +1,507 @@
+-- Create external table to read log files from segments
+do
+$$
+declare
+    seg text;
+    locations text := '';
+begin
+    for seg in
+        select hostname
|| ':' || port || datadir + from gp_segment_configuration + where role = 'p' and content >= 0 + loop + if locations != '' then + locations = locations || ','; + end if; + + locations = locations || '''file://' || seg || '/pg_log/access.log'''; + end loop; + + execute 'create external table access_log + (logtime timestamp with time zone, loguser text, tbl text) + location (' || locations || ') format ''csv'''; +end $$; +-- Start logging +load '$libdir/access_log.so'; +create or replace function show_log(before_query text) + returns table(gp_segment_id int, date_ok bool, user_ok bool, tbl text) +as $$ + select gp_segment_id, + logtime between before_query::timestamp with time zone and now() date_ok, + loguser=current_user user_ok, + tbl + from access_log; +$$ language sql; +-- +-- Heap table +-- Partitioned table +create table t_heap_part(a int, b int, c int) +distributed by (a) +partition by range (b) + subpartition by range (c) + subpartition template (start (40) end (46) every (3)) +(start (0) end (4) every (2)); +NOTICE: CREATE TABLE will create partition "t_heap_part_1_prt_1" for table "t_heap_part" +NOTICE: CREATE TABLE will create partition "t_heap_part_1_prt_1_2_prt_1" for table "t_heap_part_1_prt_1" +NOTICE: CREATE TABLE will create partition "t_heap_part_1_prt_1_2_prt_2" for table "t_heap_part_1_prt_1" +NOTICE: CREATE TABLE will create partition "t_heap_part_1_prt_2" for table "t_heap_part" +NOTICE: CREATE TABLE will create partition "t_heap_part_1_prt_2_2_prt_1" for table "t_heap_part_1_prt_2" +NOTICE: CREATE TABLE will create partition "t_heap_part_1_prt_2_2_prt_2" for table "t_heap_part_1_prt_2" +-- All partitions +select now() as before_query \gset +select * from t_heap_part; + a | b | c +---+---+--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+------------------------------------ + 2 | t | t | public.t_heap_part_1_prt_1_2_prt_1 + 2 | t | t | 
public.t_heap_part_1_prt_1_2_prt_2 + 2 | t | t | public.t_heap_part_1_prt_2_2_prt_1 + 2 | t | t | public.t_heap_part_1_prt_2_2_prt_2 + 0 | t | t | public.t_heap_part_1_prt_1_2_prt_1 + 0 | t | t | public.t_heap_part_1_prt_1_2_prt_2 + 0 | t | t | public.t_heap_part_1_prt_2_2_prt_1 + 0 | t | t | public.t_heap_part_1_prt_2_2_prt_2 + 1 | t | t | public.t_heap_part_1_prt_1_2_prt_1 + 1 | t | t | public.t_heap_part_1_prt_1_2_prt_2 + 1 | t | t | public.t_heap_part_1_prt_2_2_prt_1 + 1 | t | t | public.t_heap_part_1_prt_2_2_prt_2 +(12 rows) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +-- Read partitions selected by condition in WHERE +select now() as before_query \gset +select * from t_heap_part where c = 40; + a | b | c +---+---+--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+------------------------------------ + 0 | t | t | public.t_heap_part_1_prt_1_2_prt_1 + 0 | t | t | public.t_heap_part_1_prt_2_2_prt_1 + 1 | t | t | public.t_heap_part_1_prt_1_2_prt_1 + 1 | t | t | public.t_heap_part_1_prt_2_2_prt_1 + 2 | t | t | public.t_heap_part_1_prt_1_2_prt_1 + 2 | t | t | public.t_heap_part_1_prt_2_2_prt_1 +(6 rows) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +select now() as before_query \gset +select * from t_heap_part where b = 0; + a | b | c +---+---+--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+------------------------------------ + 0 | t | t | public.t_heap_part_1_prt_1_2_prt_1 + 0 | t | t | public.t_heap_part_1_prt_1_2_prt_2 + 1 | t | t | public.t_heap_part_1_prt_1_2_prt_1 + 1 | t | t | public.t_heap_part_1_prt_1_2_prt_2 + 2 | t | t | public.t_heap_part_1_prt_1_2_prt_1 + 2 | t | t | public.t_heap_part_1_prt_1_2_prt_2 
+(6 rows) + +-- Don't delete files, because the next select will lead to adding log records +-- on one segment only. Reading from an external table fails when log file does +-- not exist on any segment. +-- One segment +select now() as before_query \gset +select * from t_heap_part where a = 0; + a | b | c +---+---+--- +(0 rows) + +select count(distinct gp_segment_id), string_agg(tbl, ',' order by tbl) + from show_log(:'before_query') + where date_ok and user_ok; + count | string_agg +-------+--------------------------------------------------------------------------------------------------------------------------------------------- + 1 | public.t_heap_part_1_prt_1_2_prt_1,public.t_heap_part_1_prt_1_2_prt_2,public.t_heap_part_1_prt_2_2_prt_1,public.t_heap_part_1_prt_2_2_prt_2 +(1 row) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +-- Table without partitions +create table t_heap(a int) +distributed by (a); +select now() as before_query \gset +select * from t_heap; + a +--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+--------------- + 2 | t | t | public.t_heap + 0 | t | t | public.t_heap + 1 | t | t | public.t_heap +(3 rows) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +-- +-- AO table +-- Partitioned table +create table t_ao_part(a int, b int, c int) +with (appendonly = true) +distributed by (a) +partition by range (b) + subpartition by range (c) + subpartition template (start (40) end (46) every (3)) +(start (0) end (4) every (2)); +NOTICE: CREATE TABLE will create partition "t_ao_part_1_prt_1" for table "t_ao_part" +NOTICE: CREATE TABLE will create partition "t_ao_part_1_prt_1_2_prt_1" for table "t_ao_part_1_prt_1" +NOTICE: CREATE TABLE will create partition "t_ao_part_1_prt_1_2_prt_2" for table 
"t_ao_part_1_prt_1" +NOTICE: CREATE TABLE will create partition "t_ao_part_1_prt_2" for table "t_ao_part" +NOTICE: CREATE TABLE will create partition "t_ao_part_1_prt_2_2_prt_1" for table "t_ao_part_1_prt_2" +NOTICE: CREATE TABLE will create partition "t_ao_part_1_prt_2_2_prt_2" for table "t_ao_part_1_prt_2" +-- All partitions +select now() as before_query \gset +select * from t_ao_part; + a | b | c +---+---+--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+---------------------------------- + 0 | t | t | public.t_ao_part_1_prt_1_2_prt_1 + 0 | t | t | public.t_ao_part_1_prt_2_2_prt_1 + 0 | t | t | public.t_ao_part_1_prt_1_2_prt_2 + 0 | t | t | public.t_ao_part_1_prt_2_2_prt_2 + 1 | t | t | public.t_ao_part_1_prt_1_2_prt_1 + 1 | t | t | public.t_ao_part_1_prt_2_2_prt_1 + 1 | t | t | public.t_ao_part_1_prt_1_2_prt_2 + 1 | t | t | public.t_ao_part_1_prt_2_2_prt_2 + 2 | t | t | public.t_ao_part_1_prt_1_2_prt_1 + 2 | t | t | public.t_ao_part_1_prt_2_2_prt_1 + 2 | t | t | public.t_ao_part_1_prt_1_2_prt_2 + 2 | t | t | public.t_ao_part_1_prt_2_2_prt_2 +(12 rows) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +-- Read partitions selected by condition in WHERE +select now() as before_query \gset +select * from t_ao_part where c = 40; + a | b | c +---+---+--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+---------------------------------- + 1 | t | t | public.t_ao_part_1_prt_1_2_prt_1 + 1 | t | t | public.t_ao_part_1_prt_2_2_prt_1 + 2 | t | t | public.t_ao_part_1_prt_1_2_prt_1 + 2 | t | t | public.t_ao_part_1_prt_2_2_prt_1 + 0 | t | t | public.t_ao_part_1_prt_1_2_prt_1 + 0 | t | t | public.t_ao_part_1_prt_2_2_prt_1 +(6 rows) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink 
+---------------- + t + t + t +(3 rows) + +select now() as before_query \gset +select * from t_ao_part where b = 0; + a | b | c +---+---+--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+---------------------------------- + 0 | t | t | public.t_ao_part_1_prt_1_2_prt_1 + 0 | t | t | public.t_ao_part_1_prt_1_2_prt_2 + 1 | t | t | public.t_ao_part_1_prt_1_2_prt_1 + 1 | t | t | public.t_ao_part_1_prt_1_2_prt_2 + 2 | t | t | public.t_ao_part_1_prt_1_2_prt_1 + 2 | t | t | public.t_ao_part_1_prt_1_2_prt_2 +(6 rows) + +-- One segment +select now() as before_query \gset +select * from t_ao_part where a = 0; + a | b | c +---+---+--- +(0 rows) + +select count(distinct gp_segment_id), string_agg(tbl, ',' order by tbl) + from show_log(:'before_query') + where date_ok and user_ok; + count | string_agg +-------+------------------------------------------------------------------------------------------------------------------------------------- + 1 | public.t_ao_part_1_prt_1_2_prt_1,public.t_ao_part_1_prt_1_2_prt_2,public.t_ao_part_1_prt_2_2_prt_1,public.t_ao_part_1_prt_2_2_prt_2 +(1 row) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +-- Table without partitions +create table t_ao(a int, b int) +distributed by (a); +select now() as before_query \gset +select * from t_ao; + a | b +---+--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+------------- + 2 | t | t | public.t_ao + 1 | t | t | public.t_ao + 0 | t | t | public.t_ao +(3 rows) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +-- +-- AOCO table +-- Partitioned table +create table t_aoco_part(a int, b int, c int) +with (appendonly = true, orientation = column) +distributed by (a) 
+partition by range (b) + subpartition by range (c) + subpartition template (start (40) end (46) every (3)) +(start (0) end (4) every (2)); +NOTICE: CREATE TABLE will create partition "t_aoco_part_1_prt_1" for table "t_aoco_part" +NOTICE: CREATE TABLE will create partition "t_aoco_part_1_prt_1_2_prt_1" for table "t_aoco_part_1_prt_1" +NOTICE: CREATE TABLE will create partition "t_aoco_part_1_prt_1_2_prt_2" for table "t_aoco_part_1_prt_1" +NOTICE: CREATE TABLE will create partition "t_aoco_part_1_prt_2" for table "t_aoco_part" +NOTICE: CREATE TABLE will create partition "t_aoco_part_1_prt_2_2_prt_1" for table "t_aoco_part_1_prt_2" +NOTICE: CREATE TABLE will create partition "t_aoco_part_1_prt_2_2_prt_2" for table "t_aoco_part_1_prt_2" +-- All partitions +select now() as before_query \gset +select * from t_aoco_part; + a | b | c +---+---+--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+------------------------------------ + 1 | t | t | public.t_aoco_part_1_prt_1_2_prt_1 + 1 | t | t | public.t_aoco_part_1_prt_2_2_prt_1 + 1 | t | t | public.t_aoco_part_1_prt_2_2_prt_2 + 1 | t | t | public.t_aoco_part_1_prt_1_2_prt_2 + 2 | t | t | public.t_aoco_part_1_prt_1_2_prt_1 + 2 | t | t | public.t_aoco_part_1_prt_2_2_prt_1 + 2 | t | t | public.t_aoco_part_1_prt_2_2_prt_2 + 2 | t | t | public.t_aoco_part_1_prt_1_2_prt_2 + 0 | t | t | public.t_aoco_part_1_prt_1_2_prt_1 + 0 | t | t | public.t_aoco_part_1_prt_2_2_prt_1 + 0 | t | t | public.t_aoco_part_1_prt_2_2_prt_2 + 0 | t | t | public.t_aoco_part_1_prt_1_2_prt_2 +(12 rows) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +-- Read partitions selected by condition in WHERE +select now() as before_query \gset +select * from t_aoco_part where c = 40; + a | b | c +---+---+--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok 
| tbl +---------------+---------+---------+------------------------------------ + 1 | t | t | public.t_aoco_part_1_prt_1_2_prt_1 + 1 | t | t | public.t_aoco_part_1_prt_2_2_prt_1 + 2 | t | t | public.t_aoco_part_1_prt_1_2_prt_1 + 2 | t | t | public.t_aoco_part_1_prt_2_2_prt_1 + 0 | t | t | public.t_aoco_part_1_prt_1_2_prt_1 + 0 | t | t | public.t_aoco_part_1_prt_2_2_prt_1 +(6 rows) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +select now() as before_query \gset +select * from t_aoco_part where b = 0; + a | b | c +---+---+--- +(0 rows) + +select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+------------------------------------ + 0 | t | t | public.t_aoco_part_1_prt_1_2_prt_1 + 0 | t | t | public.t_aoco_part_1_prt_1_2_prt_2 + 1 | t | t | public.t_aoco_part_1_prt_1_2_prt_1 + 1 | t | t | public.t_aoco_part_1_prt_1_2_prt_2 + 2 | t | t | public.t_aoco_part_1_prt_1_2_prt_1 + 2 | t | t | public.t_aoco_part_1_prt_1_2_prt_2 +(6 rows) + +-- One segment +select now() as before_query \gset +select * from t_aoco_part where a = 0; + a | b | c +---+---+--- +(0 rows) + +select count(distinct gp_segment_id), string_agg(tbl, ',' order by tbl) + from show_log(:'before_query') + where date_ok and user_ok; + count | string_agg +-------+--------------------------------------------------------------------------------------------------------------------------------------------- + 1 | public.t_aoco_part_1_prt_1_2_prt_1,public.t_aoco_part_1_prt_1_2_prt_2,public.t_aoco_part_1_prt_2_2_prt_1,public.t_aoco_part_1_prt_2_2_prt_2 +(1 row) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +-- Table without partitions +create table t_aoco(a int, b int) +distributed by (a); +select now() as before_query \gset +select * from t_aoco; + a | b +---+--- +(0 rows) + 
+select * from show_log(:'before_query'); + gp_segment_id | date_ok | user_ok | tbl +---------------+---------+---------+--------------- + 1 | t | t | public.t_aoco + 2 | t | t | public.t_aoco + 0 | t | t | public.t_aoco +(3 rows) + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + pg_file_unlink +---------------- + t + t + t +(3 rows) + +-- +-- Check user name logging +-- start_ignore +drop role if exists user1; +-- end_ignore +create role user1 login resource queue pg_default; +grant select on t_ao_part to user1; +grant select on t_aoco to user1; +select '\! cp "' || setting || '/pg_hba.conf" "' || setting || '/pg_hba.conf.backup"' as cp_backup +from pg_settings +where name = 'data_directory' \gset +:cp_backup +select '\! echo "local all user1 trust" >> ' || setting || '/pg_hba.conf' as add_user +from pg_settings +where name = 'data_directory' \gset +:add_user +select current_user \gset +\c - user1 +select * from t_ao_part; + a | b | c +---+---+--- +(0 rows) + +select * from t_aoco; + a | b +---+--- +(0 rows) + +\c - :"current_user" +select gp_segment_id, loguser, tbl from access_log; + gp_segment_id | loguser | tbl +---------------+---------+---------------------------------- + 2 | user1 | public.t_ao_part_1_prt_1_2_prt_2 + 2 | user1 | public.t_ao_part_1_prt_2_2_prt_1 + 2 | user1 | public.t_ao_part_1_prt_2_2_prt_2 + 2 | user1 | public.t_ao_part_1_prt_1_2_prt_1 + 2 | user1 | public.t_aoco + 0 | user1 | public.t_ao_part_1_prt_1_2_prt_2 + 0 | user1 | public.t_ao_part_1_prt_2_2_prt_1 + 0 | user1 | public.t_ao_part_1_prt_2_2_prt_2 + 0 | user1 | public.t_ao_part_1_prt_1_2_prt_1 + 0 | user1 | public.t_aoco + 1 | user1 | public.t_ao_part_1_prt_1_2_prt_2 + 1 | user1 | public.t_ao_part_1_prt_2_2_prt_1 + 1 | user1 | public.t_ao_part_1_prt_2_2_prt_2 + 1 | user1 | public.t_ao_part_1_prt_1_2_prt_1 + 1 | user1 | public.t_aoco +(15 rows) + +-- +-- Cleanup +drop function show_log(text); +drop external table access_log; +drop table t_heap_part, t_heap, 
t_ao_part, t_ao, t_aoco_part, t_aoco; diff --git a/gpcontrib/access_log/sql/access_log.sql b/gpcontrib/access_log/sql/access_log.sql new file mode 100644 index 00000000000..f6cf3170242 --- /dev/null +++ b/gpcontrib/access_log/sql/access_log.sql @@ -0,0 +1,245 @@ +-- start_ignore +drop external table if exists access_log; +drop table if exists t_heap_part, t_heap, t_ao_part, t_ao, t_aoco_part, t_aoco; +-- Delete log files from master +select pg_file_unlink('pg_log/access.log'); +-- Delete log files from segments +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); +-- end_ignore + +-- Create external table to read log files from segments +do +$$ +declare + seg text; + locations text := ''; +begin + for seg in + select hostname || ':' || port || datadir + from gp_segment_configuration + where role = 'p' and content >= 0 + loop + if locations != '' then + locations = locations || ','; + end if; + + locations = locations || '''file://' || seg || '/pg_log/access.log'''; + end loop; + + execute 'create external table access_log + (logtime timestamp with time zone, loguser text, tbl text) + location (' || locations || ') format ''csv'''; +end $$; + +-- Start logging +load '$libdir/access_log.so'; + + +create or replace function show_log(before_query text) + returns table(gp_segment_id int, date_ok bool, user_ok bool, tbl text) +as $$ + select gp_segment_id, + logtime between before_query::timestamp with time zone and now() date_ok, + loguser=current_user user_ok, + tbl + from access_log; +$$ language sql; + + +-- +-- Heap table + +-- Partitioned table +create table t_heap_part(a int, b int, c int) +distributed by (a) +partition by range (b) + subpartition by range (c) + subpartition template (start (40) end (46) every (3)) +(start (0) end (4) every (2)); + +-- All partitions +select now() as before_query \gset +select * from t_heap_part; +select * from show_log(:'before_query'); +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); 
+ +-- Read partitions selected by condition in WHERE +select now() as before_query \gset +select * from t_heap_part where c = 40; +select * from show_log(:'before_query'); +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + +select now() as before_query \gset +select * from t_heap_part where b = 0; +select * from show_log(:'before_query'); +-- Don't delete files, because the next select will lead to adding log records +-- on one segment only. Reading from an external table fails when log file does +-- not exist on any segment. + +-- One segment +select now() as before_query \gset +select * from t_heap_part where a = 0; +select count(distinct gp_segment_id), string_agg(tbl, ',' order by tbl) + from show_log(:'before_query') + where date_ok and user_ok; +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + + +-- Table without partitions +create table t_heap(a int) +distributed by (a); + +select now() as before_query \gset +select * from t_heap; +select * from show_log(:'before_query'); +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + + +-- +-- AO table + +-- Partitioned table +create table t_ao_part(a int, b int, c int) +with (appendonly = true) +distributed by (a) +partition by range (b) + subpartition by range (c) + subpartition template (start (40) end (46) every (3)) +(start (0) end (4) every (2)); + +-- All partitions +select now() as before_query \gset +select * from t_ao_part; +select * from show_log(:'before_query'); +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + +-- Read partitions selected by condition in WHERE +select now() as before_query \gset +select * from t_ao_part where c = 40; +select * from show_log(:'before_query'); +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + +select now() as before_query \gset +select * from t_ao_part where b = 0; +select * from show_log(:'before_query'); + +-- One segment +select now() as 
before_query \gset +select * from t_ao_part where a = 0; +select count(distinct gp_segment_id), string_agg(tbl, ',' order by tbl) + from show_log(:'before_query') + where date_ok and user_ok; +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + + +-- Table without partitions +create table t_ao(a int, b int) +distributed by (a); + +select now() as before_query \gset +select * from t_ao; +select * from show_log(:'before_query'); +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + + +-- +-- AOCO table + +-- Partitioned table +create table t_aoco_part(a int, b int, c int) +with (appendonly = true, orientation = column) +distributed by (a) +partition by range (b) + subpartition by range (c) + subpartition template (start (40) end (46) every (3)) +(start (0) end (4) every (2)); + +-- All partitions +select now() as before_query \gset +select * from t_aoco_part; +select * from show_log(:'before_query'); +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + +-- Read partitions selected by condition in WHERE +select now() as before_query \gset +select * from t_aoco_part where c = 40; +select * from show_log(:'before_query'); +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + +select now() as before_query \gset +select * from t_aoco_part where b = 0; +select * from show_log(:'before_query'); + +-- One segment +select now() as before_query \gset +select * from t_aoco_part where a = 0; +select count(distinct gp_segment_id), string_agg(tbl, ',' order by tbl) + from show_log(:'before_query') + where date_ok and user_ok; +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + + +-- Table without partitions +create table t_aoco(a int, b int) +distributed by (a); + +select now() as before_query \gset +select * from t_aoco; +select * from show_log(:'before_query'); +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); + + +-- +-- Check user name 
logging + +-- start_ignore +drop role if exists user1; +-- end_ignore +create role user1 login resource queue pg_default; +grant select on t_ao_part to user1; +grant select on t_aoco to user1; + +select '\! cp "' || setting || '/pg_hba.conf" "' || setting || '/pg_hba.conf.backup"' as cp_backup +from pg_settings +where name = 'data_directory' \gset + +:cp_backup + +select '\! echo "local all user1 trust" >> ' || setting || '/pg_hba.conf' as add_user +from pg_settings +where name = 'data_directory' \gset + +:add_user + +select current_user \gset +-- start_ignore +\! gpconfig -c shared_preload_libraries -v 'access_log' -m '' +\! gpstop -raiq +-- end_ignore +\c - user1 + +select * from t_ao_part; +select * from t_aoco; + +\c - :"current_user" + +select gp_segment_id, loguser, tbl from access_log; + +-- +-- Cleanup +drop function show_log(text); +drop external table access_log; +drop table t_heap_part, t_heap, t_ao_part, t_ao, t_aoco_part, t_aoco; +-- start_ignore +select '\! cp "' || setting || '/pg_hba.conf.backup" "' || setting || '/pg_hba.conf"' as cp_restore +from pg_settings +where name = 'data_directory' \gset + +:cp_restore + +select pg_file_unlink('pg_log/access.log') from gp_dist_random('gp_id'); +select pg_file_unlink('pg_log/access.log'); + +\! gpconfig -r shared_preload_libraries +\! 
gpstop -raiq +-- end_ignore diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index f7e536037a5..1954bef49b9 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -33,6 +33,8 @@ #include "cdb/cdbaocsam.h" #include "utils/snapmgr.h" +init_scan_hook_type init_scan_hook = NULL; + static void InitScanRelation(SeqScanState *node, EState *estate, int eflags, Relation currentRelation); static TupleTableSlot *SeqNext(SeqScanState *node); @@ -141,6 +143,9 @@ ExecSeqScan(SeqScanState *node) static void InitScanRelation(SeqScanState *node, EState *estate, int eflags, Relation currentRelation) { + if (init_scan_hook) + init_scan_hook(currentRelation); + /* initialize a heapscan */ if (RelationIsAoRows(currentRelation)) { diff --git a/src/include/executor/nodeSeqscan.h b/src/include/executor/nodeSeqscan.h index dfdd14ff5ae..6c4aec4b38b 100644 --- a/src/include/executor/nodeSeqscan.h +++ b/src/include/executor/nodeSeqscan.h @@ -23,4 +23,7 @@ extern TupleTableSlot *ExecSeqScan(SeqScanState *node); extern void ExecEndSeqScan(SeqScanState *node); extern void ExecReScanSeqScan(SeqScanState *node); +typedef void (*init_scan_hook_type)(Relation currentRelation); +extern PGDLLIMPORT init_scan_hook_type init_scan_hook; + #endif /* NODESEQSCAN_H */