diff --git a/Makefile b/Makefile index 2440b24d8..19cc44657 100644 --- a/Makefile +++ b/Makefile @@ -84,6 +84,7 @@ RWOBJS = \ $(FETOOLS)/pg_rewind/local_source.o \ $(FETOOLS)/pg_rewind/parsexlog.o \ $(FETOOLS)/pg_rewind/pg_rewind.o \ + $(FETOOLS)/pg_rewind/tde_file.o \ $(FETOOLS)/pg_rewind/timeline.o RMGRDESCSOURCES = $(sort $(wildcard $(FETOOLS)/rmgrdesc/*desc*.c)) diff --git a/fetools/pg18/pg_rewind/filemap.c b/fetools/pg18/pg_rewind/filemap.c index c933871ca..c97d77f20 100644 --- a/fetools/pg18/pg_rewind/filemap.c +++ b/fetools/pg18/pg_rewind/filemap.c @@ -487,6 +487,8 @@ action_to_str(file_action_t action) return "CREATE"; case FILE_ACTION_REMOVE: return "REMOVE"; + case FILE_ACTION_ENSURE_TDE_KEY: + return "ENSURE_KEY"; default: return "unknown"; @@ -572,9 +574,33 @@ isRelDataFile(const char *path) { RelFileLocator rlocator; unsigned int segNo; - int nmatch; bool matched; + matched = path_rlocator(path, &rlocator, &segNo); + if (matched) + { + char *check_path = datasegpath(rlocator, MAIN_FORKNUM, segNo); + + if (strcmp(check_path, path) != 0) + matched = false; + + pfree(check_path); + } + + return matched; +} + +/* + * Sets rlocator and segNo based on given path. Returns false if didn't find + * a match. + * + * Only concerned with files belonging to the main fork. 
+ */ +bool +path_rlocator(const char *path, RelFileLocator *rlocator, unsigned int *segNo) +{ + int nmatch; + /*---- * Relation data files can be in one of the following directories: * @@ -594,55 +620,38 @@ isRelDataFile(const char *path) * *---- */ - rlocator.spcOid = InvalidOid; - rlocator.dbOid = InvalidOid; - rlocator.relNumber = InvalidRelFileNumber; - segNo = 0; - matched = false; + rlocator->spcOid = InvalidOid; + rlocator->dbOid = InvalidOid; + rlocator->relNumber = InvalidRelFileNumber; + *segNo = 0; - nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo); + nmatch = sscanf(path, "global/%u.%u", &rlocator->relNumber, segNo); if (nmatch == 1 || nmatch == 2) { - rlocator.spcOid = GLOBALTABLESPACE_OID; - rlocator.dbOid = 0; - matched = true; + rlocator->spcOid = GLOBALTABLESPACE_OID; + rlocator->dbOid = 0; + return true; } else { nmatch = sscanf(path, "base/%u/%u.%u", - &rlocator.dbOid, &rlocator.relNumber, &segNo); + &rlocator->dbOid, &rlocator->relNumber, segNo); if (nmatch == 2 || nmatch == 3) { - rlocator.spcOid = DEFAULTTABLESPACE_OID; - matched = true; + rlocator->spcOid = DEFAULTTABLESPACE_OID; + return true; } else { nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u", - &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber, - &segNo); + &rlocator->spcOid, &rlocator->dbOid, &rlocator->relNumber, + segNo); if (nmatch == 3 || nmatch == 4) - matched = true; + return true; } } - /* - * The sscanf tests above can match files that have extra characters at - * the end. To eliminate such cases, cross-check that GetRelationPath - * creates the exact same filename, when passed the RelFileLocator - * information we extracted from the filename. 
- */ - if (matched) - { - char *check_path = datasegpath(rlocator, MAIN_FORKNUM, segNo); - - if (strcmp(check_path, path) != 0) - matched = false; - - pfree(check_path); - } - - return matched; + return false; } /* @@ -712,6 +721,13 @@ decide_file_action(file_entry_t *entry) if (strstr(path, ".DS_Store") != NULL) return FILE_ACTION_NONE; + /* + * Skip pg_tde key data but WAL-related stuff as WAL being replaced by + * source's. We will handle the rest while re-encrypting data. + */ + if (strstr(path, "pg_tde/") != NULL) + return FILE_ACTION_NONE; + /* * Remove all files matching the exclusion filters in the target. */ @@ -831,14 +847,15 @@ decide_file_action(file_entry_t *entry) * in the target will be copied based on parsing the target * system's WAL, and any blocks modified in the source will be * updated after rewinding, when the source system's WAL is - * replayed. + * replayed. But we still have to sync source/target keys in + * case it is encrypted. */ if (entry->target_size < entry->source_size) return FILE_ACTION_COPY_TAIL; else if (entry->target_size > entry->source_size) return FILE_ACTION_TRUNCATE; else - return FILE_ACTION_NONE; + return FILE_ACTION_ENSURE_TDE_KEY; } break; diff --git a/fetools/pg18/pg_rewind/filemap.h b/fetools/pg18/pg_rewind/filemap.h index df78a02e3..7ab0da641 100644 --- a/fetools/pg18/pg_rewind/filemap.h +++ b/fetools/pg18/pg_rewind/filemap.h @@ -25,6 +25,9 @@ typedef enum * blocks based on the parsed WAL) */ FILE_ACTION_TRUNCATE, /* truncate local file to 'newsize' bytes */ FILE_ACTION_REMOVE, /* remove local file / directory / symlink */ + FILE_ACTION_ENSURE_TDE_KEY, /* data file with no action, but we to check + * if it is encrypted and sync source/target + * keys */ } file_action_t; typedef enum @@ -113,4 +116,6 @@ extern void print_filemap(filemap_t *filemap); extern void keepwal_init(void); extern void keepwal_add_entry(const char *path); +extern bool path_rlocator(const char *path, RelFileLocator *rlocator, unsigned int 
*segNo); + #endif /* FILEMAP_H */ diff --git a/fetools/pg18/pg_rewind/libpq_source.c b/fetools/pg18/pg_rewind/libpq_source.c index 56c2ad55d..2230360bd 100644 --- a/fetools/pg18/pg_rewind/libpq_source.c +++ b/fetools/pg18/pg_rewind/libpq_source.c @@ -17,6 +17,9 @@ #include "pg_rewind.h" #include "port/pg_bswap.h" #include "rewind_source.h" +#include "tde_file.h" + +#include "pg_tde.h" /* * Files are fetched MAX_CHUNK_SIZE bytes at a time, and with a @@ -31,6 +34,7 @@ typedef struct const char *path; /* path relative to data directory root */ off_t offset; size_t length; + bool encrypt; } fetch_range_request; typedef struct @@ -71,6 +75,10 @@ static char *libpq_fetch_file(rewind_source *source, const char *path, static XLogRecPtr libpq_get_current_wal_insert_lsn(rewind_source *source); static void libpq_destroy(rewind_source *source); +static void libpq_queue_fetch_range_do(rewind_source *source, const char *path, + bool encrypt, off_t off, size_t len); +static void libpq_fetch_tde_keys(rewind_source *source); + /* * Create a new libpq source. * @@ -100,6 +108,8 @@ init_libpq_source(PGconn *conn) initStringInfo(&src->offsets); initStringInfo(&src->lengths); + libpq_fetch_tde_keys(&src->common); + return &src->common; } @@ -345,7 +355,7 @@ libpq_queue_fetch_file(rewind_source *source, const char *path, size_t len) * fetch-requests are for a whole file. */ open_target_file(path, true); - libpq_queue_fetch_range(source, path, 0, Max(len, MAX_CHUNK_SIZE)); + libpq_queue_fetch_range_do(source, path, false, 0, Max(len, MAX_CHUNK_SIZE)); } /* @@ -354,6 +364,16 @@ libpq_queue_fetch_file(rewind_source *source, const char *path, size_t len) static void libpq_queue_fetch_range(rewind_source *source, const char *path, off_t off, size_t len) +{ + libpq_queue_fetch_range_do(source, path, true, off, len); +} + +/* + * Queue up a request to fetch a piece of a file from remote system. 
+ */ +static void +libpq_queue_fetch_range_do(rewind_source *source, const char *path, bool encrypt, off_t off, + size_t len) { libpq_source *src = (libpq_source *) source; @@ -406,6 +426,7 @@ libpq_queue_fetch_range(rewind_source *source, const char *path, off_t off, src->request_queue[src->num_requests].path = path; src->request_queue[src->num_requests].offset = off; src->request_queue[src->num_requests].length = thislen; + src->request_queue[src->num_requests].encrypt = encrypt; src->num_requests++; off += thislen; @@ -592,6 +613,19 @@ process_queued_fetch_requests(libpq_source *src) open_target_file(filename, false); + if (rq->encrypt) + { + Assert(chunksize % BLCKSZ == 0); + + ensure_tde_keys(filename); + + for (int i = 0; i < chunksize / BLCKSZ; i++) + { + unsigned char *data = (unsigned char *) chunk + BLCKSZ * i; + + encrypt_block(data, chunkoff + BLCKSZ * i, MAIN_FORKNUM); + } + } write_target_range(chunk, chunkoff, chunksize); } @@ -682,3 +716,37 @@ libpq_destroy(rewind_source *source) /* NOTE: we don't close the connection here, as it was not opened by us. 
*/ } + +static void +libpq_fetch_tde_keys(rewind_source *source) +{ + PGconn *conn = ((libpq_source *) source)->conn; + PGresult *res; + + res = PQexec(conn, "SELECT pg_ls_dir('"PG_TDE_DATA_DIR"', true, false)"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("could not fetch file list: %s", + PQresultErrorMessage(res)); + + /* zero rows: tde dir is missing (missing_ok) or empty, nothing to do */ + if (PQntuples(res) == 0) + return; + + init_tde(); + + for (int i = 0; i < PQntuples(res); i++) + { + char *path; + char *tde_file_buf; + size_t size; + char target_path[MAXPGPATH]; + + path = PQgetvalue(res, i, 0); + + snprintf(target_path, MAXPGPATH, "%s/%s", PG_TDE_DATA_DIR, path); + tde_file_buf = libpq_fetch_file(source, target_path, &size); + + write_tmp_source_file(path, tde_file_buf, size); + } +} diff --git a/fetools/pg18/pg_rewind/local_source.c b/fetools/pg18/pg_rewind/local_source.c index 5a6e805c1..9a1f995bc 100644 --- a/fetools/pg18/pg_rewind/local_source.c +++ b/fetools/pg18/pg_rewind/local_source.c @@ -10,11 +10,19 @@ #include "postgres_fe.h" #include +#include #include +#include "catalog/pg_tablespace_d.h" #include "common/logging.h" #include "file_ops.h" +#include "pg_rewind.h" #include "rewind_source.h" +#include "tde_file.h" + +#include "pg_tde.h" +#include "common/pg_tde_utils.h" +#include "access/pg_tde_tdemap.h" typedef struct { @@ -34,6 +42,8 @@ static void local_queue_fetch_range(rewind_source *source, const char *path, static void local_finish_fetch(rewind_source *source); static void local_destroy(rewind_source *source); +static void local_fetch_tde_keys(rewind_source *source); + rewind_source * init_local_source(const char *datadir) { @@ -51,6 +61,8 @@ init_local_source(const char *datadir) src->datadir = datadir; + local_fetch_tde_keys(&src->common); + return &src->common; } @@ -145,6 +157,8 @@ local_queue_fetch_range(rewind_source *source, const char *path, off_t off, open_target_file(path, false); + ensure_tde_keys(path); + while (end - begin > 0) { ssize_t readlen; @@ 
-162,6 +176,9 @@ local_queue_fetch_range(rewind_source *source, const char *path, off_t off, else if (readlen == 0) pg_fatal("unexpected EOF while reading file \"%s\"", srcpath); + /* Re-encrypt blocks with a proper key if needed. */ + encrypt_block((unsigned char *) buf.data, begin, MAIN_FORKNUM); + write_target_range(buf.data, begin, readlen); begin += readlen; } @@ -170,12 +187,37 @@ local_queue_fetch_range(rewind_source *source, const char *path, off_t off, pg_fatal("could not close file \"%s\": %m", srcpath); } +static bool +directory_exists(const char *dir) +{ + struct stat st; + + if (stat(dir, &st) != 0) + return false; + if (S_ISDIR(st.st_mode)) + return true; + return false; +} + +static void +local_fetch_tde_keys(rewind_source *source) +{ + char tde_source_dir[MAXPGPATH]; + const char *datadir = ((local_source *) source)->datadir; + + snprintf(tde_source_dir, sizeof(tde_source_dir), "%s/%s", datadir, PG_TDE_DATA_DIR); + + if (!directory_exists(tde_source_dir)) + return; + + init_tde(); + copy_tmp_tde_files(tde_source_dir); +} + static void local_finish_fetch(rewind_source *source) { - /* - * Nothing to do, local_queue_fetch_range() copies the ranges immediately. 
- */ + flush_current_key(); } static void diff --git a/fetools/pg18/pg_rewind/pg_rewind.c b/fetools/pg18/pg_rewind/pg_rewind.c index b0d7f3b6e..5dac0a857 100644 --- a/fetools/pg18/pg_rewind/pg_rewind.c +++ b/fetools/pg18/pg_rewind/pg_rewind.c @@ -31,6 +31,7 @@ #include "pg_rewind.h" #include "rewind_source.h" #include "storage/bufpage.h" +#include "tde_file.h" #include "pg_tde.h" #include "access/pg_tde_fe_init.h" @@ -600,6 +601,7 @@ perform_rewind(filemap_t *filemap, rewind_source *source, while (datapagemap_next(iter, &blkno)) { offset = blkno * BLCKSZ; + source->queue_fetch_range(source, entry->path, offset, BLCKSZ); } pg_free(iter); @@ -611,6 +613,10 @@ perform_rewind(filemap_t *filemap, rewind_source *source, /* nothing else to do */ break; + case FILE_ACTION_ENSURE_TDE_KEY: + ensure_tde_keys(entry->path); + break; + case FILE_ACTION_COPY: source->queue_fetch_file(source, entry->path, entry->source_size); break; @@ -644,6 +650,8 @@ perform_rewind(filemap_t *filemap, rewind_source *source, close_target_file(); + fetch_tde_dir(); + progress_report(true); /* diff --git a/fetools/pg18/pg_rewind/tde_file.c b/fetools/pg18/pg_rewind/tde_file.c new file mode 100644 index 000000000..691a3722f --- /dev/null +++ b/fetools/pg18/pg_rewind/tde_file.c @@ -0,0 +1,300 @@ +#include "postgres_fe.h" + +#include + +#include "catalog/pg_tablespace_d.h" +#include "common/file_perm.h" + +#include "file_ops.h" +#include "filemap.h" +#include "pg_rewind.h" +#include "tde_file.h" + +#include "access/pg_tde_tdemap.h" +#include "common/pg_tde_utils.h" +#include "pg_tde.h" + +static void copy_dir(const char *src, const char *dst); +static void create_tde_tmp_dir(void); + +typedef struct +{ + InternalKey *source_key; + InternalKey *target_key; + char path[MAXPGPATH]; + RelFileLocator rlocator; + unsigned int segNo; +} current_file_data; + +static current_file_data current_tde_file = { 0 }; + +/* Dir for an operational copy of source's tde files (_keys, etc) */ +static char 
tde_tmp_scource[MAXPGPATH] = "/tmp/pg_tde_rewindXXXXXX"; +static bool source_has_tde = false; + +static void +recrypt_fork(ForkNumber fork) +{ + int srcfd; + int trgfd; + char srcpath[MAXPGPATH]; + char trgtpath[MAXPGPATH]; + PGIOAlignedBlock buf; + size_t written_len; + RelPathStr rp = relpathperm(current_tde_file.rlocator, fork); + + snprintf(srcpath, sizeof(srcpath), "%s/%s", datadir_target, rp.str); + + /* check if fork exists, nothing to do if it does not */ + if (access(srcpath, F_OK) != 0) + return; + + srcfd = open(srcpath, O_RDONLY | PG_BINARY, 0); + if (srcfd < 0) + { + /* + * Server can recover from wrecked VM/FSM, hence only warnings here and + * in the rest of the function + */ + pg_log_warning("could not open file for reading \"%s\": %m", + srcpath); + return; + } + + trgfd = open(srcpath, O_WRONLY | PG_BINARY, 0); + if (trgfd < 0) + { + pg_log_warning("could not open file for writing \"%s\": %m", + srcpath); + close(srcfd); + return; + } + + written_len = 0; + for (;;) + { + ssize_t read_len; + + read_len = read(srcfd, buf.data, sizeof(buf)); + + if (read_len < 0) + pg_fatal("could not read file \"%s\": %m", srcpath); + else if (read_len == 0) + break; /* EOF reached */ + + encrypt_block((unsigned char *) buf.data, written_len, fork); + + if (write(trgfd, buf.data, read_len) != read_len) + { + pg_log_warning("could not write block to fork file \"%s\": %m", srcpath); + break; + } + written_len += read_len; + } + + close(srcfd); + close(trgfd); +} + + +void +flush_current_key(void) +{ + if (current_tde_file.source_key == NULL) + return; + + pg_log_debug("re-encrypt forks for \"%s\"", current_tde_file.path); + + recrypt_fork(FSM_FORKNUM); + recrypt_fork(VISIBILITYMAP_FORKNUM); + + pg_log_debug("update internal key for \"%s\"", current_tde_file.path); + pg_tde_set_data_dir(tde_tmp_scource); + pg_tde_save_smgr_key(current_tde_file.rlocator, current_tde_file.target_key, true); + + pfree(current_tde_file.source_key); + pfree(current_tde_file.target_key); + 
memset(&current_tde_file, 0, sizeof(current_tde_file)); +} + +void +ensure_tde_keys(const char *relpath) +{ + char target_tde_path[MAXPGPATH]; + RelFileLocator rlocator; + unsigned int segNo; + + /* no TDE on source, nothing to do */ + if (!source_has_tde) + return; + + /* the same file, nothing to do */ + if (strcmp(current_tde_file.path, relpath) == 0 ) + return; + + flush_current_key(); + + if (!path_rlocator(relpath, &rlocator, &segNo)) + return; + + pg_tde_set_data_dir(tde_tmp_scource); + current_tde_file.source_key = pg_tde_get_smgr_key(rlocator); + + snprintf(target_tde_path, sizeof(target_tde_path), "%s/%s", datadir_target, PG_TDE_DATA_DIR); + pg_tde_set_data_dir(target_tde_path); + current_tde_file.target_key = pg_tde_get_smgr_key(rlocator); + + if (current_tde_file.source_key != NULL) + { + /* + * If there ever was a source_key, there must be a target_key for this + * rlocator. `ALTER TABLE ... SET ACCESS METHOD heap` would create + * a new rlocator, hence it would not be a range change. + * + * XXX: should be an elog FATAL instead? 
+ */ + Assert(current_tde_file.target_key != NULL); + + memset(current_tde_file.path, 0, MAXPGPATH); + strlcpy(current_tde_file.path, relpath, MAXPGPATH); + current_tde_file.rlocator = rlocator; + current_tde_file.segNo = segNo; + } +} + +void +encrypt_block(unsigned char *buf, off_t file_offset, ForkNumber fork) +{ + BlockNumber blkno; + + /* not a tde file, nothing to do */ + if (current_tde_file.source_key == NULL) + return; + + Assert(file_offset % BLCKSZ == 0); + + blkno = file_offset / BLCKSZ + current_tde_file.segNo * RELSEG_SIZE; + + pg_log_debug("re-encrypt block in %s, offset: %lld, blockNum: %u", current_tde_file.path, (long long) file_offset, blkno); + tde_decrypt_smgr_block(current_tde_file.source_key, fork, blkno, buf, buf); + tde_encrypt_smgr_block(current_tde_file.target_key, fork, blkno, buf, buf); +} + + +static void +create_tde_tmp_dir(void) +{ + if (mkdtemp(tde_tmp_scource) == NULL) + pg_fatal("could not create temporary directory \"%s\": %m", tde_tmp_scource); + + pg_log_debug("created temporary pg_tde directory: %s", tde_tmp_scource); +} + +void +destroy_tde_tmp_dir(void) +{ + rmtree(tde_tmp_scource, true); +} + +static void +write_file(const char *path, char *buf, size_t size) +{ + int fd; + + fd = open(path, O_WRONLY | O_CREAT | PG_BINARY, pg_file_create_mode); + if (fd < 0) + pg_fatal("could not create temporary tde file \"%s\": %m", path); + + if (write(fd, buf, size) != size) + pg_fatal("could not write temporary tde file \"%s\": %m", path); + + if (close(fd) != 0) + pg_fatal("could not close temporary tde file \"%s\": %m", path); +} + +void +write_tmp_source_file(const char *fname, char *buf, size_t size) +{ + char path[MAXPGPATH]; + + snprintf(path, MAXPGPATH, "%s/%s", tde_tmp_scource, fname); + + write_file(path, buf, size); +} + +static void +copy_dir(const char *src, const char *dst) +{ + DIR *xldir; + struct dirent *xlde; + char src_path[MAXPGPATH]; + char dst_path[MAXPGPATH]; + + xldir = opendir(src); + if (xldir == NULL) + pg_fatal("could not 
open directory \"%s\": %m", src); + + while (errno = 0, (xlde = readdir(xldir)) != NULL) + { + struct stat fst; + + if (strcmp(xlde->d_name, ".") == 0 || + strcmp(xlde->d_name, "..") == 0) + continue; + + snprintf(src_path, sizeof(src_path), "%s/%s", src, xlde->d_name); + snprintf(dst_path, sizeof(dst_path), "%s/%s", dst, xlde->d_name); + + if (lstat(src_path, &fst) < 0) + pg_fatal("could not stat file \"%s\": %m", src_path); + + if (S_ISREG(fst.st_mode)) + { + char *buf; + size_t size; + + buf = slurpFile(src, xlde->d_name, &size); + + write_file(dst_path, buf, size); + pg_free(buf); + } + } + + if (errno) + pg_fatal("could not read directory \"%s\": %m", src); + + if (closedir(xldir)) + pg_fatal("could not close directory \"%s\": %m", src); +} + +void +init_tde(void) +{ + source_has_tde = true; + create_tde_tmp_dir(); +} + +void +copy_tmp_tde_files(const char *from) +{ + copy_dir(from, tde_tmp_scource); +} + +void +fetch_tde_dir(void) +{ + char target_tde_dir[MAXPGPATH]; + + if (!source_has_tde) + return; + + if (!dry_run) + { + snprintf(target_tde_dir, MAXPGPATH, "%s/%s", datadir_target, PG_TDE_DATA_DIR); + + rmtree(target_tde_dir, false); + copy_dir(tde_tmp_scource, target_tde_dir); + } + + destroy_tde_tmp_dir(); +} diff --git a/fetools/pg18/pg_rewind/tde_file.h b/fetools/pg18/pg_rewind/tde_file.h new file mode 100644 index 000000000..39a9c092b --- /dev/null +++ b/fetools/pg18/pg_rewind/tde_file.h @@ -0,0 +1,16 @@ +#ifndef PG_REWIND_TDE_FILE_H +#define PG_REWIND_TDE_FILE_H + +#include "common/relpath.h" + +extern void flush_current_key(void); +extern void ensure_tde_keys(const char *relpath); +extern void encrypt_block(unsigned char *buf, off_t file_offset, ForkNumber fork); + +extern void destroy_tde_tmp_dir(void); +extern void write_tmp_source_file(const char *fname, char *buf, size_t size); +extern void fetch_tde_dir(void); +extern void copy_tmp_tde_files(const char *from); +extern void init_tde(void); + +#endif /* PG_REWIND_TDE_FILE_H */ diff --git 
a/src/access/pg_tde_tdemap.c b/src/access/pg_tde_tdemap.c index 40f106d8a..9d3e0cda2 100644 --- a/src/access/pg_tde_tdemap.c +++ b/src/access/pg_tde_tdemap.c @@ -82,8 +82,6 @@ static int pg_tde_open_file_basic(const char *tde_filename, int fileFlags, bool static int pg_tde_open_file_read(const char *tde_filename, bool ignore_missing, off_t *curr_pos); static void pg_tde_file_header_read(const char *tde_filename, int fd, TDEFileHeader *fheader, off_t *bytes_read); static bool pg_tde_read_one_map_entry(int fd, TDEMapEntry *map_entry, off_t *offset); - -#ifndef FRONTEND static void pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, const char *db_map_path); static int pg_tde_file_header_write(const char *tde_filename, int fd, const TDESignedPrincipalKeyInfo *signed_key_info, off_t *bytes_written); static void pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const InternalKey *rel_key_data); @@ -168,6 +166,7 @@ pg_tde_save_smgr_key(RelFileLocator rel, LWLockRelease(lock_pk); } +#ifndef FRONTEND const char * tde_sprint_key(InternalKey *k) { @@ -437,7 +436,6 @@ pg_tde_sign_principal_key_info(TDESignedPrincipalKeyInfo *signed_key_info, const signed_key_info->aead_tag, MAP_ENTRY_AEAD_TAG_SIZE); } -#ifndef FRONTEND static void pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const InternalKey *rel_key_data) { @@ -462,9 +460,7 @@ pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *princ map_entry->encrypted_key_data, map_entry->aead_tag, MAP_ENTRY_AEAD_TAG_SIZE); } -#endif -#ifndef FRONTEND static void pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, const char *db_map_path) { @@ -487,7 +483,6 @@ pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, *offset += bytes_written; } -#endif /* * Returns true if we find a valid 
match; e.g. type is not set to @@ -643,7 +638,6 @@ pg_tde_open_file_read(const char *tde_filename, bool ignore_missing, off_t *curr return fd; } -#ifndef FRONTEND /* * Open for write and Validate File Header: * header: {Format Version, Principal Key Name} @@ -677,7 +671,6 @@ pg_tde_open_file_write(const char *tde_filename, const TDESignedPrincipalKeyInfo *curr_pos = bytes_read + bytes_written; return fd; } -#endif /* * Read TDE file header from a TDE file and fill in the fheader data structure. @@ -701,7 +694,6 @@ pg_tde_file_header_read(const char *tde_filename, int fd, TDEFileHeader *fheader } } -#ifndef FRONTEND /* * Write TDE file header to a TDE file. */ @@ -734,7 +726,6 @@ pg_tde_file_header_write(const char *tde_filename, int fd, const TDESignedPrinci return fd; } -#endif /* * Returns true if a map entry if found or false if we have reached the end of diff --git a/src/encryption/enc_tde.c b/src/encryption/enc_tde.c index 495e9cb90..8e2997542 100644 --- a/src/encryption/enc_tde.c +++ b/src/encryption/enc_tde.c @@ -141,3 +141,67 @@ pg_tde_stream_crypt(const char *iv_prefix, batch_no++; } } + +/* + * The intialization vector of a block is its block number conmverted to a + * 128 bit big endian number plus the forknumber XOR the base IV of the + * relation file. + */ +static void +CalcBlockIv(ForkNumber forknum, BlockNumber bn, const unsigned char *base_iv, unsigned char *iv) +{ + memset(iv, 0, 16); + + /* The init fork is copied to the main fork so we must use the same IV */ + iv[7] = forknum == INIT_FORKNUM ? 
MAIN_FORKNUM : forknum; + + iv[12] = bn >> 24; + iv[13] = bn >> 16; + iv[14] = bn >> 8; + iv[15] = bn; + + for (int i = 0; i < 16; i++) + iv[i] ^= base_iv[i]; +} + +void +tde_decrypt_smgr_block(InternalKey *relKey, ForkNumber forknum, BlockNumber blocknum, const unsigned char *in, unsigned char *out) +{ + unsigned char iv[16]; + bool allZero = true; + + /* + * Detect unencrypted all-zero pages written by smgrzeroextend() by + * looking at the first 32 bytes of the page. + * + * Not encrypting all-zero pages is safe because they are only written + * at the end of the file when extending a table on disk so they tend + * to be short lived plus they only leak a slightly more accurate + * table size than one can glean from just the file size. + */ + for (int i = 0; i < 32; ++i) + { + if (in[i] != 0) + { + allZero = false; + break; + } + } + + if (allZero) + return; + + CalcBlockIv(forknum, blocknum, relKey->base_iv, iv); + + AesDecrypt(relKey->key, relKey->key_len, iv, in, BLCKSZ, out); +} + +void +tde_encrypt_smgr_block(InternalKey *relKey, ForkNumber forknum, BlockNumber blocknum, const unsigned char *in, unsigned char *out) +{ + unsigned char iv[16]; + + CalcBlockIv(forknum, blocknum, relKey->base_iv, iv); + + AesEncrypt(relKey->key, relKey->key_len, iv, in, BLCKSZ, out); +} diff --git a/src/include/encryption/enc_tde.h b/src/include/encryption/enc_tde.h index a3c85a5a8..7ccdd8cac 100644 --- a/src/include/encryption/enc_tde.h +++ b/src/include/encryption/enc_tde.h @@ -5,6 +5,9 @@ #ifndef ENC_TDE_H #define ENC_TDE_H +#include "common/relpath.h" +#include "storage/block.h" + #define TDE_KEY_NAME_LEN 256 #define KEY_DATA_SIZE_128 16 /* 128 bit encryption */ #define KEY_DATA_SIZE_256 32 /* 256 bit encryption */ @@ -38,4 +41,6 @@ extern void pg_tde_stream_crypt(const char *iv_prefix, int key_len, void **ctxPtr); +extern void tde_decrypt_smgr_block(InternalKey *relKey, ForkNumber forknum, BlockNumber blocknum, const unsigned char *in, unsigned char *out); +extern void 
tde_encrypt_smgr_block(InternalKey *relKey, ForkNumber forknum, BlockNumber blocknum, const unsigned char *in, unsigned char *out); #endif /* ENC_TDE_H */ diff --git a/src/smgr/pg_tde_smgr.c b/src/smgr/pg_tde_smgr.c index f0a91ac57..97ab7f3a1 100644 --- a/src/smgr/pg_tde_smgr.c +++ b/src/smgr/pg_tde_smgr.c @@ -77,7 +77,6 @@ static void tde_smgr_save_temp_key(const RelFileLocator *newrlocator, const Inte static InternalKey *tde_smgr_get_temp_key(const RelFileLocator *rel); static bool tde_smgr_has_temp_key(const RelFileLocator *rel); static void tde_smgr_delete_temp_key(const RelFileLocator *rel); -static void CalcBlockIv(ForkNumber forknum, BlockNumber bn, const unsigned char *base_iv, unsigned char *iv); static void tde_smgr_log_create_key(const RelFileLocator *rlocator) @@ -265,9 +264,7 @@ tde_mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, local_buffers[i] = &local_blocks[i * BLCKSZ]; - CalcBlockIv(forknum, bn, tdereln->relKey.base_iv, iv); - - AesEncrypt(tdereln->relKey.key, tdereln->relKey.key_len, iv, ((unsigned char **) buffers)[i], BLCKSZ, local_buffers[i]); + tde_encrypt_smgr_block(&tdereln->relKey, forknum, bn, ((unsigned char **) buffers)[i], local_buffers[i]); } mdwritev(reln, forknum, blocknum, @@ -320,11 +317,8 @@ tde_mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, else { unsigned char *local_blocks = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0); - unsigned char iv[16]; - - CalcBlockIv(forknum, blocknum, tdereln->relKey.base_iv, iv); - AesEncrypt(tdereln->relKey.key, tdereln->relKey.key_len, iv, ((unsigned char *) buffer), BLCKSZ, local_blocks); + tde_encrypt_smgr_block(&tdereln->relKey, forknum, blocknum, ((unsigned char *) buffer), local_blocks); mdextend(reln, forknum, blocknum, local_blocks, skipFsync); @@ -347,33 +341,10 @@ tde_mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, for (int i = 0; i < nblocks; ++i) { - bool allZero = true; BlockNumber bn = blocknum + i; - unsigned char 
iv[16]; + unsigned char *buf = ((unsigned char **) buffers)[i]; - /* - * Detect unencrypted all-zero pages written by smgrzeroextend() by - * looking at the first 32 bytes of the page. - * - * Not encrypting all-zero pages is safe because they are only written - * at the end of the file when extending a table on disk so they tend - * to be short lived plus they only leak a slightly more accurate - * table size than one can glean from just the file size. - */ - for (int j = 0; j < 32; ++j) - { - if (((char **) buffers)[i][j] != 0) - { - allZero = false; - break; - } - } - if (allZero) - continue; - - CalcBlockIv(forknum, bn, tdereln->relKey.base_iv, iv); - - AesDecrypt(tdereln->relKey.key, tdereln->relKey.key_len, iv, ((unsigned char **) buffers)[i], BLCKSZ, ((unsigned char **) buffers)[i]); + tde_decrypt_smgr_block(&tdereln->relKey, forknum, bn, buf, buf); } } @@ -511,36 +482,12 @@ tde_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data) { Buffer buf = io_data[buf_off]; char *buf_ptr = BufferGetBlock(buf); - bool allZero = true; BlockNumber bn = td->smgr.blockNum + buf_off; - unsigned char iv[16]; if (prior_result.result <= buf_off) break; - /* - * Detect unencrypted all-zero pages written by smgrzeroextend() by - * looking at the first 32 bytes of the page. - * - * Not encrypting all-zero pages is safe because they are only written - * at the end of the file when extending a table on disk so they tend - * to be short lived plus they only leak a slightly more accurate - * table size than one can glean from just the file size. 
- */ - for (int i = 0; i < 32; i++) - { - if (buf_ptr[i] != 0) - { - allZero = false; - break; - } - } - if (allZero) - continue; - - CalcBlockIv(td->smgr.forkNum, bn, int_key->base_iv, iv); - - AesDecrypt(int_key->key, int_key->key_len, iv, ((unsigned char *) buf_ptr), BLCKSZ, ((unsigned char *) buf_ptr)); + tde_decrypt_smgr_block(int_key, td->smgr.forkNum, bn, ((unsigned char *) buf_ptr), ((unsigned char *) buf_ptr)); } return prior_result; @@ -716,25 +663,3 @@ tde_smgr_delete_temp_key(const RelFileLocator *rel) Assert(TempRelKeys); hash_search(TempRelKeys, rel, HASH_REMOVE, NULL); } - -/* - * The intialization vector of a block is its block number conmverted to a - * 128 bit big endian number plus the forknumber XOR the base IV of the - * relation file. - */ -static void -CalcBlockIv(ForkNumber forknum, BlockNumber bn, const unsigned char *base_iv, unsigned char *iv) -{ - memset(iv, 0, 16); - - /* The init fork is copied to the main fork so we must use the same IV */ - iv[7] = forknum == INIT_FORKNUM ? MAIN_FORKNUM : forknum; - - iv[12] = bn >> 24; - iv[13] = bn >> 16; - iv[14] = bn >> 8; - iv[15] = bn; - - for (int i = 0; i < 16; i++) - iv[i] ^= base_iv[i]; -}