From a77df9dc43ba03fe36a5bedd96dacb343ebbd8ea Mon Sep 17 00:00:00 2001 From: Gabor Buella Date: Thu, 5 Oct 2017 12:50:13 +0200 Subject: [PATCH 1/3] posix: relocate vinode_read and vinode_write They are not only available inside read.c or write.c anymore. --- src/libpmemfile-posix/data.c | 63 +++++++++++++++++++++++++++++++++++ src/libpmemfile-posix/data.h | 8 +++++ src/libpmemfile-posix/read.c | 34 ------------------- src/libpmemfile-posix/write.c | 29 ---------------- 4 files changed, 71 insertions(+), 63 deletions(-) diff --git a/src/libpmemfile-posix/data.c b/src/libpmemfile-posix/data.c index b8869eda7..7f89c8058 100644 --- a/src/libpmemfile-posix/data.c +++ b/src/libpmemfile-posix/data.c @@ -979,3 +979,66 @@ vinode_remove_interval(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, return deallocated_space; } + +/* + * vinode_read -- reads file + */ +size_t +vinode_read(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, size_t offset, + struct pmemfile_block_desc **last_block, char *buf, + size_t count) +{ + uint64_t size = inode_get_size(vinode->inode); + + /* + * Start reading at offset, stop reading + * when end of file is reached, or count bytes were read. + * The following two branches compute how many bytes are + * going to be read. 
+ */ + if (offset >= size) + return 0; /* EOF already */ + + if (size - offset < count) + count = size - offset; + + struct pmemfile_block_desc *block = + find_closest_block_with_hint(vinode, offset, *last_block); + + block = iterate_on_file_range(pfp, vinode, block, offset, + count, buf, read_from_blocks); + + if (block) + *last_block = block; + + return count; +} + +/* + * vinode_write -- writes to file + */ +void +vinode_write(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, size_t offset, + struct pmemfile_block_desc **last_block, + const char *buf, size_t count) +{ + ASSERT(count > 0); + + /* + * Two steps: + * - Zero Fill some new blocks, in case the file is extended by + * writing to the file after seeking past file size ( optionally ) + * - Copy the data from the users buffer + */ + + /* All blocks needed for writing are properly allocated at this point */ + + struct pmemfile_block_desc *block = + find_closest_block_with_hint(vinode, offset, *last_block); + + block = iterate_on_file_range(pfp, vinode, block, offset, + count, (char *)buf, write_to_blocks); + + if (block) + *last_block = block; +} diff --git a/src/libpmemfile-posix/data.h b/src/libpmemfile-posix/data.h index 9b82b9909..dd7185f09 100644 --- a/src/libpmemfile-posix/data.h +++ b/src/libpmemfile-posix/data.h @@ -62,4 +62,12 @@ struct pmemfile_block_desc *iterate_on_file_range(PMEMfilepool *pfp, struct pmemfile_block_desc *starting_block, uint64_t offset, uint64_t len, char *buf, enum cpy_direction dir); +void vinode_write(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, + size_t offset, struct pmemfile_block_desc **last_block, + const char *buf, size_t count); + +size_t vinode_read(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, + size_t offset, struct pmemfile_block_desc **last_block, + char *buf, size_t count); + #endif diff --git a/src/libpmemfile-posix/read.c b/src/libpmemfile-posix/read.c index ec6908687..5b09e5dc3 100644 --- a/src/libpmemfile-posix/read.c +++ 
b/src/libpmemfile-posix/read.c @@ -44,40 +44,6 @@ #include "pool.h" #include "utils.h" -/* - * vinode_read -- reads file - */ -static size_t -vinode_read(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, size_t offset, - struct pmemfile_block_desc **last_block, char *buf, - size_t count) -{ - uint64_t size = inode_get_size(vinode->inode); - - /* - * Start reading at offset, stop reading - * when end of file is reached, or count bytes were read. - * The following two branches compute how many bytes are - * going to be read. - */ - if (offset >= size) - return 0; /* EOF already */ - - if (size - offset < count) - count = size - offset; - - struct pmemfile_block_desc *block = - find_closest_block_with_hint(vinode, offset, *last_block); - - block = iterate_on_file_range(pfp, vinode, block, offset, - count, buf, read_from_blocks); - - if (block) - *last_block = block; - - return count; -} - static int time_cmp(const struct pmemfile_time *t1, const struct pmemfile_time *t2) { diff --git a/src/libpmemfile-posix/write.c b/src/libpmemfile-posix/write.c index e40d43592..1bb37c7e7 100644 --- a/src/libpmemfile-posix/write.c +++ b/src/libpmemfile-posix/write.c @@ -44,35 +44,6 @@ #include "pool.h" #include "utils.h" -/* - * vinode_write -- writes to file - */ -static void -vinode_write(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, size_t offset, - struct pmemfile_block_desc **last_block, - const char *buf, size_t count) -{ - ASSERT(count > 0); - - /* - * Two steps: - * - Zero Fill some new blocks, in case the file is extended by - * writing to the file after seeking past file size ( optionally ) - * - Copy the data from the users buffer - */ - - /* All blocks needed for writing are properly allocated at this point */ - - struct pmemfile_block_desc *block = - find_closest_block_with_hint(vinode, offset, *last_block); - - block = iterate_on_file_range(pfp, vinode, block, offset, - count, (char *)buf, write_to_blocks); - - if (block) - *last_block = block; -} - /* * 
pmemfile_pwritev_args_check - checks some write arguments * The arguments here can be examined while holding the mutex for the From 08c7adf923b000f72025007285256c1c4cda5f08 Mon Sep 17 00:00:00 2001 From: Gabor Buella Date: Mon, 2 Oct 2017 13:34:09 +0200 Subject: [PATCH 2/3] posix: special inode: suspended refs --- CMakeLists.txt | 2 +- include/libpmemfile-posix.h | 6 +- src/libpmemfile-posix/inode.c | 36 --- src/libpmemfile-posix/inode.h | 10 +- src/libpmemfile-posix/layout.h | 13 +- src/libpmemfile-posix/pool.c | 223 ++++++++++++++++--- src/libpmemfile-posix/pool.h | 2 + src/libpmemfile-posix/unlink.c | 89 +++++++- src/libpmemfile/libpmemfile-posix-wrappers.h | 32 ++- src/libpmemfile/preload.c | 29 ++- tests/posix/pmemfile_test.cpp | 4 +- 11 files changed, 361 insertions(+), 85 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b281aee8c..27cf266f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,7 +35,7 @@ project(pmemfile C CXX) set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) set(VERSION_MAJOR 0) -set(VERSION_MINOR 4) +set(VERSION_MINOR 5) set(VERSION_PATCH 0) set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}) diff --git a/include/libpmemfile-posix.h b/include/libpmemfile-posix.h index b9394e6a7..06e515d8e 100644 --- a/include/libpmemfile-posix.h +++ b/include/libpmemfile-posix.h @@ -406,8 +406,10 @@ PMEMfile *pmemfile_open_parent(PMEMfilepool *pfp, PMEMfile *at, const char *pmemfile_errormsg(void); -int pmemfile_pool_resume(PMEMfilepool *pfp, const char *pathname); -int pmemfile_pool_suspend(PMEMfilepool *pfp); +int pmemfile_pool_resume(PMEMfilepool *pfp, const char *pool_path, + unsigned at_root, const char * const *paths, int flags); +int pmemfile_pool_suspend(PMEMfilepool *pfp, unsigned at_root, + const char *const * paths, int flags); #include "libpmemfile-posix-stubs.h" diff --git a/src/libpmemfile-posix/inode.c b/src/libpmemfile-posix/inode.c index a53c2291a..7ad65847e 100644 --- a/src/libpmemfile-posix/inode.c +++ 
b/src/libpmemfile-posix/inode.c @@ -728,10 +728,6 @@ vinode_suspend(PMEMfilepool *pfp, struct pmemfile_vinode *vinode) *tm = vinode->atime; } - _inode_array_add(pfp, pfp->super->suspended_inodes, vinode->tinode, - &vinode->suspended.arr, &vinode->suspended.idx, - INODE_ARRAY_NOLOCK); - if (vinode->blocks) { offset_map_delete(vinode->blocks); vinode->blocks = NULL; @@ -749,34 +745,6 @@ add_off(void *ptr, uintptr_t off) return (void *)((uintptr_t)ptr + off); } -/* - * inode_resume -- restores persistent part of inode after suspend - */ -void -inode_resume(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, - PMEMobjpool *old_pop) -{ - struct inode_suspend_info suspended = vinode->suspended; - struct pmemfile_inode *inode = vinode->inode; - - ASSERT(vinode->suspended.arr != NULL); - - if (pfp->pop != old_pop) { - uintptr_t diff = (uintptr_t)pfp->pop - (uintptr_t)old_pop; - - suspended.arr = add_off(suspended.arr, diff); - inode = add_off(inode, diff); - } - - ASSERT(inode->suspended_references > 0); - - TX_ADD_DIRECT(&inode->suspended_references); - inode->suspended_references--; - - _inode_array_unregister(pfp, suspended.arr, suspended.idx, - INODE_ARRAY_NOLOCK); -} - /* * vinode_resume -- restores runtime part of inode after suspend */ @@ -784,14 +752,10 @@ void vinode_resume(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, PMEMobjpool *old_pop) { - vinode->suspended.arr = NULL; - vinode->suspended.idx = 0; - if (pfp->pop != old_pop) { uintptr_t diff = (uintptr_t)pfp->pop - (uintptr_t)old_pop; vinode->inode = add_off(vinode->inode, diff); - if (vinode->orphaned.arr) vinode->orphaned.arr = add_off(vinode->orphaned.arr, diff); diff --git a/src/libpmemfile-posix/inode.h b/src/libpmemfile-posix/inode.h index 8b75f15cd..f97bf09e8 100644 --- a/src/libpmemfile-posix/inode.h +++ b/src/libpmemfile-posix/inode.h @@ -354,6 +354,11 @@ static inline bool vinode_is_longsymlink(struct pmemfile_vinode *vinode) const char *get_symlink(PMEMfilepool *pfp, struct pmemfile_vinode 
*vinode); +static inline bool inode_has_suspended_refs(const struct pmemfile_inode *inode) +{ + return (inode_get_flags(inode) & PMEMFILE_I_SUSPENDED_REF) != 0; +} + struct pmemfile_cred; TOID(struct pmemfile_inode) inode_alloc(PMEMfilepool *pfp, struct pmemfile_cred *cred, uint64_t flags); @@ -404,9 +409,10 @@ blockp_as_oid(struct pmemfile_block_desc *block) int vinode_rdlock_with_block_tree(PMEMfilepool *, struct pmemfile_vinode *); void vinode_suspend(PMEMfilepool *pfp, struct pmemfile_vinode *vinode); -void inode_resume(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, - PMEMobjpool *old_pop); void vinode_resume(PMEMfilepool *pfp, struct pmemfile_vinode *vinode, PMEMobjpool *old_pop); +/* 2 for "0x" 16 for pool_uuid 1 for ":" 2 for "0x" 16 for offset 1 for "\n" */ +#define SUSPENDED_INODE_LINE_LENGTH (2 + 16 + 1 + 2 + 16 + 1) + #endif diff --git a/src/libpmemfile-posix/layout.h b/src/libpmemfile-posix/layout.h index 6a5803baa..a22b4efe7 100644 --- a/src/libpmemfile-posix/layout.h +++ b/src/libpmemfile-posix/layout.h @@ -237,6 +237,15 @@ struct pmemfile_inode { */ COMPILE_ERROR_ON(sizeof(union pmemfile_inode_slots) != 8); +/* + * Most constants used with the flags field of pmemfile_inode are defined in the + * public header. + * + * Use the most significant 16 bits for flags only used internally. Hopefully + * this is not going to conflict with any flags in a Kernel API in the future. + */ +#define PMEMFILE_I_SUSPENDED_REF (UINT64_C(1) << 48) + COMPILE_ERROR_ON(sizeof(struct pmemfile_inode) != PMEMFILE_INODE_SIZE); #define PMEMFILE_INODE_ARRAY_VERSION(a) ((uint32_t)0x00414E49 | \ @@ -294,9 +303,6 @@ struct pmemfile_super { /* list of arrays of inodes that were deleted, but are still opened */ TOID(struct pmemfile_inode_array) orphaned_inodes; - /* list of arrays of inodes that are suspended */ - TOID(struct pmemfile_inode_array) suspended_inodes; - /* * The array of root directories. Each one of them is a root of a * separate directory tree. 
The path "/" resolves to root #0, all other @@ -308,7 +314,6 @@ struct pmemfile_super { char padding[PMEMFILE_SUPER_SIZE - 8 /* version */ - 16 * (PMEMFILE_ROOT_COUNT) /* toid */ - - 16 /* toid */ - 16 /* toid */]; }; diff --git a/src/libpmemfile-posix/pool.c b/src/libpmemfile-posix/pool.c index 5dfb02332..4f72f5b96 100644 --- a/src/libpmemfile-posix/pool.c +++ b/src/libpmemfile-posix/pool.c @@ -36,13 +36,16 @@ #include #include +#include #include "alloc.h" #include "blocks.h" #include "callbacks.h" #include "compiler_utils.h" +#include "data.h" #include "dir.h" #include "hash_map.h" +#include "file.h" #include "inode.h" #include "inode_array.h" #include "locks.h" @@ -123,7 +126,6 @@ initialize_super_block(PMEMfilepool *pfp) super->version = PMEMFILE_CUR_VERSION; super->orphaned_inodes = inode_array_alloc(pfp); - super->suspended_inodes = inode_array_alloc(pfp); } TX_ONABORT { error = errno; } TX_END @@ -337,17 +339,53 @@ struct resume_info { PMEMobjpool *old_pop; }; +struct suspend_info { + PMEMfilepool *pfp; + unsigned count; + struct pmemfile_vinode *dst_vinode; + struct pmemfile_block_desc *last_block; +}; + +static size_t +print_toid(size_t buf_size, char buf[buf_size], + TOID(struct pmemfile_inode) *tinode) +{ + uint64_t raw[2]; + memcpy(raw, tinode, sizeof(raw)); + + return (size_t)snprintf(buf, buf_size, + "0x%016" PRIx64 ":0x%016" PRIx64 "\n", raw[0], raw[1]); +} + static void -vinode_suspend_cb(uint64_t off, void *vinode, void *arg) +vinode_suspend_append_special_file(struct pmemfile_vinode *vinode, + struct suspend_info *desc) { - vinode_suspend(arg, vinode); + char line[SUSPENDED_INODE_LINE_LENGTH + 1]; + size_t line_len = print_toid(sizeof(line), line, &vinode->tinode); + + struct pmemfile_inode *dst_inode = desc->dst_vinode->inode; + + size_t allocated = inode_get_allocated_space(dst_inode); + allocated += vinode_allocate_interval(desc->pfp, desc->dst_vinode, + inode_get_size(dst_inode), line_len); + *(inode_get_allocated_space_ptr(dst_inode)) = 
allocated; + vinode_write(desc->pfp, desc->dst_vinode, inode_get_size(dst_inode), + &desc->last_block, line, line_len); + *(inode_get_size_ptr(dst_inode)) += line_len; } static void -inode_resume_cb(uint64_t off, void *vinode, void *arg) +vinode_suspend_cb(uint64_t off, void *vinode, void *arg) { - struct resume_info *info = arg; - inode_resume(info->pfp, vinode, info->old_pop); + struct suspend_info *desc = (struct suspend_info *)arg; + + if (vinode == desc->dst_vinode) + return; + + vinode_suspend_append_special_file(vinode, desc); + + vinode_suspend(desc->pfp, vinode); } static void @@ -357,24 +395,55 @@ vinode_resume_cb(uint64_t off, void *vinode, void *arg) vinode_resume(info->pfp, vinode, info->old_pop); } +static int +check_paths_on_resume(PMEMfilepool *pfp, PMEMfile *file_at, + const char *const *paths) +{ + for (const char *const *path = paths; *path != NULL; ++path) { + uintptr_t off; + PMEMfile *file; + + file = pmemfile_openat(pfp, file_at, *path, PMEMFILE_O_RDONLY); + if (file == NULL) + return -1; + off = (uintptr_t)file->vinode->inode - (uintptr_t)pfp->pop; + pmemfile_close(pfp, file); + + if (off != pfp->suspense) { + errno = EINVAL; + return -1; + } + } + + return 0; +} + /* * pmemfile_pool_resume -- notifies pmemfile that pool is now going to be used * * Can be called only after pmemfile_pool_suspend. 
*/ int -pmemfile_pool_resume(PMEMfilepool *pfp, const char *pathname) +pmemfile_pool_resume(PMEMfilepool *pfp, const char *pool_path, + unsigned root_index, const char *const *paths, int flags) { + if (flags != 0) { + errno = EINVAL; + return -1; + } + PMEMobjpool *new_pop = NULL; while (new_pop == NULL) { - new_pop = pmemobj_open(pathname, POBJ_LAYOUT_NAME(pmemfile)); + new_pop = pmemobj_open(pool_path, POBJ_LAYOUT_NAME(pmemfile)); if (new_pop == NULL) // XXX os_usleep(1000); } int error = 0; + PMEMfile *file_at = NULL; + PMEMobjpool *old_pop = pfp->pop; struct pmemfile_super *old_super = pfp->super; @@ -391,25 +460,36 @@ pmemfile_pool_resume(PMEMfilepool *pfp, const char *pathname) struct resume_info arg = {pfp, old_pop}; - TX_BEGIN_CB(pfp->pop, cb_queue, pfp) { - hash_map_traverse(pfp->inode_map, inode_resume_cb, &arg); - } TX_ONABORT { - error = -1; - } TX_END + hash_map_traverse(pfp->inode_map, vinode_resume_cb, &arg); - if (error) { - int oerrno = errno; - pmemobj_close(new_pop); - errno = oerrno; + file_at = pmemfile_open_root(pfp, root_index, 0); + if (file_at == NULL) { + error = errno; + goto err; + } - pfp->pop = old_pop; - pfp->super = old_super; - return -1; + if (check_paths_on_resume(pfp, file_at, paths) != 0) + goto err; + + for (const char *const *path = paths; *path != NULL; ++path) { + if (pmemfile_unlinkat(pfp, file_at, *path, 0) != 0) + goto err; } - hash_map_traverse(pfp->inode_map, vinode_resume_cb, &arg); + pmemfile_close(pfp, file_at); return 0; + +err: + if (file_at != NULL) + pmemfile_close(pfp, file_at); + + pmemobj_close(new_pop); + errno = error; + + pfp->pop = old_pop; + pfp->super = old_super; + return -1; } /* @@ -421,19 +501,112 @@ pmemfile_pool_resume(PMEMfilepool *pfp, const char *pathname) * not being safe)! 
*/ int -pmemfile_pool_suspend(PMEMfilepool *pfp) +pmemfile_pool_suspend(PMEMfilepool *pfp, unsigned root_index, + const char *const *paths, int flags) { + if (flags != 0 || paths == NULL || paths[0] == NULL) { + errno = EINVAL; + return -1; + } + + struct suspend_info sinfo = {.pfp = pfp, }; + sinfo.count = 0; + + while (paths[sinfo.count] != NULL) { + if (paths[sinfo.count][0] == '\0') { + errno = EINVAL; + return -1; + } + + sinfo.count++; + } + int error = 0; + PMEMfile *file_at = NULL; + PMEMfile *file = NULL; + + file_at = pmemfile_open_root(pfp, root_index, 0); + if (file_at == NULL) + return -1; + + file = pmemfile_openat(pfp, file_at, paths[0], + PMEMFILE_O_CREAT | PMEMFILE_O_EXCL | PMEMFILE_O_RDWR, + 0400); + + if (file == NULL) { + error = errno; + goto err; + } + + os_rwlock_wrlock(&file->vinode->rwlock); + if (!file->vinode->blocks) + error = vinode_rebuild_block_tree(pfp, file->vinode); + os_rwlock_unlock(&file->vinode->rwlock); + if (error) + goto err; + + sinfo.dst_vinode = file->vinode; + + for (unsigned i = 1; i < sinfo.count; ++i) { + if (pmemfile_linkat(pfp, file_at, paths[0], + file_at, paths[i], 0) != 0) { + error = errno; + goto err; + } + } TX_BEGIN_CB(pfp->pop, cb_queue, pfp) { - hash_map_traverse(pfp->inode_map, vinode_suspend_cb, pfp); + /* + * Set the flag indicating a special file in the transaction. + * If the transaction fails, and power goes out before removing + * the file, then it just stays there as a regular (empty) file, + * not causing a lot of trouble. + */ + TX_ADD_DIRECT(inode_get_flags_ptr(sinfo.dst_vinode->inode)); + *(inode_get_flags_ptr(sinfo.dst_vinode->inode)) |= + PMEMFILE_I_SUSPENDED_REF; + + /* + * These two fields are updated with each entry written to the + * special file. 
+ */ + TX_ADD_DIRECT( + inode_get_allocated_space_ptr(sinfo.dst_vinode->inode)); + TX_ADD_DIRECT(inode_get_size_ptr(sinfo.dst_vinode->inode)); + + hash_map_traverse(pfp->inode_map, vinode_suspend_cb, &sinfo); } TX_ONABORT { - error = -1; + error = errno; } TX_END if (error) - return -1; + goto err; + + pfp->suspense = (uintptr_t)file->vinode->inode - (uintptr_t)pfp->pop; + pmemfile_close(pfp, file_at); + pmemfile_close(pfp, file); pmemobj_close(pfp->pop); return 0; + +err: + if (file != NULL) + pmemfile_close(pfp, file); + + for (unsigned i = 0; i < sinfo.count; ++i) { + /* + * If something went wrong, the files are unlinked here. It is + * important that they should not have the relevant internal + * flag set, as pmemfile_unlink would attempt to decrement + * suspended reference counters. + * They must still be actual regular files at this point. + */ + pmemfile_unlinkat(pfp, file_at, paths[i], 0); + } + + if (file_at != NULL) + pmemfile_close(pfp, file_at); + + errno = error; + return -1; } diff --git a/src/libpmemfile-posix/pool.h b/src/libpmemfile-posix/pool.h index 8c7008d80..7e7dbb2b1 100644 --- a/src/libpmemfile-posix/pool.h +++ b/src/libpmemfile-posix/pool.h @@ -68,6 +68,8 @@ struct pmemfilepool { /* current credentials */ struct pmemfile_cred cred; os_rwlock_t cred_rwlock; + + uintptr_t suspense; /* XXX perhaps a better name for this field? 
*/ }; #endif diff --git a/src/libpmemfile-posix/unlink.c b/src/libpmemfile-posix/unlink.c index e5b9b33fe..8b5709a0d 100644 --- a/src/libpmemfile-posix/unlink.c +++ b/src/libpmemfile-posix/unlink.c @@ -37,6 +37,7 @@ #include #include "callbacks.h" +#include "data.h" #include "dir.h" #include "libpmemfile-posix.h" #include "out.h" @@ -95,6 +96,62 @@ vinode_unlink_file(PMEMfilepool *pfp, dirent->inode = TOID_NULL(struct pmemfile_inode); } +static struct pmemfile_inode * +parse_inode_toid(PMEMfilepool *pfp, const char *buf) +{ + if (strchr(buf, '\n') != buf + SUSPENDED_INODE_LINE_LENGTH - 1) + pmemobj_tx_abort(EINVAL); + + uint64_t raw[2]; + TOID(struct pmemfile_inode) result; + COMPILE_ERROR_ON(sizeof(result) != sizeof(raw)); + + char *endptr; + + buf += 2; /* "0x" */ + uintmax_t n = strtoumax(buf, &endptr, 16); + if (n == 0 || n >= UINT64_MAX || *endptr != ':') + pmemobj_tx_abort(EINVAL); + + raw[0] = (uint64_t)n; + + buf = endptr; + ++buf; /* ":" */ + buf += 2; /* "0x" */ + + n = strtoumax(buf, &endptr, 16); + if (n == 0 || n >= UINT64_MAX || *endptr != '\n') + pmemobj_tx_abort(EINVAL); + + raw[1] = (uint64_t)n; + + memcpy(&result, raw, sizeof(result)); + + return PF_RW(pfp, result); +} + +static void +decrement_susp_ref_counts(PMEMfilepool *pfp, struct pmemfile_vinode *vinode) +{ + char line[SUSPENDED_INODE_LINE_LENGTH]; + size_t offset = 0; + struct pmemfile_block_desc *last_block = NULL; + size_t r; + + while ((r = vinode_read(pfp, vinode, offset, &last_block, + line, sizeof(line))) == sizeof(line)) { + struct pmemfile_inode *inode = parse_inode_toid(pfp, line); + + TX_ADD_DIRECT(&inode->suspended_references); + inode->suspended_references--; + + offset += sizeof(line); + } + + if (r != 0) /* The file can't have a partial line */ + pmemobj_tx_abort(EINVAL); +} + static int _pmemfile_unlinkat(PMEMfilepool *pfp, struct pmemfile_vinode *dir, const char *pathname) @@ -149,17 +206,41 @@ _pmemfile_unlinkat(PMEMfilepool *pfp, struct pmemfile_vinode *dir, struct 
pmemfile_time t; get_current_time(&t); + struct pmemfile_vinode *vinode = dirent_info.vinode; + + bool is_special_suspended_refs_inode = + inode_has_suspended_refs(vinode->inode); + + if (is_special_suspended_refs_inode) { + os_rwlock_wrlock(&pfp->super_rwlock); + if (!vinode->blocks) + error = vinode_rebuild_block_tree(pfp, vinode); + if (error) + goto end_vinode_super_unlock; + } + TX_BEGIN_CB(pfp->pop, cb_queue, pfp) { vinode_unlink_file(pfp, info.parent, dirent_info.dirent, - dirent_info.vinode, t); - - if (inode_get_nlink(dirent_info.vinode->inode) == 0) - vinode_orphan(pfp, dirent_info.vinode); + vinode, t); + + if (inode_get_nlink(vinode->inode) == 0) { + if (is_special_suspended_refs_inode) { + decrement_susp_ref_counts(pfp, vinode); + vinode_orphan_unlocked(pfp, vinode); + } else { + vinode_orphan(pfp, vinode); + } + } } TX_ONABORT { error = errno; } TX_END +end_vinode_super_unlock: + if (is_special_suspended_refs_inode) + os_rwlock_unlock(&pfp->super_rwlock); + end_vinode: + vinode_unlock2(dirent_info.vinode, info.parent); vinode_unref(pfp, dirent_info.vinode); diff --git a/src/libpmemfile/libpmemfile-posix-wrappers.h b/src/libpmemfile/libpmemfile-posix-wrappers.h index 79230b883..63021307b 100644 --- a/src/libpmemfile/libpmemfile-posix-wrappers.h +++ b/src/libpmemfile/libpmemfile-posix-wrappers.h @@ -1942,36 +1942,54 @@ wrapper_pmemfile_errormsg(void) static inline int wrapper_pmemfile_pool_resume(PMEMfilepool *pfp, - const char *pathname) + const char *pool_path, + unsigned at_root, + const char *const *paths, + int flags) { int ret; ret = pmemfile_pool_resume(pfp, - pathname); + pool_path, + at_root, + paths, + flags); if (ret < 0) ret = -errno; log_write( - "pmemfile_pool_resume(%p, \"%s\") = %d", + "pmemfile_pool_resume(%p, %p, %u, %p, %d) = %d", pfp, - pathname, + pool_path, + at_root, + paths, + flags, ret); return ret; } static inline int -wrapper_pmemfile_pool_suspend(PMEMfilepool *pfp) +wrapper_pmemfile_pool_suspend(PMEMfilepool *pfp, + unsigned 
at_root, + const char *const *paths, + int flags) { int ret; - ret = pmemfile_pool_suspend(pfp); + ret = pmemfile_pool_suspend(pfp, + at_root, + paths, + flags); if (ret < 0) ret = -errno; log_write( - "pmemfile_pool_suspend(%p) = %d", + "pmemfile_pool_suspend(%p, %u, %p, %d) = %d", pfp, + at_root, + paths, + flags, ret); return ret; diff --git a/src/libpmemfile/preload.c b/src/libpmemfile/preload.c index 01a67f554..1f68b7aac 100644 --- a/src/libpmemfile/preload.c +++ b/src/libpmemfile/preload.c @@ -137,6 +137,21 @@ static int pool_count; #define RWF_SYNC 0x00000004 #endif +/* + * Path where a special inode is created, to refer to suspended inodes. + * The format could be "proc/%d", but one needs to make sure the "proc" + * directory exists. + */ +static const unsigned suspended_refs_root_index = 1; +static const char suspended_refs_f[] = "%d"; + +static void +print_suspended_refs_path(size_t size, char buffer[size]) +{ + int pid = (int)syscall_no_intercept(SYS_getpid); + snprintf(buffer, size, suspended_refs_f, pid); +} + /* * pool_acquire -- acquires access to pool */ @@ -150,7 +165,12 @@ pool_acquire(struct pool_description *pool) pool->ref_cnt++; if (pool->ref_cnt == 1 && pool->suspended) { - if (pmemfile_pool_resume(pool->pool, pool->poolfile_path)) + char suspended[sizeof(suspended_refs_f) + 16]; + print_suspended_refs_path(sizeof(suspended), suspended); + + if (pmemfile_pool_resume(pool->pool, pool->poolfile_path, + suspended_refs_root_index, + (const char *[]) {suspended, NULL}, 0) != 0) FATAL("could not restore pmemfile pool"); pool->suspended = false; } @@ -173,7 +193,12 @@ pool_release(struct pool_description *pool) pool->ref_cnt--; if (pool->ref_cnt == 0 && !pool->suspended) { - if (pmemfile_pool_suspend(pool->pool)) + char suspended[sizeof(suspended_refs_f) + 16]; + print_suspended_refs_path(sizeof(suspended), suspended); + + if (pmemfile_pool_suspend(pool->pool, + suspended_refs_root_index, + (const char *[]) {suspended, NULL}, 0) != 0) FATAL("could 
not suspend pmemfile pool"); pool->suspended = true; } diff --git a/tests/posix/pmemfile_test.cpp b/tests/posix/pmemfile_test.cpp index 2006703a4..d75e81b3c 100644 --- a/tests/posix/pmemfile_test.cpp +++ b/tests/posix/pmemfile_test.cpp @@ -92,11 +92,11 @@ test_pmemfile_stats_match(PMEMfilepool *pfp, unsigned inodes, unsigned dirs, EXPECT_EQ(stats.inodes, inodes); EXPECT_EQ(stats.dirs, dirs); EXPECT_EQ(stats.block_arrays, block_arrays); - EXPECT_EQ(stats.inode_arrays, 2u); + EXPECT_EQ(stats.inode_arrays, 1u); EXPECT_EQ(stats.blocks, blocks); return stats.inodes == inodes && stats.dirs == dirs && - stats.block_arrays == block_arrays && stats.inode_arrays == 2 && + stats.block_arrays == block_arrays && stats.inode_arrays == 1 && stats.blocks == blocks; } From c694c15639df379611c2a7fd3088982dd4fbecd8 Mon Sep 17 00:00:00 2001 From: Gabor Buella Date: Mon, 9 Oct 2017 17:58:25 +0200 Subject: [PATCH 3/3] tests: suspend_resume --- tests/posix/CMakeLists.txt | 6 + .../posix/suspend_resume/suspend_resume.cmake | 38 ++++ tests/posix/suspend_resume/suspend_resume.cpp | 165 ++++++++++++++++++ 3 files changed, 209 insertions(+) create mode 100644 tests/posix/suspend_resume/suspend_resume.cmake create mode 100644 tests/posix/suspend_resume/suspend_resume.cpp diff --git a/tests/posix/CMakeLists.txt b/tests/posix/CMakeLists.txt index ec84dac1a..933f6b70a 100644 --- a/tests/posix/CMakeLists.txt +++ b/tests/posix/CMakeLists.txt @@ -116,6 +116,7 @@ compile_test_source(file_rw_o rw/rw.cpp) compile_test_source(file_stat_o stat/stat.cpp) compile_test_source(file_symlinks_o symlinks/symlinks.cpp) compile_test_source(file_timestamps_o timestamps/timestamps.cpp) +compile_test_source(file_suspend_resume_o suspend_resume/suspend_resume.cpp) function(build_test name posix_lib obj_lib_name) add_executable(${name} $) @@ -154,6 +155,7 @@ build_test_using_shared(file_rw file_rw_o) build_test_using_shared(file_stat file_stat_o) build_test_using_shared(file_symlinks file_symlinks_o) 
build_test_using_shared(file_timestamps file_timestamps_o) +build_test_using_shared(file_suspend_resume file_suspend_resume_o) target_include_directories(file_offset_mapping PUBLIC ${PMEMOBJ_INCLUDE_DIRS}) target_include_directories(file_offset_mapping PUBLIC ${CMAKE_SOURCE_DIR}/src/libpmemfile-posix) @@ -330,6 +332,10 @@ add_test_generic(symlinks memcheck) add_test_generic(timestamps none) add_test_generic(timestamps memcheck) +add_test_generic(suspend_resume none) +add_test_generic(suspend_resume memcheck) +add_test_generic(suspend_resume pmemcheck) + if(NOT LONG_TESTS) add_test(NAME SOME_TESTS_WERE_SKIPPED_BECAUSE_LONG_TESTS_ARE_DISABLED COMMAND true) diff --git a/tests/posix/suspend_resume/suspend_resume.cmake b/tests/posix/suspend_resume/suspend_resume.cmake new file mode 100644 index 000000000..febd6304a --- /dev/null +++ b/tests/posix/suspend_resume/suspend_resume.cmake @@ -0,0 +1,38 @@ +# +# Copyright 2017, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include(${SRC_DIR}/../posix-helpers.cmake) + +setup() + +execute(${TEST_EXECUTABLE}) + +cleanup() diff --git a/tests/posix/suspend_resume/suspend_resume.cpp b/tests/posix/suspend_resume/suspend_resume.cpp new file mode 100644 index 000000000..501c33871 --- /dev/null +++ b/tests/posix/suspend_resume/suspend_resume.cpp @@ -0,0 +1,165 @@ +/* + * Copyright 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * suspend_resume.cpp -- test for pmemfile_pool_[suspend|resume] + */ + +#include "pmemfile_test.hpp" + +#include +#include + +static const char *pool_path; + +static PMEMfilepool * +create_pool() +{ + return pmemfile_pool_create(pool_path, 16 * 1024 * 1024, + PMEMFILE_S_IWUSR | PMEMFILE_S_IRUSR); +} + +static bool +contains_two_ints(const char *buffer) +{ + uint64_t raw[2]; + if (sscanf(buffer, "0x%" SCNx64 ":0x%" SCNx64 "\n", raw, raw + 1) != 2) + return false; + + return raw[0] != 0 && raw[1] != 0; +} + +TEST(suspend_resume, 0) +{ + static const char *const paths0[] = {nullptr}; + static const char *const paths[] = {"dummy0", "dummy1", "dummy2", + nullptr}; + ssize_t r; + + PMEMfilepool *pfp = create_pool(); + ASSERT_NE(pfp, nullptr) << strerror(errno); + + errno = 0; + r = pmemfile_pool_suspend(pfp, 1, paths, 1); + ASSERT_EQ(r, -1); + EXPECT_EQ(errno, EINVAL); + + errno = 0; + r = pmemfile_pool_suspend(pfp, 1, paths0, 0); + ASSERT_EQ(r, -1); + EXPECT_EQ(errno, EINVAL); + + errno = 0; + r = pmemfile_pool_suspend(pfp, 255, paths, 0); + ASSERT_EQ(r, -1); + EXPECT_EQ(errno, EINVAL); + + errno = 0; + r = pmemfile_pool_suspend(pfp, 1, paths, 0); + ASSERT_EQ(r, 0) << strerror(errno); + + errno = 0; + r = pmemfile_pool_resume(pfp, pool_path, 1, paths, 0); + ASSERT_EQ(r, 0) << strerror(errno); + + pmemfile_pool_close(pfp); + + pfp = pmemfile_pool_open(pool_path); + ASSERT_NE(pfp, nullptr) << strerror(errno); + + PMEMfile *f0 = 
pmemfile_open(pfp, "/file0", + PMEMFILE_O_CREAT | PMEMFILE_O_EXCL, 0700); + ASSERT_NE(f0, nullptr) << strerror(errno); + PMEMfile *f1 = pmemfile_open(pfp, "/file1", + PMEMFILE_O_CREAT | PMEMFILE_O_EXCL, 0700); + ASSERT_NE(f1, nullptr) << strerror(errno); + + errno = 0; + r = pmemfile_pool_suspend(pfp, 1, paths, 0); + ASSERT_EQ(r, 0) << strerror(errno); + + errno = 0; + PMEMfilepool *pfp2 = pmemfile_pool_open(pool_path); + ASSERT_NE(pfp2, nullptr) << strerror(errno); + + errno = 0; + PMEMfile *root1 = pmemfile_open_root(pfp2, 1, 0); + ASSERT_NE(root1, nullptr) << strerror(errno); + + errno = 0; + PMEMfile *dummy1 = + pmemfile_openat(pfp2, root1, "dummy1", PMEMFILE_O_RDONLY); + ASSERT_NE(dummy1, nullptr) << strerror(errno); + + char buf[0x1000]; + + r = pmemfile_read(pfp2, dummy1, buf, sizeof(buf)); + ASSERT_GE(r, 16) << strerror(errno); + ASSERT_NE(r, 0x100); + buf[r] = '\0'; + + ASSERT_NE(strchr(buf, '\n'), nullptr); + ASSERT_TRUE(contains_two_ints(buf)); + ASSERT_NE(strchr(strchr(buf, '\n') + 1, '\n'), nullptr); + ASSERT_TRUE(contains_two_ints(strchr(buf, '\n') + 1)); + + pmemfile_close(pfp2, dummy1); + pmemfile_close(pfp2, root1); + pmemfile_pool_close(pfp2); + + errno = 0; + r = pmemfile_pool_resume(pfp, pool_path, 1, paths, 0); + ASSERT_EQ(r, 0) << strerror(errno); + + pmemfile_close(pfp, f0); + pmemfile_close(pfp, f1); + + pmemfile_pool_close(pfp); +} + +int +main(int argc, char *argv[]) +{ + START(); + + if (argc < 2) { + fprintf(stderr, "usage: %s path", argv[0]); + exit(1); + } + + static std::string pool_path_container = + std::string(argv[1]) + std::string("/pool"); + pool_path = pool_path_container.c_str(); + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +}