diff --git a/fs/fs_zenfs.cc b/fs/fs_zenfs.cc index 966769a7..d671650c 100644 --- a/fs/fs_zenfs.cc +++ b/fs/fs_zenfs.cc @@ -26,7 +26,7 @@ namespace ROCKSDB_NAMESPACE { -Status Superblock::DecodeFrom(Slice* input) { +Status Superblock::DecodeFrom(Slice *input) { if (input->size() != ENCODED_SIZE) { return Status::Corruption("ZenFS Superblock", "Error: Superblock size missmatch"); @@ -56,7 +56,7 @@ Status Superblock::DecodeFrom(Slice* input) { return Status::OK(); } -void Superblock::EncodeTo(std::string* output) { +void Superblock::EncodeTo(std::string *output) { sequence_++; /* Ensure that this superblock representation is unique */ output->clear(); PutFixed32(output, magic_); @@ -73,7 +73,7 @@ void Superblock::EncodeTo(std::string* output) { assert(output->length() == ENCODED_SIZE); } -Status Superblock::CompatibleWith(ZonedBlockDevice* zbd) { +Status Superblock::CompatibleWith(ZonedBlockDevice *zbd) { if (block_size_ != zbd->GetBlockSize()) return Status::Corruption("ZenFS Superblock", "Error: block size missmatch"); @@ -86,12 +86,12 @@ Status Superblock::CompatibleWith(ZonedBlockDevice* zbd) { return Status::OK(); } -IOStatus ZenMetaLog::AddRecord(const Slice& slice) { +IOStatus ZenMetaLog::AddRecord(const Slice &slice) { uint32_t record_sz = slice.size(); - const char* data = slice.data(); + const char *data = slice.data(); size_t phys_sz; uint32_t crc = 0; - char* buffer; + char *buffer; int ret; IOStatus s; @@ -102,12 +102,12 @@ IOStatus ZenMetaLog::AddRecord(const Slice& slice) { assert(data != nullptr); assert((phys_sz % bs_) == 0); - ret = posix_memalign((void**)&buffer, sysconf(_SC_PAGESIZE), phys_sz); + ret = posix_memalign((void **)&buffer, sysconf(_SC_PAGESIZE), phys_sz); if (ret) return IOStatus::IOError("Failed to allocate memory"); memset(buffer, 0, phys_sz); - crc = crc32c::Extend(crc, (const char*)&record_sz, sizeof(uint32_t)); + crc = crc32c::Extend(crc, (const char *)&record_sz, sizeof(uint32_t)); crc = crc32c::Extend(crc, data, record_sz); crc = 
crc32c::Mask(crc); @@ -121,9 +121,9 @@ IOStatus ZenMetaLog::AddRecord(const Slice& slice) { return s; } -IOStatus ZenMetaLog::Read(Slice* slice) { +IOStatus ZenMetaLog::Read(Slice *slice) { int f = zbd_->GetReadFD(); - const char* data = slice->data(); + const char *data = slice->data(); size_t read = 0; size_t to_read = slice->size(); int ret; @@ -139,7 +139,7 @@ IOStatus ZenMetaLog::Read(Slice* slice) { } while (read < to_read) { - ret = pread(f, (void*)(data + read), to_read - read, read_pos_); + ret = pread(f, (void *)(data + read), to_read - read, read_pos_); if (ret == -1 && errno == EINTR) continue; if (ret < 0) return IOStatus::IOError("Read failed"); @@ -151,7 +151,7 @@ IOStatus ZenMetaLog::Read(Slice* slice) { return IOStatus::OK(); } -IOStatus ZenMetaLog::ReadRecord(Slice* record, std::string* scratch) { +IOStatus ZenMetaLog::ReadRecord(Slice *record, std::string *scratch) { Slice header; uint32_t record_sz = 0; uint32_t record_crc = 0; @@ -183,7 +183,7 @@ IOStatus ZenMetaLog::ReadRecord(Slice* record, std::string* scratch) { s = Read(record); if (!s.ok()) return s; - actual_crc = crc32c::Value((const char*)&record_sz, sizeof(uint32_t)); + actual_crc = crc32c::Value((const char *)&record_sz, sizeof(uint32_t)); actual_crc = crc32c::Extend(actual_crc, record->data(), record->size()); if (actual_crc != crc32c::Unmask(record_crc)) { @@ -196,7 +196,7 @@ IOStatus ZenMetaLog::ReadRecord(Slice* record, std::string* scratch) { return IOStatus::OK(); } -ZenFS::ZenFS(ZonedBlockDevice* zbd, std::shared_ptr aux_fs, +ZenFS::ZenFS(ZonedBlockDevice *zbd, std::shared_ptr aux_fs, std::shared_ptr logger) : FileSystemWrapper(aux_fs), zbd_(zbd), logger_(logger) { Info(logger_, "ZenFS initializing"); @@ -220,18 +220,18 @@ ZenFS::~ZenFS() { } void ZenFS::LogFiles() { - std::map::iterator it; + std::map::iterator it; uint64_t total_size = 0; Info(logger_, " Files:\n"); for (it = files_.begin(); it != files_.end(); it++) { - ZoneFile* zFile = it->second; - std::vector extents = 
zFile->GetExtents(); + ZoneFile *zFile = it->second; + std::vector extents = zFile->GetExtents(); Info(logger_, " %-45s sz: %lu lh: %d", it->first.c_str(), zFile->GetFileSize(), zFile->GetWriteLifeTimeHint()); for (unsigned int i = 0; i < extents.size(); i++) { - ZoneExtent* extent = extents[i]; + ZoneExtent *extent = extents[i]; Info(logger_, " Extent %u {start=0x%lx, zone=%u, len=%u} ", i, extent->start_, (uint32_t)(extent->zone_->start_ / zbd_->GetZoneSize()), @@ -245,7 +245,7 @@ void ZenFS::LogFiles() { } void ZenFS::ClearFiles() { - std::map::iterator it; + std::map::iterator it; files_mtx_.lock(); for (it = files_.begin(); it != files_.end(); it++) delete it->second; files_.clear(); @@ -253,7 +253,7 @@ void ZenFS::ClearFiles() { } /* Assumes that files_mutex_ is held */ -IOStatus ZenFS::WriteSnapshotLocked(ZenMetaLog* meta_log) { +IOStatus ZenFS::WriteSnapshotLocked(ZenMetaLog *meta_log) { IOStatus s; std::string snapshot; @@ -261,14 +261,14 @@ IOStatus ZenFS::WriteSnapshotLocked(ZenMetaLog* meta_log) { s = meta_log->AddRecord(snapshot); if (s.ok()) { for (auto it = files_.begin(); it != files_.end(); it++) { - ZoneFile* zoneFile = it->second; + ZoneFile *zoneFile = it->second; zoneFile->MetadataSynced(); } } return s; } -IOStatus ZenFS::WriteEndRecord(ZenMetaLog* meta_log) { +IOStatus ZenFS::WriteEndRecord(ZenMetaLog *meta_log) { std::string endRecord; PutFixed32(&endRecord, kEndRecord); @@ -277,7 +277,7 @@ IOStatus ZenFS::WriteEndRecord(ZenMetaLog* meta_log) { /* Assumes the files_mtx_ is held */ IOStatus ZenFS::RollMetaZoneLocked() { - ZenMetaLog* new_meta_log; + ZenMetaLog *new_meta_log; Zone *new_meta_zone, *old_meta_zone; IOStatus s; @@ -318,7 +318,7 @@ IOStatus ZenFS::RollMetaZoneLocked() { return s; } -IOStatus ZenFS::PersistSnapshot(ZenMetaLog* meta_writer) { +IOStatus ZenFS::PersistSnapshot(ZenMetaLog *meta_writer) { IOStatus s; files_mtx_.lock(); @@ -357,7 +357,7 @@ IOStatus ZenFS::PersistRecord(std::string record) { return s; } -IOStatus 
ZenFS::SyncFileMetadata(ZoneFile* zoneFile) { +IOStatus ZenFS::SyncFileMetadata(ZoneFile *zoneFile) { std::string fileRecord; std::string output; @@ -378,8 +378,8 @@ IOStatus ZenFS::SyncFileMetadata(ZoneFile* zoneFile) { return s; } -ZoneFile* ZenFS::GetFile(std::string fname) { - ZoneFile* zoneFile = nullptr; +ZoneFile *ZenFS::GetFile(std::string fname) { + ZoneFile *zoneFile = nullptr; files_mtx_.lock(); if (files_.find(fname) != files_.end()) { zoneFile = files_[fname]; @@ -389,7 +389,7 @@ ZoneFile* ZenFS::GetFile(std::string fname) { } IOStatus ZenFS::DeleteFile(std::string fname) { - ZoneFile* zoneFile = nullptr; + ZoneFile *zoneFile = nullptr; IOStatus s; zoneFile = GetFile(fname); @@ -413,11 +413,11 @@ IOStatus ZenFS::DeleteFile(std::string fname) { return s; } -IOStatus ZenFS::NewSequentialFile(const std::string& fname, - const FileOptions& file_opts, - std::unique_ptr* result, - IODebugContext* dbg) { - ZoneFile* zoneFile = GetFile(fname); +IOStatus ZenFS::NewSequentialFile(const std::string &fname, + const FileOptions &file_opts, + std::unique_ptr *result, + IODebugContext *dbg) { + ZoneFile *zoneFile = GetFile(fname); Debug(logger_, "New sequential file: %s direct: %d\n", fname.c_str(), file_opts.use_direct_reads); @@ -431,11 +431,11 @@ IOStatus ZenFS::NewSequentialFile(const std::string& fname, return IOStatus::OK(); } -IOStatus ZenFS::NewRandomAccessFile(const std::string& fname, - const FileOptions& file_opts, - std::unique_ptr* result, - IODebugContext* dbg) { - ZoneFile* zoneFile = GetFile(fname); +IOStatus ZenFS::NewRandomAccessFile(const std::string &fname, + const FileOptions &file_opts, + std::unique_ptr *result, + IODebugContext *dbg) { + ZoneFile *zoneFile = GetFile(fname); Debug(logger_, "New random access file: %s direct: %d\n", fname.c_str(), file_opts.use_direct_reads); @@ -449,11 +449,11 @@ IOStatus ZenFS::NewRandomAccessFile(const std::string& fname, return IOStatus::OK(); } -IOStatus ZenFS::NewWritableFile(const std::string& fname, - 
const FileOptions& file_opts, - std::unique_ptr* result, - IODebugContext* /*dbg*/) { - ZoneFile* zoneFile; +IOStatus ZenFS::NewWritableFile(const std::string &fname, + const FileOptions &file_opts, + std::unique_ptr *result, + IODebugContext * /*dbg*/) { + ZoneFile *zoneFile; IOStatus s; Debug(logger_, "New writable file: %s direct: %d\n", fname.c_str(), @@ -484,11 +484,11 @@ IOStatus ZenFS::NewWritableFile(const std::string& fname, return s; } -IOStatus ZenFS::ReuseWritableFile(const std::string& fname, - const std::string& old_fname, - const FileOptions& file_opts, - std::unique_ptr* result, - IODebugContext* dbg) { +IOStatus ZenFS::ReuseWritableFile(const std::string &fname, + const std::string &old_fname, + const FileOptions &file_opts, + std::unique_ptr *result, + IODebugContext *dbg) { Debug(logger_, "Reuse writable file: %s old name: %s\n", fname.c_str(), old_fname.c_str()); @@ -498,8 +498,8 @@ IOStatus ZenFS::ReuseWritableFile(const std::string& fname, return NewWritableFile(fname, file_opts, result, dbg); } -IOStatus ZenFS::FileExists(const std::string& fname, const IOOptions& options, - IODebugContext* dbg) { +IOStatus ZenFS::FileExists(const std::string &fname, const IOOptions &options, + IODebugContext *dbg) { Debug(logger_, "FileExists: %s \n", fname.c_str()); if (GetFile(fname) == nullptr) { @@ -509,10 +509,10 @@ IOStatus ZenFS::FileExists(const std::string& fname, const IOOptions& options, } } -IOStatus ZenFS::ReopenWritableFile(const std::string& fname, - const FileOptions& options, - std::unique_ptr* result, - IODebugContext* dbg) { +IOStatus ZenFS::ReopenWritableFile(const std::string &fname, + const FileOptions &options, + std::unique_ptr *result, + IODebugContext *dbg) { Debug(logger_, "Reopen writable file: %s \n", fname.c_str()); if (GetFile(fname) != nullptr) @@ -521,17 +521,17 @@ IOStatus ZenFS::ReopenWritableFile(const std::string& fname, return target()->NewWritableFile(fname, options, result, dbg); } -IOStatus ZenFS::GetChildren(const 
std::string& dir, const IOOptions& options, - std::vector* result, - IODebugContext* dbg) { - std::map::iterator it; +IOStatus ZenFS::GetChildren(const std::string &dir, const IOOptions &options, + std::vector *result, + IODebugContext *dbg) { + std::map::iterator it; std::vector auxfiles; IOStatus s; Debug(logger_, "GetChildren: %s \n", dir.c_str()); target()->GetChildren(ToAuxPath(dir), options, &auxfiles, dbg); - for (const auto& f : auxfiles) { + for (const auto &f : auxfiles) { if (f != "." && f != "..") result->push_back(f); } @@ -555,10 +555,10 @@ IOStatus ZenFS::GetChildren(const std::string& dir, const IOOptions& options, return s; } -IOStatus ZenFS::DeleteFile(const std::string& fname, const IOOptions& options, - IODebugContext* dbg) { +IOStatus ZenFS::DeleteFile(const std::string &fname, const IOOptions &options, + IODebugContext *dbg) { IOStatus s; - ZoneFile* zoneFile = GetFile(fname); + ZoneFile *zoneFile = GetFile(fname); Debug(logger_, "Delete file: %s \n", fname.c_str()); @@ -576,10 +576,10 @@ IOStatus ZenFS::DeleteFile(const std::string& fname, const IOOptions& options, return s; } -IOStatus ZenFS::GetFileModificationTime(const std::string& f, - const IOOptions& options, - uint64_t* mtime, IODebugContext* dbg) { - ZoneFile* zoneFile; +IOStatus ZenFS::GetFileModificationTime(const std::string &f, + const IOOptions &options, + uint64_t *mtime, IODebugContext *dbg) { + ZoneFile *zoneFile; IOStatus s; Debug(logger_, "GetFileModificationTime: %s \n", f.c_str()); @@ -594,9 +594,9 @@ IOStatus ZenFS::GetFileModificationTime(const std::string& f, return s; } -IOStatus ZenFS::GetFileSize(const std::string& f, const IOOptions& options, - uint64_t* size, IODebugContext* dbg) { - ZoneFile* zoneFile; +IOStatus ZenFS::GetFileSize(const std::string &f, const IOOptions &options, + uint64_t *size, IODebugContext *dbg) { + ZoneFile *zoneFile; IOStatus s; Debug(logger_, "GetFileSize: %s \n", f.c_str()); @@ -613,9 +613,9 @@ IOStatus ZenFS::GetFileSize(const 
std::string& f, const IOOptions& options, return s; } -IOStatus ZenFS::RenameFile(const std::string& f, const std::string& t, - const IOOptions& options, IODebugContext* dbg) { - ZoneFile* zoneFile; +IOStatus ZenFS::RenameFile(const std::string &f, const std::string &t, + const IOOptions &options, IODebugContext *dbg) { + ZoneFile *zoneFile; IOStatus s; Debug(logger_, "Rename file: %s to : %s\n", f.c_str(), t.c_str()); @@ -647,13 +647,13 @@ IOStatus ZenFS::RenameFile(const std::string& f, const std::string& t, return s; } -void ZenFS::EncodeSnapshotTo(std::string* output) { - std::map::iterator it; +void ZenFS::EncodeSnapshotTo(std::string *output) { + std::map::iterator it; std::string files_string; PutFixed32(output, kCompleteFilesSnapshot); for (it = files_.begin(); it != files_.end(); it++) { std::string file_string; - ZoneFile* zFile = it->second; + ZoneFile *zFile = it->second; zFile->EncodeSnapshotTo(&file_string); PutLengthPrefixedSlice(&files_string, Slice(file_string)); @@ -661,8 +661,8 @@ void ZenFS::EncodeSnapshotTo(std::string* output) { PutLengthPrefixedSlice(output, Slice(files_string)); } -Status ZenFS::DecodeFileUpdateFrom(Slice* slice) { - ZoneFile* update = new ZoneFile(zbd_, "not_set", 0); +Status ZenFS::DecodeFileUpdateFrom(Slice *slice) { + ZoneFile *update = new ZoneFile(zbd_, "not_set", 0); uint64_t id; Status s; @@ -674,7 +674,7 @@ Status ZenFS::DecodeFileUpdateFrom(Slice* slice) { /* Check if this is an update to an existing file */ for (auto it = files_.begin(); it != files_.end(); it++) { - ZoneFile* zFile = it->second; + ZoneFile *zFile = it->second; if (id == zFile->GetID()) { std::string oldName = zFile->GetFilename(); @@ -699,13 +699,13 @@ Status ZenFS::DecodeFileUpdateFrom(Slice* slice) { return Status::OK(); } -Status ZenFS::DecodeSnapshotFrom(Slice* input) { +Status ZenFS::DecodeSnapshotFrom(Slice *input) { Slice slice; assert(files_.size() == 0); while (GetLengthPrefixedSlice(input, &slice)) { - ZoneFile* zoneFile = new 
ZoneFile(zbd_, "not_set", 0); + ZoneFile *zoneFile = new ZoneFile(zbd_, "not_set", 0); Status s = zoneFile->DecodeFrom(&slice); if (!s.ok()) return s; @@ -717,7 +717,7 @@ Status ZenFS::DecodeSnapshotFrom(Slice* input) { return Status::OK(); } -void ZenFS::EncodeFileDeletionTo(ZoneFile* zoneFile, std::string* output) { +void ZenFS::EncodeFileDeletionTo(ZoneFile *zoneFile, std::string *output) { std::string file_string; PutFixed64(&file_string, zoneFile->GetID()); @@ -727,7 +727,7 @@ void ZenFS::EncodeFileDeletionTo(ZoneFile* zoneFile, std::string* output) { PutLengthPrefixedSlice(output, Slice(file_string)); } -Status ZenFS::DecodeFileDeletionFrom(Slice* input) { +Status ZenFS::DecodeFileDeletionFrom(Slice *input) { uint64_t fileID; std::string fileName; Slice slice; @@ -742,7 +742,7 @@ Status ZenFS::DecodeFileDeletionFrom(Slice* input) { if (files_.find(fileName) == files_.end()) return Status::Corruption("Zone file deletion: no such file"); - ZoneFile* zoneFile = files_[fileName]; + ZoneFile *zoneFile = files_[fileName]; if (zoneFile->GetID() != fileID) return Status::Corruption("Zone file deletion: file ID missmatch"); @@ -752,7 +752,7 @@ Status ZenFS::DecodeFileDeletionFrom(Slice* input) { return Status::OK(); } -Status ZenFS::RecoverFrom(ZenMetaLog* log) { +Status ZenFS::RecoverFrom(ZenMetaLog *log) { bool at_least_one_snapshot = false; std::string scratch; uint32_t tag = 0; @@ -823,10 +823,10 @@ Status ZenFS::RecoverFrom(ZenMetaLog* log) { /* Mount the filesystem by recovering form the latest valid metadata zone */ Status ZenFS::Mount(bool readonly) { - std::vector metazones = zbd_->GetMetaZones(); + std::vector metazones = zbd_->GetMetaZones(); std::vector> valid_superblocks; std::vector> valid_logs; - std::vector valid_zones; + std::vector valid_zones; std::vector> seq_map; Status s; @@ -879,7 +879,7 @@ Status ZenFS::Mount(bool readonly) { If that fails go to the previous as we might have crashed when rolling metadata zone. 
*/ - for (const auto& sm : seq_map) { + for (const auto &sm : seq_map) { uint32_t i = sm.second; std::string scratch; std::unique_ptr log = std::move(valid_logs[i]); @@ -920,7 +920,7 @@ Status ZenFS::Mount(bool readonly) { } /* Free up old metadata zones, to get ready to roll */ - for (const auto& sm : seq_map) { + for (const auto &sm : seq_map) { uint32_t i = sm.second; /* Don't reset the current metadata zone */ if (i != r) { @@ -959,9 +959,9 @@ Status ZenFS::Mount(bool readonly) { } Status ZenFS::MkFS(std::string aux_fs_path, uint32_t finish_threshold) { - std::vector metazones = zbd_->GetMetaZones(); + std::vector metazones = zbd_->GetMetaZones(); std::unique_ptr log; - Zone* meta_zone = nullptr; + Zone *meta_zone = nullptr; IOStatus s; if (aux_fs_path.length() > 255) { @@ -986,7 +986,7 @@ Status ZenFS::MkFS(std::string aux_fs_path, uint32_t finish_threshold) { log.reset(new ZenMetaLog(zbd_, meta_zone)); - Superblock* super = new Superblock(zbd_, aux_fs_path, finish_threshold); + Superblock *super = new Superblock(zbd_, aux_fs_path, finish_threshold); std::string super_string; super->EncodeTo(&super_string); @@ -1008,7 +1008,7 @@ std::map ZenFS::GetWriteLifeTimeHints() { std::map hint_map; for (auto it = files_.begin(); it != files_.end(); it++) { - ZoneFile* zoneFile = it->second; + ZoneFile *zoneFile = it->second; std::string filename = it->first; hint_map.insert(std::make_pair(filename, zoneFile->GetWriteLifeTimeHint())); } @@ -1020,7 +1020,7 @@ std::map ZenFS::GetWriteLifeTimeHints() { static std::string GetLogFilename(std::string bdev) { std::ostringstream ss; time_t t = time(0); - struct tm* log_start = std::localtime(&t); + struct tm *log_start = std::localtime(&t); char buf[40]; std::strftime(buf, sizeof(buf), "%Y-%m-%d_%H:%M:%S.log", log_start); @@ -1030,7 +1030,7 @@ static std::string GetLogFilename(std::string bdev) { } #endif -Status NewZenFS(FileSystem** fs, const std::string& bdevname) { +Status NewZenFS(FileSystem **fs, const std::string 
&bdevname) { std::shared_ptr logger; Status s; @@ -1043,7 +1043,7 @@ Status NewZenFS(FileSystem** fs, const std::string& bdevname) { } #endif - ZonedBlockDevice* zbd = new ZonedBlockDevice(bdevname, logger); + ZonedBlockDevice *zbd = new ZonedBlockDevice(bdevname, logger); IOStatus zbd_status = zbd->Open(); if (!zbd_status.ok()) { Error(logger, "Failed to open zoned block device: %s", @@ -1051,7 +1051,7 @@ Status NewZenFS(FileSystem** fs, const std::string& bdevname) { return Status::IOError(zbd_status.ToString()); } - ZenFS* zenFS = new ZenFS(zbd, FileSystem::Default(), logger); + ZenFS *zenFS = new ZenFS(zbd, FileSystem::Default(), logger); s = zenFS->Mount(false); if (!s.ok()) { delete zenFS; @@ -1064,17 +1064,17 @@ Status NewZenFS(FileSystem** fs, const std::string& bdevname) { std::map ListZenFileSystems() { std::map zenFileSystems; - DIR* dir = opendir("/sys/class/block"); - struct dirent* entry; + DIR *dir = opendir("/sys/class/block"); + struct dirent *entry; while (NULL != (entry = readdir(dir))) { if (entry->d_type == DT_LNK) { std::string zbdName = std::string(entry->d_name); - ZonedBlockDevice* zbd = new ZonedBlockDevice(zbdName, nullptr); + ZonedBlockDevice *zbd = new ZonedBlockDevice(zbdName, nullptr); IOStatus zbd_status = zbd->Open(true); if (zbd_status.ok()) { - std::vector metazones = zbd->GetMetaZones(); + std::vector metazones = zbd->GetMetaZones(); std::string scratch; Slice super_record; Status s; @@ -1113,10 +1113,10 @@ extern "C" FactoryFunc zenfs_filesystem_reg; FactoryFunc zenfs_filesystem_reg = ObjectLibrary::Default()->Register( - "zenfs://.*", [](const std::string& uri, std::unique_ptr* f, - std::string* errmsg) { + "zenfs://.*", [](const std::string &uri, std::unique_ptr *f, + std::string *errmsg) { std::string devID = uri; - FileSystem* fs = nullptr; + FileSystem *fs = nullptr; Status s; devID.replace(0, strlen("zenfs://"), ""); diff --git a/fs/fs_zenfs.h b/fs/fs_zenfs.h index c89a3ed6..79efaec5 100644 --- a/fs/fs_zenfs.h +++ 
b/fs/fs_zenfs.h @@ -99,6 +99,7 @@ class ZenMetaLog { }; class ZenFS : public FileSystemWrapper { + friend class ZenFSGCWorker; ZonedBlockDevice* zbd_; std::map files_; std::mutex files_mtx_; diff --git a/fs/io_zenfs.cc b/fs/io_zenfs.cc index cfa2d112..92c7222b 100644 --- a/fs/io_zenfs.cc +++ b/fs/io_zenfs.cc @@ -23,13 +23,14 @@ #include #include +#include "fs_zenfs.h" #include "rocksdb/env.h" #include "util/coding.h" #include "zbd_zenfs.h" namespace ROCKSDB_NAMESPACE { -Status ZoneExtent::DecodeFrom(Slice* input) { +Status ZoneExtent::DecodeFrom(Slice *input) { if (input->size() != (sizeof(start_) + sizeof(length_))) return Status::Corruption("ZoneExtent", "Error: length missmatch"); @@ -38,7 +39,7 @@ Status ZoneExtent::DecodeFrom(Slice* input) { return Status::OK(); } -void ZoneExtent::EncodeTo(std::string* output) { +void ZoneExtent::EncodeTo(std::string *output) { PutFixed64(output, start_); PutFixed32(output, length_); } @@ -52,7 +53,7 @@ enum ZoneFileTag : uint32_t { kModificationTime = 6, }; -void ZoneFile::EncodeTo(std::string* output, uint32_t extent_start) { +void ZoneFile::EncodeTo(std::string *output, uint32_t extent_start) { PutFixed32(output, kFileID); PutFixed64(output, file_id_); @@ -79,7 +80,7 @@ void ZoneFile::EncodeTo(std::string* output, uint32_t extent_start) { * as files will always be read-only after mount */ } -Status ZoneFile::DecodeFrom(Slice* input) { +Status ZoneFile::DecodeFrom(Slice *input) { uint32_t tag = 0; GetFixed32(input, &tag); @@ -88,7 +89,7 @@ Status ZoneFile::DecodeFrom(Slice* input) { while (true) { Slice slice; - ZoneExtent* extent; + ZoneExtent *extent; Status s; if (!GetFixed32(input, &tag)) break; @@ -140,7 +141,7 @@ Status ZoneFile::DecodeFrom(Slice* input) { return Status::OK(); } -Status ZoneFile::MergeUpdate(ZoneFile* update) { +Status ZoneFile::MergeUpdate(ZoneFile *update) { if (file_id_ != update->GetID()) return Status::Corruption("ZoneFile update", "ID missmatch"); @@ -149,10 +150,10 @@ Status 
ZoneFile::MergeUpdate(ZoneFile* update) { SetWriteLifeTimeHint(update->GetWriteLifeTimeHint()); SetFileModificationTime(update->GetFileModificationTime()); - std::vector update_extents = update->GetExtents(); + std::vector update_extents = update->GetExtents(); for (long unsigned int i = 0; i < update_extents.size(); i++) { - ZoneExtent* extent = update_extents[i]; - Zone* zone = extent->zone_; + ZoneExtent *extent = update_extents[i]; + Zone *zone = extent->zone_; zone->used_capacity_ += extent->length_; extents_.push_back(new ZoneExtent(extent->start_, extent->length_, zone)); } @@ -162,7 +163,7 @@ Status ZoneFile::MergeUpdate(ZoneFile* update) { return Status::OK(); } -ZoneFile::ZoneFile(ZonedBlockDevice* zbd, std::string filename, +ZoneFile::ZoneFile(ZonedBlockDevice *zbd, std::string filename, uint64_t file_id) : zbd_(zbd), active_zone_(NULL), @@ -185,7 +186,7 @@ void ZoneFile::SetFileModificationTime(time_t mt) { m_time_ = mt; } ZoneFile::~ZoneFile() { for (auto e = std::begin(extents_); e != std::end(extents_); ++e) { - Zone* zone = (*e)->zone_; + Zone *zone = (*e)->zone_; assert(zone && zone->used_capacity_ >= (*e)->length_); zone->used_capacity_ -= (*e)->length_; @@ -206,7 +207,7 @@ void ZoneFile::OpenWR() { open_for_wr_ = true; } bool ZoneFile::IsOpenForWR() { return open_for_wr_; } -ZoneExtent* ZoneFile::GetExtent(uint64_t file_offset, uint64_t* dev_offset) { +ZoneExtent *ZoneFile::GetExtent(uint64_t file_offset, uint64_t *dev_offset) { for (unsigned int i = 0; i < extents_.size(); i++) { if (file_offset < extents_[i]->length_) { *dev_offset = extents_[i]->start_ + file_offset; @@ -218,16 +219,16 @@ ZoneExtent* ZoneFile::GetExtent(uint64_t file_offset, uint64_t* dev_offset) { return NULL; } -IOStatus ZoneFile::PositionedRead(uint64_t offset, size_t n, Slice* result, - char* scratch, bool direct) { +IOStatus ZoneFile::PositionedRead(uint64_t offset, size_t n, Slice *result, + char *scratch, bool direct) { int f = zbd_->GetReadFD(); int f_direct = 
zbd_->GetReadDirectFD(); - char* ptr; + char *ptr; uint64_t r_off; size_t r_sz; ssize_t r = 0; size_t read = 0; - ZoneExtent* extent; + ZoneExtent *extent; uint64_t extent_end; IOStatus s; @@ -298,7 +299,7 @@ IOStatus ZoneFile::PositionedRead(uint64_t offset, size_t n, Slice* result, read = 0; } - *result = Slice((char*)scratch, read); + *result = Slice((char *)scratch, read); return s; } @@ -321,7 +322,7 @@ void ZoneFile::PushExtent() { } /* Assumes that data and size are block aligned */ -IOStatus ZoneFile::Append(void* data, int data_size, int valid_size) { +IOStatus ZoneFile::Append(void *data, int data_size, int valid_size) { uint32_t left = data_size; uint32_t wr_size, offset = 0; IOStatus s; @@ -351,7 +352,7 @@ IOStatus ZoneFile::Append(void* data, int data_size, int valid_size) { wr_size = left; if (wr_size > active_zone_->capacity_) wr_size = active_zone_->capacity_; - s = active_zone_->Append((char*)data + offset, wr_size); + s = active_zone_->Append((char *)data + offset, wr_size); if (!s.ok()) return s; fileSize += wr_size; @@ -368,9 +369,9 @@ IOStatus ZoneFile::SetWriteLifeTimeHint(Env::WriteLifeTimeHint lifetime) { return IOStatus::OK(); } -ZonedWritableFile::ZonedWritableFile(ZonedBlockDevice* zbd, bool _buffered, - ZoneFile* zoneFile, - MetadataWriter* metadata_writer) { +ZonedWritableFile::ZonedWritableFile(ZonedBlockDevice *zbd, bool _buffered, + ZoneFile *zoneFile, + MetadataWriter *metadata_writer) { wp = zoneFile->GetFileSize(); assert(wp == 0); @@ -382,7 +383,8 @@ ZonedWritableFile::ZonedWritableFile(ZonedBlockDevice* zbd, bool _buffered, zoneFile_ = zoneFile; if (buffered) { - int ret = posix_memalign((void**)&buffer, sysconf(_SC_PAGESIZE), buffer_sz); + int ret = + posix_memalign((void **)&buffer, sysconf(_SC_PAGESIZE), buffer_sz); if (ret) buffer = nullptr; @@ -401,14 +403,14 @@ ZonedWritableFile::~ZonedWritableFile() { ZonedWritableFile::MetadataWriter::~MetadataWriter() {} IOStatus ZonedWritableFile::Truncate(uint64_t size, - const 
IOOptions& /*options*/, - IODebugContext* /*dbg*/) { + const IOOptions & /*options*/, + IODebugContext * /*dbg*/) { zoneFile_->SetFileSize(size); return IOStatus::OK(); } -IOStatus ZonedWritableFile::Fsync(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) { +IOStatus ZonedWritableFile::Fsync(const IOOptions & /*options*/, + IODebugContext * /*dbg*/) { IOStatus s; buffer_mtx_.lock(); @@ -422,26 +424,26 @@ IOStatus ZonedWritableFile::Fsync(const IOOptions& /*options*/, return metadata_writer_->Persist(zoneFile_); } -IOStatus ZonedWritableFile::Sync(const IOOptions& options, - IODebugContext* dbg) { +IOStatus ZonedWritableFile::Sync(const IOOptions &options, + IODebugContext *dbg) { return Fsync(options, dbg); } -IOStatus ZonedWritableFile::Flush(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) { +IOStatus ZonedWritableFile::Flush(const IOOptions & /*options*/, + IODebugContext * /*dbg*/) { return IOStatus::OK(); } IOStatus ZonedWritableFile::RangeSync(uint64_t offset, uint64_t nbytes, - const IOOptions& options, - IODebugContext* dbg) { + const IOOptions &options, + IODebugContext *dbg) { if (wp < offset + nbytes) return Fsync(options, dbg); return IOStatus::OK(); } -IOStatus ZonedWritableFile::Close(const IOOptions& options, - IODebugContext* dbg) { +IOStatus ZonedWritableFile::Close(const IOOptions &options, + IODebugContext *dbg) { Fsync(options, dbg); zoneFile_->CloseWR(); @@ -457,10 +459,10 @@ IOStatus ZonedWritableFile::FlushBuffer() { align = buffer_pos % block_sz; if (align) pad_sz = block_sz - align; - if (pad_sz) memset((char*)buffer + buffer_pos, 0x0, pad_sz); + if (pad_sz) memset((char *)buffer + buffer_pos, 0x0, pad_sz); wr_sz = buffer_pos + pad_sz; - s = zoneFile_->Append((char*)buffer, wr_sz, buffer_pos); + s = zoneFile_->Append((char *)buffer, wr_sz, buffer_pos); if (!s.ok()) { return s; } @@ -471,14 +473,14 @@ IOStatus ZonedWritableFile::FlushBuffer() { return IOStatus::OK(); } -IOStatus ZonedWritableFile::BufferedWrite(const Slice& 
slice) { +IOStatus ZonedWritableFile::BufferedWrite(const Slice &slice) { uint32_t buffer_left = buffer_sz - buffer_pos; uint32_t data_left = slice.size(); - char* data = (char*)slice.data(); + char *data = (char *)slice.data(); uint32_t tobuffer; int blocks, aligned_sz; int ret; - void* alignbuf; + void *alignbuf; IOStatus s; if (buffer_pos || data_left <= buffer_left) { @@ -530,9 +532,9 @@ IOStatus ZonedWritableFile::BufferedWrite(const Slice& slice) { return IOStatus::OK(); } -IOStatus ZonedWritableFile::Append(const Slice& data, - const IOOptions& /*options*/, - IODebugContext* /*dbg*/) { +IOStatus ZonedWritableFile::Append(const Slice &data, + const IOOptions & /*options*/, + IODebugContext * /*dbg*/) { IOStatus s; if (buffered) { @@ -540,16 +542,16 @@ IOStatus ZonedWritableFile::Append(const Slice& data, s = BufferedWrite(data); buffer_mtx_.unlock(); } else { - s = zoneFile_->Append((void*)data.data(), data.size(), data.size()); + s = zoneFile_->Append((void *)data.data(), data.size(), data.size()); if (s.ok()) wp += data.size(); } return s; } -IOStatus ZonedWritableFile::PositionedAppend(const Slice& data, uint64_t offset, - const IOOptions& /*options*/, - IODebugContext* /*dbg*/) { +IOStatus ZonedWritableFile::PositionedAppend(const Slice &data, uint64_t offset, + const IOOptions & /*options*/, + IODebugContext * /*dbg*/) { IOStatus s; if (offset != wp) { @@ -562,7 +564,7 @@ IOStatus ZonedWritableFile::PositionedAppend(const Slice& data, uint64_t offset, s = BufferedWrite(data); buffer_mtx_.unlock(); } else { - s = zoneFile_->Append((void*)data.data(), data.size(), data.size()); + s = zoneFile_->Append((void *)data.data(), data.size(), data.size()); if (s.ok()) wp += data.size(); } @@ -573,9 +575,9 @@ void ZonedWritableFile::SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) { zoneFile_->SetWriteLifeTimeHint(hint); } -IOStatus ZonedSequentialFile::Read(size_t n, const IOOptions& /*options*/, - Slice* result, char* scratch, - IODebugContext* /*dbg*/) { 
+IOStatus ZonedSequentialFile::Read(size_t n, const IOOptions & /*options*/,
+                                   Slice *result, char *scratch,
+                                   IODebugContext * /*dbg*/) {
   IOStatus s;
 
   s = zoneFile_->PositionedRead(rp, n, result, scratch, direct_);
@@ -592,20 +594,20 @@ IOStatus ZonedSequentialFile::Skip(uint64_t n) {
 }
 
 IOStatus ZonedSequentialFile::PositionedRead(uint64_t offset, size_t n,
-                                             const IOOptions& /*options*/,
-                                             Slice* result, char* scratch,
-                                             IODebugContext* /*dbg*/) {
+                                             const IOOptions & /*options*/,
+                                             Slice *result, char *scratch,
+                                             IODebugContext * /*dbg*/) {
   return zoneFile_->PositionedRead(offset, n, result, scratch, direct_);
 }
 
 IOStatus ZonedRandomAccessFile::Read(uint64_t offset, size_t n,
-                                     const IOOptions& /*options*/,
-                                     Slice* result, char* scratch,
-                                     IODebugContext* /*dbg*/) const {
+                                     const IOOptions & /*options*/,
+                                     Slice *result, char *scratch,
+                                     IODebugContext * /*dbg*/) const {
   return zoneFile_->PositionedRead(offset, n, result, scratch, direct_);
 }
 
-size_t ZoneFile::GetUniqueId(char* id, size_t max_size) {
+size_t ZoneFile::GetUniqueId(char *id, size_t max_size) {
   /* Based on the posix fs implementation */
   if (max_size < kMaxVarint64Length * 3) {
     return 0;
@@ -618,7 +620,7 @@ size_t ZoneFile::GetUniqueId(char* id, size_t max_size) {
     return 0;
   }
 
-  char* rid = id;
+  char *rid = id;
   rid = EncodeVarint64(rid, buf.st_dev);
   rid = EncodeVarint64(rid, buf.st_ino);
   rid = EncodeVarint64(rid, file_id_);
@@ -628,10 +630,89 @@ size_t ZoneFile::GetUniqueId(char* id, size_t max_size) {
   return 0;
 }
 
-size_t ZonedRandomAccessFile::GetUniqueId(char* id, size_t max_size) const {
+size_t ZonedRandomAccessFile::GetUniqueId(char *id, size_t max_size) const {
   return zoneFile_->GetUniqueId(id, max_size);
 }
 
+/* Set up a GC worker for file system `zenfs` and zoned device `zbd`. Both
+ * pointers are stored as given and the residue counter starts at zero. */
+ZenFSGCWorker::ZenFSGCWorker(ZenFS *zenfs, ZonedBlockDevice *zbd)
+    : fs(zenfs), zbd_(zbd), total_residue_(0) {}
+
+/* Walk every file of the file system and account for the extents that live
+ * in FULL zones: their length is added to zone_residue and total_residue_,
+ * the extent is queued in extent_list, and the owning file is remembered in
+ * files_moved_to_dst_zone. Does nothing when no file system was supplied. */
+void ZenFSGCWorker::CheckZoneValidResidualData() {
+  if (fs == nullptr) return;
+
+  fs->files_mtx_.lock();
+  for (const auto &file_entry : fs->files_) {
+    ZoneFile *existFile = file_entry.second;
+    bool has_residue = false;
+
+    for (ZoneExtent *extent : existFile->extents_) {
+      Zone *zone_idx = extent->zone_;
+      /* Only FULL zones matter. Skip this extent but keep scanning: a later
+       * extent of the same file may still sit in a full zone. */
+      if (!zone_idx->IsFull()) {
+        continue;
+      }
+
+      zone_residue[zone_idx] += extent->length_;
+      total_residue_ += extent->length_;
+      extent_list.push_back(extent);
+      has_residue = true;
+    }
+
+    /* Only files that contributed an extent have data to move. */
+    if (has_residue) files_moved_to_dst_zone.push_back(existFile);
+  }
+  fs->files_mtx_.unlock();
+}
+
+/* Reset the zones in merge_zone_list so their space can be reused. */
+void ZenFSGCWorker::ZoneResetToReclaim() {
+  for (Zone *zone_idx : merge_zone_list) {
+    /* A reset discards the zone's contents, so zones that still report valid
+     * data are left alone (AllocateZone also resets unused zones only). */
+    if (zone_idx->IsUsed()) continue;
+
+    IOStatus s = zone_idx->Reset();
+    if (!s.ok()) {
+      /* TODO: report the failure once the worker has access to a logger. */
+      // Debug(logger_, "Failed resetting zone when executing GC!");
+    }
+  }
+}
+
+/* Replace merge_zone_list with the device's current reclaim candidates and
+ * return a copy of it (empty when no device was supplied). */
+std::vector<Zone *> ZenFSGCWorker::MarkZonesToMergeData() {
+  merge_zone_list.clear();
+  if (zbd_ != nullptr) merge_zone_list = zbd_->GetReclaimZones();
+  return merge_zone_list;
+}
+
+/* Allocate destination zones until their combined max capacity covers
+ * ttl_residue. Zones are appended to dst_zone_list, a copy of which is
+ * returned. Allocation stops early when no device was supplied or no further
+ * zone is available, so callers must check that the result is large enough. */
+std::vector<Zone *> ZenFSGCWorker::GetDestZoneToMoveValidData(
+    uint64_t ttl_residue) {
+  while (zbd_ != nullptr && ttl_residue > 0) {
+    Zone *zone = zbd_->AllocateZone(Env::WLTH_NOT_SET);
+    /* AllocateZone() returns a pointer; never dereference or record a null
+     * one. NOTE(review): assumed to signal "no zone available" - confirm. */
+    if (zone == nullptr) break;
+
+    dst_zone_list.push_back(zone);
+    ttl_residue -= (zone->max_capacity_ >= ttl_residue) ? ttl_residue
+                                                        : zone->max_capacity_;
+  }
+  return dst_zone_list;
+}
+
 }  // namespace ROCKSDB_NAMESPACE
 
 #endif  // !defined(ROCKSDB_LITE) && !defined(OS_WIN)
diff --git a/fs/io_zenfs.h b/fs/io_zenfs.h
index a096cebf..659f8f3e 100644
--- a/fs/io_zenfs.h
+++ b/fs/io_zenfs.h
@@ -25,6 +25,8 @@
 
 namespace ROCKSDB_NAMESPACE {
 
+class ZenFS;
+
 class ZoneExtent {
  public:
   uint64_t start_;
@@ -37,6 +39,8 @@ class ZoneExtent {
 };
 
 class ZoneFile {
+  friend class ZenFSGCWorker;
+
  protected:
   ZonedBlockDevice* zbd_;
   std::vector<ZoneExtent*> extents_;
@@ -226,6 +230,45 @@ class ZonedRandomAccessFile : public FSRandomAccessFile {
   size_t GetUniqueId(char* id, size_t max_size) const override;
 };
 
+/* Garbage-collection helper. It records the data left in full zones, picks
+ * the zones to reclaim and the zones to move data into, and resets reclaimed
+ * zones. */
+class ZenFSGCWorker {
+  // File system whose files are scanned. The worker reads its file table and
+  // mutex, so ZenFS has to grant access (e.g. as a friend class).
+  ZenFS* fs;
+
+  ZonedBlockDevice* zbd_;
+
+  // Residual (still valid) data size recorded for each zone.
+  // NOTE(review): the uint64_t counter types are assumed - confirm.
+  std::map<Zone*, uint64_t> zone_residue;
+  std::vector<ZoneFile*> files_moved_to_dst_zone;
+
+  // Is atomic necessary since only one thread at one time?
+  std::atomic<uint64_t> total_residue_;
+
+  std::vector<Zone*> merge_zone_list;
+  // Extents found in the marked zones, used to move the extent data.
+  std::vector<ZoneExtent*> extent_list;
+  // A list, since the residual data may be larger than one zone's capacity.
+  std::vector<Zone*> dst_zone_list;
+
+ public:
+  // Both pointers default to null; CheckZoneValidResidualData() needs the
+  // file system, the zone selection/allocation methods need the device.
+  explicit ZenFSGCWorker(ZenFS* zenfs = nullptr,
+                         ZonedBlockDevice* zbd = nullptr);
+
+  void CheckZoneValidResidualData();  // work for below functions
+
+  std::vector<Zone*> MarkZonesToMergeData();
+  std::vector<Zone*> GetDestZoneToMoveValidData(uint64_t ttl_residue);
+  void MoveValidDataToNewDestZone();  // extent_list and dst_zone_list
+  void ZoneResetToReclaim();          // merge_zone_list
+  void UpdateMetadataAfterMerge();    // files_moved_to_dst_zone
+};
+
 }  // namespace ROCKSDB_NAMESPACE
 
 #endif  // !defined(ROCKSDB_LITE) && defined(OS_LINUX)
diff --git a/fs/zbd_zenfs.cc b/fs/zbd_zenfs.cc
index e66a3107..2a5090ec 100644
--- a/fs/zbd_zenfs.cc
+++ b/fs/zbd_zenfs.cc
@@ -163,6 +163,25 @@ Zone *ZonedBlockDevice::GetIOZone(uint64_t offset) {
   return nullptr;
 }
 
+/* Returns the IO zones that are candidates for garbage collection: zones
+ * that are used or full and whose valid data is at most 10% of their max
+ * capacity. */
+std::vector<Zone *> ZonedBlockDevice::GetReclaimZones(void) {
+  std::vector<Zone *> GCReclaimZones;
+  for (const auto z : io_zones) {
+    if ((!z->IsUsed()) && (!z->IsFull())) continue;
+    /* Equivalent to max_capacity_ / used_capacity_ >= 10 for non-zero usage,
+     * but a full zone without any valid data (used_capacity_ == 0) is picked
+     * as well instead of causing a division by zero. */
+    const uint64_t used = static_cast<uint64_t>(z->used_capacity_);
+    if (used <= z->max_capacity_ / 10) {
+      GCReclaimZones.push_back(z);
+    }
+  }
+
+  return GCReclaimZones;
+}
+
 ZonedBlockDevice::ZonedBlockDevice(std::string bdevname,
                                    std::shared_ptr<Logger> logger)
     : filename_("/dev/" + bdevname), logger_(logger) {
@@ -474,6 +493,7 @@ Zone *ZonedBlockDevice::AllocateZone(Env::WriteLifeTimeHint file_lifetime) {
     if (z->open_for_write_ || z->IsEmpty() || (z->IsFull() && z->IsUsed()))
       continue;
 
+    /* reset unused zone */
     if (!z->IsUsed()) {
       if (!z->IsFull()) active_io_zones_--;
       s = z->Reset();
@@ -557,7 +577,6 @@ Zone *ZonedBlockDevice::AllocateZone(Env::WriteLifeTimeHint file_lifetime) {
 std::string ZonedBlockDevice::GetFilename() { return filename_; }
 uint32_t ZonedBlockDevice::GetBlockSize() { return block_sz_; }
 
-
 }  // namespace ROCKSDB_NAMESPACE
 
 #endif  // !defined(ROCKSDB_LITE) && !defined(OS_WIN)
diff --git a/fs/zbd_zenfs.h b/fs/zbd_zenfs.h
index 99e34f78..d23f8e8b 100644
--- a/fs/zbd_zenfs.h
+++ b/fs/zbd_zenfs.h
@@ -112,6 +112,7 @@ class ZonedBlockDevice {
   uint64_t GetZoneSize() { return zone_sz_; }
   uint32_t GetNrZones() { return nr_zones_; }
   std::vector<Zone *> GetMetaZones() { return meta_zones; }
+  std::vector<Zone *> GetReclaimZones();
 
   void SetFinishTreshold(uint32_t threshold) { finish_threshold_ = threshold; }
 