From 4c91e394f0197157cd39535d301a715edc1555af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=B6=E7=82=AB=20=E5=88=98?= <1928099560@qq.com> Date: Wed, 14 May 2025 20:51:06 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AF=B9=E8=B1=A1=E6=B1=A0=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- asco/asco_main.cpp | 9 +- asco/core/object_pool.h | 202 ++++++++++++++++++++++++++++++++++++++++ asco/core/timer.h | 2 + asco/future.h | 7 +- asco/sync/spin.h | 16 ++-- asco/utils/page_map.h | 106 +++++++++++++++++++++ tests/futurets.cpp | 8 +- 7 files changed, 336 insertions(+), 14 deletions(-) create mode 100644 asco/core/object_pool.h create mode 100644 asco/utils/page_map.h diff --git a/asco/asco_main.cpp b/asco/asco_main.cpp index 41e3d53..8182f12 100644 --- a/asco/asco_main.cpp +++ b/asco/asco_main.cpp @@ -1,4 +1,7 @@ -#include +#ifdef __linux__ +# include +#endif + #include #include #include @@ -26,6 +29,7 @@ int main(int argc, const char **argv, const char **env) { })}; runtime::sys::set_args(argc, argv); runtime::sys::set_env(const_cast(env)); +#ifdef __linux__ try { return async_main().await(); } catch (std::exception &e) { @@ -36,4 +40,7 @@ int main(int argc, const char **argv, const char **env) { << std::format(" what(): {}", e.what()) << std::endl; return -1; } +#else + return async_main().await(); +#endif } diff --git a/asco/core/object_pool.h b/asco/core/object_pool.h new file mode 100644 index 0000000..0d987c6 --- /dev/null +++ b/asco/core/object_pool.h @@ -0,0 +1,202 @@ +#ifndef ASCO_OBJECT_POLL_H +#define ASCO_OBJECT_POLL_H 1 + +#include +#include +#include +#include + +// namespace asco { +// const size_t align_num = 16; +// const size_t alloc_space = 1024 * 1024; + +// template +// class object_pool { +// public: +// static object_pool &get_pool() { return _object_pool; } + +// void *alloc(size_t n) { +// // 获取对应的memory内存块 +// auto guard = freelists.lock(); +// if (guard->find(n) == guard->end()) +// guard->emplace(n, nullptr); +// auto &freelist = (guard->find(n))->second; + +// void *ret = nullptr; +// if (freelist != nullptr) { +// // 如果回收站有空间,直接将内存块从回收站中取出使用 +// ret = (std::byte *)freelist +// + align_num; // 前16byte的空间用于存放地址(防止用户的内存块多占用缓冲行) +// freelist = *(void **)freelist; +// return ret; +// } +// // 计算实际需要的内存大小 +// size_t obj_len = align_num + align_byte(n); +// // 若回收站没有空间,则从memory中取空间构造 +// auto mem_guard = memory.lock(); +// if (leftMemLen < obj_len) { +// // 如果memory中空间不够用了,则重新开辟一块空间 +// leftMemLen = alloc_space; +// *mem_guard = (std::byte *)(::operator new(alloc_space)); +// if (!*mem_guard) { +// throw std::bad_alloc(); +// } +// } +// ret = (std::byte *)(*mem_guard) + align_num; +// // 保证可以存下单个地址长度以便建立链表 +// (*mem_guard) += obj_len; +// leftMemLen -= obj_len; +// return ret; +// } + +// void dealloc(void *obj, size_t n) { +// // 获取对应的自由链表 +// auto freelist = freelists.lock()->find(n)->second; +// // 头插法将freelist的地址存放在obj首个地址长度当中 +// void *actual_addr = (std::byte *)obj - align_num; +// *(void **)(actual_addr) = freelist; +// freelist = actual_addr; +// } + +// // 最终释放内存 +// ~object_pool() {} + +// private: +// object_pool() {} +// size_t align_byte(size_t n) { +// return (n + align_num - 1) & ~(align_num - 1); // 内存对齐至16byte +// } +// object_pool(const object_pool &) = delete; +// object_pool &operator=(const object_pool &) = delete; +// object_pool(object_pool &&) = delete; +// object_pool &operator=(object_pool &&) = delete; + +// private: +// static object_pool _object_pool; +// // std::byte *memory = nullptr; +// size_t leftMemLen = 0; +// // void *freelist = nullptr; +// spin memory; +// spin> freelists; +// }; + +// template +// inline object_pool object_pool::_object_pool; +// } // namespace asco + +#include +#include +#include +#include +#include + +namespace asco { +constexpr size_t align_num = 16; +constexpr size_t kMaxSmallSize = 256; +constexpr size_t kNumClasses = kMaxSmallSize / align_num; +constexpr size_t kBatchAllocSize = 1024 * 1024; + +template +class object_pool { + struct memory_block { + char* ptr; + size_t size; + }; + +public: + static object_pool& get_pool() { + static object_pool instance; + return instance; + } + + void* alloc(size_t size) { + if (size > kMaxSmallSize) { + return alloc_large(size); + } + + // 1. 尝试从线程本地缓存分配 + size_t class_id = size_to_class_id(size); + if (tls_freelist_[class_id]) { + void* obj = tls_freelist_[class_id]; + tls_freelist_[class_id] = *(void**)obj; + return obj; + } + + // 2. 从全局池批量补充 + return refill(class_id, class_id_to_size(class_id)); + } + + void dealloc(void* obj, size_t size) { + if (size > kMaxSmallSize) { + dealloc_large(obj, size); + return; + } + + // 回收至线程本地缓存 + size_t class_id = size_to_class_id(size); + *(void**)obj = tls_freelist_[class_id]; + tls_freelist_[class_id] = obj; + } + +private: + // 大小分级转换 + static size_t size_to_class_id(size_t size) { + return (size + align_num - 1) / align_num; + } + static size_t class_id_to_size(size_t class_id) { + return class_id * align_num; + } + + // 从全局池补充本地缓存 + void* refill(size_t class_id, size_t size) { + constexpr int kBatchSize = 32; // 每次补充32个对象 + + std::lock_guard lock(mutex_); + char* chunk = alloc_chunk(size * kBatchSize); + + // 构建自由链表 + for (int i = 0; i < kBatchSize - 1; ++i) { + void** next = reinterpret_cast(chunk + i * size); + *next = chunk + (i + 1) * size; + } + + *(void**)(chunk + (kBatchSize - 1) * size) = nullptr; + tls_freelist_[class_id] = chunk + size; // 返回第一个对象 + return chunk; + } + + // 大对象直接分配 + void* alloc_large(size_t size) { + return ::operator new(size); + } + void dealloc_large(void* obj, size_t size) { + ::operator delete(obj); + } + + // 分配连续内存块 + char* alloc_chunk(size_t size) { + if (current_block_.size < size) { + current_block_.ptr = static_cast(::operator new(kBatchAllocSize)); + current_block_.size = kBatchAllocSize; + blocks_.push_back(current_block_.ptr); + } + + char* result = current_block_.ptr; + current_block_.ptr += size; + current_block_.size -= size; + return result; + } + + // 线程本地缓存 + inline thread_local static std::array tls_freelist_; + + // 全局内存块管理 + memory_block current_block_; + std::vector blocks_; + std::mutex mutex_; +}; + +} // namespace asco + + +#endif \ No newline at end of file diff --git a/asco/core/timer.h b/asco/core/timer.h index cb8f4a4..426e370 100644 --- a/asco/core/timer.h +++ b/asco/core/timer.h @@ -45,7 +45,9 @@ class timer { atomic_bool init_waiter{false}; std::jthread timerthr; +#ifdef __linux__ ::pthread_t ptid; +#endif // Too short for both sleep or spin wait under this duration, merge two awake_point into one. constexpr static nanoseconds approx_time = 30ns; diff --git a/asco/future.h b/asco/future.h index 94266a8..772f828 100644 --- a/asco/future.h +++ b/asco/future.h @@ -14,6 +14,7 @@ #include #include #include +#include #if defined(_MSC_VER) && !defined(__clang__) # error "[ASCO] Compile with clang-cl instead of MSVC" @@ -56,14 +57,16 @@ struct future_base { size_t caller_task_id{0}; void *operator new(size_t n) noexcept { - auto *p = static_cast(::operator new(n + sizeof(size_t))); + // auto *p = static_cast(::operator new(n + sizeof(size_t))); + size_t *p = static_cast(object_pool::get_pool().alloc(n + sizeof(size_t))); *p = n; return p + 1; } void operator delete(void *p) noexcept { size_t *q = static_cast(p) - 1; - ::operator delete(q); + object_pool::get_pool().dealloc(q, *q + sizeof(size_t)); + // ::operator delete(q); } template diff --git a/asco/sync/spin.h b/asco/sync/spin.h index 7a4104d..94fba5e 100644 --- a/asco/sync/spin.h +++ b/asco/sync/spin.h @@ -9,12 +9,13 @@ namespace asco::sync { template -class spin : private T { +class spin { public: - using T::T; - spin(const spin &) = delete; spin(spin &&) = delete; + template + explicit spin(Args &&...args) + : value(std::forward(args)...) {} class guard { spin &s; @@ -28,13 +29,13 @@ class spin : private T { ~guard() { s.locked.store(false, morder::release); } - T &operator*() { return s; } + T &operator*() { return s.value; } - const T &operator*() const { return s; } + const T &operator*() const { return s.value; } - T *operator->() { return &s; } + T *operator->() { return &s.value; } - const T *operator->() const { return &s; } + const T *operator->() const { return &s.value; } }; guard lock() { return guard{*this}; } @@ -45,6 +46,7 @@ class spin : private T { private: atomic_bool locked{false}; + T value; }; }; // namespace asco::sync diff --git a/asco/utils/page_map.h b/asco/utils/page_map.h new file mode 100644 index 0000000..ab4cc25 --- /dev/null +++ b/asco/utils/page_map.h @@ -0,0 +1,106 @@ +#ifndef PAGEMAP_H +#define PAGEMAP_H 1 + +#include + +namespace asco { +inline std::function alloc_func = [](size_t size) { return ::operator new(size); }; + +template +class pagemap3 { +private: + // How many bits should we consume at each interior level + static const int interior_bits = (BITS + 2) / 3; // Round-up + static const int interior_length = 1 << interior_bits; + + // How many bits should we consume at leaf level + static const int leaf_bits = BITS - 2 * interior_bits; + static const int LEAF_LENGTH = 1 << leaf_bits; + + // Interior node + struct node { + node *ptrs[interior_length]; + }; + + // Leaf node + struct leaf { + void *values[LEAF_LENGTH]; + }; + + node *root_; // Root of radix tree + std::function allocator_; // Memory allocator + + node *new_node() { + node *result = reinterpret_cast(allocator_(sizeof(node))); + if (result != NULL) { + memset(result, 0, sizeof(*result)); + } + return result; + } + +public: + using number = uintptr_t; + + explicit pagemap3(std::function allocator = alloc_func) { + allocator_ = allocator; + root_ = new_node(); + } + + void *get(number k) const { + const number i1 = k >> (leaf_bits + interior_bits); + const number i2 = (k >> leaf_bits) & (interior_length - 1); + const number i3 = k & (LEAF_LENGTH - 1); + if ((k >> BITS) > 0 || root_->ptrs[i1] == NULL || root_->ptrs[i1]->ptrs[i2] == NULL) { + return NULL; + } + return reinterpret_cast(root_->ptrs[i1]->ptrs[i2])->values[i3]; + } + + void set(number k, void *v) { + assert(k >> BITS == 0); + const number i1 = k >> (leaf_bits + interior_bits); + const number i2 = (k >> leaf_bits) & (interior_length - 1); + const number i3 = k & (LEAF_LENGTH - 1); + reinterpret_cast(root_->ptrs[i1]->ptrs[i2])->values[i3] = v; + } + + void ensure_set(number k, void *v) { + ensure(k, 1); + set(k, v); + } + + bool ensure(number start, size_t n) { + for (number key = start; key <= start + n - 1;) { + const number i1 = key >> (leaf_bits + interior_bits); + const number i2 = (key >> leaf_bits) & (interior_length - 1); + + // Check for overflow + if (i1 >= interior_length || i2 >= interior_length) + assert(false); + + // Make 2nd level node if necessary + if (root_->ptrs[i1] == NULL) { + node *n = new_node(); + if (n == NULL) + return false; + root_->ptrs[i1] = n; + } + + // Make leaf node if necessary + if (root_->ptrs[i1]->ptrs[i2] == NULL) { + leaf *leaf = reinterpret_cast(allocator_(sizeof(leaf))); + if (leaf == NULL) + return false; + memset(leaf, 0, sizeof(*leaf)); + root_->ptrs[i1]->ptrs[i2] = reinterpret_cast(leaf); + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> leaf_bits) + 1) << leaf_bits; + } + return true; + } +}; +} // namespace asco + +#endif \ No newline at end of file diff --git a/tests/futurets.cpp b/tests/futurets.cpp index fb0ce0e..82844a7 100644 --- a/tests/futurets.cpp +++ b/tests/futurets.cpp @@ -11,7 +11,7 @@ using asco::future, asco::future_inline, asco::future_void_core; future_inline foo(uint64_t i) { char coro_local(y); char *coro_local(str); - std::cout << "foo " << y; + // std::cout << "foo " << y; y = i % 26 + 'a'; co_return i; } @@ -20,12 +20,12 @@ future_void_core bar() { char decl_local(y, new char{'a'}); char *decl_local_array(str, new char[10]); uint64_t s = 0; - for (uint64_t i = 1; i <= 100000; i++) { + for (uint64_t i = 1; i <= 1000000; i++) { auto x = co_await foo(i); assert(x == i); s += x; - std::cout << ' ' << y << std::endl; - std::cout << x << " : " << s << std::endl; + // std::cout << ' ' << y << std::endl; + // std::cout << x << " : " << s << std::endl; y = 'a'; } co_return {};