From c85f1f18c403fbf5c985ad4bb81e0b36e0477f79 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sat, 1 Mar 2025 22:05:43 +0100 Subject: [PATCH 01/19] Add Environment Manager --- CMakeLists.txt | 2 +- DataSources/DataSource.h | 7 +- EnvironmentReconfigure/EnvironmentManager.cpp | 162 ++++++++++++++++++ EnvironmentReconfigure/EnvironmentManager.h | 105 ++++++++++++ main.cpp | 57 +++--- 5 files changed, 295 insertions(+), 38 deletions(-) create mode 100644 EnvironmentReconfigure/EnvironmentManager.cpp create mode 100644 EnvironmentReconfigure/EnvironmentManager.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 45b2fb2..3537c06 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,7 @@ ENDIF (Boost_FOUND) SET(USED_LIBS ${Boost_SYSTEM_LIBRARY} ${Boost_THREAD_LIBRARY} ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_REGEX_LIBRARY} ${ZSTD_LIBRARY_PATH} ${SNAPPY_LIBRARY_PATH}) #add_definitions(-DUSE_SYSTEM_ABSEIL=OFF) -add_executable(${PROJECT_NAME} main.cpp xdbcserver.cpp ControllerInterface/WebSocketClient.cpp Compression/Compressor.cpp DataSources/PGReader/PGReader.cpp DataSources/CHReader/CHReader.cpp DataSources/CSVReader/CSVReader.cpp DataSources/PQReader/PQReader.cpp DataSources/DataSource.cpp) +add_executable(${PROJECT_NAME} main.cpp xdbcserver.cpp EnvironmentReconfigure/EnvironmentManager.cpp ControllerInterface/WebSocketClient.cpp Compression/Compressor.cpp DataSources/PGReader/PGReader.cpp DataSources/CHReader/CHReader.cpp DataSources/CSVReader/CSVReader.cpp DataSources/PQReader/PQReader.cpp DataSources/DataSource.cpp) #add_library(clickhouse-cpp SHARED IMPORTED) #set_property(TARGET clickhouse-cpp PROPERTY IMPORTED_LOCATION "/usr/local/lib/libclickhouse-cpp-lib.so") target_link_libraries(${PROJECT_NAME} PRIVATE clickhouse-cpp-lib -lpqxx -lpq ${USED_LIBS} lzo2 lz4 cityhash spdlog::spdlog ZLIB::ZLIB FastPFOR::FastPFOR ${FPZIP_LIBRARY} ${ARROW_LIBRARIES} -larrow ${PARQUET_SHARED_LIB}) \ No newline at end of file diff --git a/DataSources/DataSource.h b/DataSources/DataSource.h index 1966525..87a5678 100644 --- a/DataSources/DataSource.h +++ b/DataSources/DataSource.h @@ -6,7 +6,8 @@ #include #include #include -#include "../customQueue.h" +#include "../EnvironmentReconfigure/EnvironmentManager.h" +// #include "../customQueue.h" // #define BUFFER_SIZE 1000 // #define BUFFERPOOL_SIZE 1000 @@ -36,6 +37,7 @@ typedef std::shared_ptr> PTQ_ptr; struct transfer_details { float elapsed_time = 0.0f; // Default value for elapsed_time + std::tuple latest_queueSizes; }; struct RuntimeEnv { @@ -77,6 +79,9 @@ struct RuntimeEnv { int spawn_source; std::atomic enable_updation; transfer_details tf_paras; + int max_threads = 16; + EnvironmentManager env_manager; + PTQ_ptr pts; }; diff --git a/EnvironmentReconfigure/EnvironmentManager.cpp b/EnvironmentReconfigure/EnvironmentManager.cpp new file mode 100644 index 0000000..3cf9790 --- /dev/null +++ b/EnvironmentReconfigure/EnvironmentManager.cpp @@ -0,0 +1,162 @@ +// EnvironmentManager.cpp +#include "EnvironmentManager.h" + +EnvironmentManager::EnvironmentManager() : terminate_(false), config_update_(false), config_over_(false) {} + +EnvironmentManager::~EnvironmentManager() +{ + stop(); // Ensure all threads are stopped before destruction +} + +void EnvironmentManager::registerOperation(const std::string &name, Task task, std::shared_ptr> poisonQueue) +{ + std::unique_lock lock(mutex_); + operations_[name] = {task, poisonQueue, 0, 0}; + // cv_.notify_all(); // Notify that a new operation is registered +} + +void EnvironmentManager::configureThreads(const std::string &name, int new_thread_count) +{ + std::unique_lock lock(mutex_); + auto it = operations_.find(name); + if (it != operations_.end()) + { + it->second.desired_threads = new_thread_count; + config_update_ = true; + config_over_ = false; + cv_.notify_all(); + + // Wait until all requested threads are actually started + + cv_.wait(lock, [this] + { return config_over_.load(); }); + } +} + +void EnvironmentManager::start() +{ + reconfig_thread_ = std::thread(&EnvironmentManager::run, this); +} + +void EnvironmentManager::joinThreads(const std::string &name) +{ + std::unique_lock lock(mutex_); + auto it = operations_.find(name); + if (it != operations_.end()) + { + Operation &op = it->second; + + for (auto &thread : op.threads) + { + if (thread.joinable()) + { + thread.join(); // Wait for the thread to finish + } + else + { + spdlog::info("Thread with ID: {} is not joinable.", std::hash{}(thread.get_id())); + } + } + + op.threads.clear(); // Clear the threads after joining + op.active_threads = 0; // Reset the active thread count + op.desired_threads = 0; // Reset the desired thread count + } + else + { + spdlog::warn("Operation '{}' not found. No threads to join.", name); + } +} + +void EnvironmentManager::stop() +{ + { + std::unique_lock lock(mutex_); + terminate_ = true; + cv_.notify_all(); + } + + if (reconfig_thread_.joinable()) + { + reconfig_thread_.join(); + } + + // Join all threads before exiting + for (auto &op : operations_) + { + for (auto &t : op.second.threads) + { + if (t.joinable()) + { + t.join(); + } + } + } +} + +void EnvironmentManager::run() +{ + while (!terminate_) + { + std::unique_lock lock(mutex_); + cv_.wait(lock, [this] + { return terminate_.load() || config_update_; }); + + config_update_ = false; + + for (auto &[name, operation] : operations_) + { + int delta_threads = operation.desired_threads - operation.active_threads; + + if (delta_threads > 0) + { + + for (int i = 0; i < delta_threads; ++i) + { + int thread_id = operation.active_threads + i; + + if (!operation.task) + { + spdlog::error("Task is null for operation {}", name); + continue; + } + + // Push a new thread instead of accessing via index + operation.threads.emplace_back([this, task = operation.task, thread_id, name] + { + try + { + task(thread_id); + } + catch (const std::exception &e) + { + spdlog::error("Exception in thread {}: {}", thread_id, e.what()); + } + catch (...) + { + spdlog::error("Unknown exception in thread {}", thread_id); + } }); + } + spdlog::info("Reconfigure thread for operation {0} by {1}", name, delta_threads); + } + else if (delta_threads < 0) + { + + for (int i = 0; i < -delta_threads; ++i) + { + if (!operation.poisonQueue) + { + spdlog::error("poisonQueue is null for operation {}", name); + continue; + } + operation.poisonQueue->push(-1); + } + spdlog::info("Reconfigure thread for operation {0} by {1}", name, delta_threads); + } + + operation.active_threads = operation.desired_threads; + } + config_over_ = true; + cv_.notify_all(); + } +} diff --git a/EnvironmentReconfigure/EnvironmentManager.h b/EnvironmentReconfigure/EnvironmentManager.h new file mode 100644 index 0000000..dbdf242 --- /dev/null +++ b/EnvironmentReconfigure/EnvironmentManager.h @@ -0,0 +1,105 @@ +// EnvironmentManager.h +#ifndef ENVIRONMENTMANAGER_H +#define ENVIRONMENTMANAGER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../customQueue.h" + +class EnvironmentManager +{ +public: + using Task = std::function; + + // Constructor + EnvironmentManager(); + + // Destructor + ~EnvironmentManager(); + + // Register an operation (e.g., write, decompress) + void registerOperation(const std::string &name, Task task, std::shared_ptr> poisonQueue); + + // Configure the number of threads for an operation + void configureThreads(const std::string &name, int new_thread_count); + + // Start the reconfiguration manager + void start(); + + // Stop the reconfiguration manager and all threads + void stop(); + + // Join all threads for a specific operation + void joinThreads(const std::string &name); + +private: + struct Operation + { + Task task; + std::shared_ptr> poisonQueue; + int active_threads = 0; + int desired_threads = 0; + std::vector threads; + }; + + void run(); // Main loop that handles thread creation and termination + + std::unordered_map operations_; + std::mutex mutex_; + std::condition_variable cv_; + std::atomic terminate_; + std::atomic config_update_; + std::atomic config_over_; + std::thread reconfig_thread_; +}; + +// Sample code for user: +/* + EnvironmentManager reconfig_manager; + +// Register operations with lambdas to bind arguments + +// Register ANALYTICS operation with specific arguments +reconfig_manager.registerOperation("ANALYTICS", + [&](int thr) { + int min = 0, max = 0; + long sum = 0, cnt = 0, totalcnt = 0; + analyticsThread(thr, min, max, sum, cnt, totalcnt); // Using thread index 'thr' and other arguments + }, + writeBufferIds); + +// Register STORAGE operation with specific arguments +reconfig_manager.registerOperation("STORAGE", + [&](int thr) { + std::string filename = "data_file"; // Dynamically generate filename based on thread ID + storageThread(thr, filename); // Using thread index 'thr' and filename + }, + decompressedBufferIds); + +// Start the reconfiguration manager +reconfig_manager.start(); + +// Configure threads dynamically +reconfig_manager.configureThreads("ANALYTICS", 5); // Start 5 threads for analytics +reconfig_manager.configureThreads("STORAGE", 3); // Start 3 threads for storage + +// Simulate reconfiguration at runtime +reconfig_manager.configureThreads("ANALYTICS", 2); // Reduce threads for ANALYTICS +reconfig_manager.configureThreads("STORAGE", 4); // Increase threads for STORAGE + +// Join threads for both operations +reconfig_manager.joinThreads("ANALYTICS"); +reconfig_manager.joinThreads("STORAGE"); + +// Stop the manager and all threads +reconfig_manager.stop(); +*/ + +#endif // EnvironmentManager_H diff --git a/main.cpp b/main.cpp index 67f81dc..2f22b30 100755 --- a/main.cpp +++ b/main.cpp @@ -128,13 +128,14 @@ void handleCMDParams(int ac, char *av[], RuntimeEnv &env) { env.tuple_size = 0; env.tuples_per_buffer = 0; + env.max_threads = env.buffers_in_bufferpool; } nlohmann::json metrics_convert(RuntimeEnv &env) { nlohmann::json metrics_json = nlohmann::json::object(); // Use a JSON object // auto env_pts = env->pts->copyAll(); - if ((env.pts)) { + if ((env.pts) && (env.enable_updation == 1)) { std::vector env_pts; env_pts = env.pts->copy_newElements(); auto component_metrics_ = calculate_metrics(env_pts, env.buffer_size); @@ -159,49 +160,33 @@ nlohmann::json metrics_convert(RuntimeEnv &env) { nlohmann::json additional_msg(RuntimeEnv &env) { nlohmann::json metrics_json = nlohmann::json::object(); // Use a JSON object metrics_json["totalTime_ms"] = env.tf_paras.elapsed_time; + // metrics_json["freeBufferQ_load"] = std::get<0>(env.tf_paras.latest_queueSizes); + // metrics_json["compressedBufferQ_load"] = std::get<1>(env.tf_paras.latest_queueSizes); + // metrics_json["decompressedBufferQ_load"] = std::get<2>(env.tf_paras.latest_queueSizes); + // metrics_json["deserializedBufferQ_load"] = std::get<3>(env.tf_paras.latest_queueSizes); return metrics_json; } void env_convert(RuntimeEnv &env, const nlohmann::json &env_json) { try { - // Acquire the lock to ensure thread-safe access to env_ - // std::lock_guard lock(env_mutex); - // Assuming `env_json` is a JSON object - - const auto &env_object = env_json; - RuntimeEnv env_; - env_.transfer_id = std::stoll(env_json.at("transferID").get()); - env_.system = env_json.at("system").get(); - env_.compression_algorithm = env_json.at("compressionType").get(); - env_.iformat = std::stoi(env_json.at("intermediateFormat").get()); - env_.buffer_size = std::stoi(env_json.at("bufferSize").get()); - env_.buffers_in_bufferpool = std::stoi(env_json.at("bufferpoolSize").get()) / env_.buffer_size; - env_.sleep_time = std::chrono::milliseconds(std::stoll(env_json.at("sleepTime").get())); - env_.read_parallelism = std::stoi(env_json.at("readParallelism").get()); - env_.read_partitions = std::stoi(env_json.at("readPartitions").get()); - env_.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); - env_.network_parallelism = std::stoi(env_json.at("netParallelism").get()); - env_.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); + // env.transfer_id = std::stoll(env_json.at("transferID").get()); + // env.system = env_json.at("system").get(); + // env.compression_algorithm = env_json.at("compressionType").get(); + // env.iformat = std::stoi(env_json.at("intermediateFormat").get()); + // env.buffer_size = std::stoi(env_json.at("bufferSize").get()); + // env.buffers_in_bufferpool = std::stoi(env_json.at("bufferpoolSize").get()) / env_.buffer_size; + // env.sleep_time = std::chrono::milliseconds(std::stoll(env_json.at("sleepTime").get())); + // env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); + // env.read_partitions = std::stoi(env_json.at("readPartitions").get()); + // env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); + // env.network_parallelism = std::stoi(env_json.at("netParallelism").get()); + // env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); if (env.enable_updation == 1) { - // Lock the mutex to ensure exclusive access to env_ - // std::lock_guard lock(env.env_mutex); - - env.transfer_id = env_.transfer_id; - env.system = env_.system; - env.compression_algorithm = env_.compression_algorithm; - env.iformat = env_.iformat; - env.buffer_size = env_.buffer_size; - env.buffers_in_bufferpool = env_.buffers_in_bufferpool; - env.sleep_time = env_.sleep_time; - env.read_parallelism = env_.read_parallelism; - env.read_partitions = env_.read_partitions; - env.deser_parallelism = env_.deser_parallelism; - env.network_parallelism = env_.network_parallelism; - env.compression_parallelism = env_.compression_parallelism; - - // env.env_condition.notify_all(); + env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); + env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); + env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); } } catch (const std::exception &e) { From ca032bb5c19dcb711822728d1fb137d6cd8d89c7 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 2 Mar 2025 09:47:56 +0100 Subject: [PATCH 02/19] Use Env manager for compress and send parallelism --- DataSources/DataSource.h | 20 +- docker-xdbc.yml | 83 +++++ main.cpp | 137 ++++---- xdbcserver.cpp | 677 ++++++++++++++++++++------------------- xdbcserver.h | 12 +- 5 files changed, 541 insertions(+), 388 deletions(-) create mode 100644 docker-xdbc.yml diff --git a/DataSources/DataSource.h b/DataSources/DataSource.h index 87a5678..4f78b7d 100644 --- a/DataSources/DataSource.h +++ b/DataSources/DataSource.h @@ -14,18 +14,21 @@ // #define TUPLE_SIZE 48 // #define SLEEP_TIME 5ms -struct Part { +struct Part +{ int id; int startOff; long endOff; }; -struct SchemaAttribute { +struct SchemaAttribute +{ std::string name; std::string tpe; int size; }; -struct ProfilingTimestamps { +struct ProfilingTimestamps +{ std::chrono::high_resolution_clock::time_point timestamp; int thread; std::string component; @@ -35,12 +38,14 @@ typedef std::shared_ptr> FBQ_ptr; typedef std::shared_ptr> FPQ_ptr; typedef std::shared_ptr> PTQ_ptr; -struct transfer_details { +struct transfer_details +{ float elapsed_time = 0.0f; // Default value for elapsed_time std::tuple latest_queueSizes; }; -struct RuntimeEnv { +struct RuntimeEnv +{ long transfer_id; std::string compression_algorithm; int iformat; @@ -77,7 +82,7 @@ struct RuntimeEnv { int profilingInterval; int spawn_source; - std::atomic enable_updation; + std::atomic enable_updation = 0; transfer_details tf_paras; int max_threads = 16; EnvironmentManager env_manager; @@ -85,7 +90,8 @@ struct RuntimeEnv { PTQ_ptr pts; }; -class DataSource { +class DataSource +{ public: DataSource(RuntimeEnv &xdbcEnv, std::string tableName); diff --git a/docker-xdbc.yml b/docker-xdbc.yml new file mode 100644 index 0000000..67215a6 --- /dev/null +++ b/docker-xdbc.yml @@ -0,0 +1,83 @@ +services: + xdbc-server: + image: xdbc-server:latest + container_name: xdbcserver + restart: always + volumes: + - /dev/shm:/dev/shm + ports: + - 1234:1234 + - 1235:1235 + - 1236:1236 + - 1237:1237 + - 1238:1238 + shm_size: '16gb' + cap_add: + - NET_ADMIN + networks: + - xdbc-net + labels: + com.docker-tc.enabled: 1 + + # xdbc-client: + # image: xdbc-client:latest + # container_name: xdbcclient + # restart: always + # volumes: + # - /dev/shm:/dev/shm + # shm_size: '16gb' + # cap_add: + # - NET_ADMIN + # networks: + # - xdbc-net + # labels: + # com.docker-tc.enabled: 1 + + #xdbc-python: + # image: xdbc-python:latest + # container_name: xdbcpython + # restart: always + # volumes: + # - /dev/shm:/dev/shm + # shm_size: '16gb' + # cap_add: + # - NET_ADMIN + # networks: + # - xdbc-net + # labels: + # com.docker-tc.enabled: 1 + + #postgres-1: + # image: postgresdb + # container_name: pg1 + # restart: always + # volumes: + # - test-data:/data + # - pgvolume1:/var/lib/postgresql/ + # ports: + # - 15432:5432 + # shm_size: '2gb' + # networks: + # - xdbc-net + +# clickhouse: +# image: clickhousedb +# container_name: ch +# privileged: true +# ports: +# - 19000:9000 +# - 18123:8123 +# volumes: +# - test-data:/data +# - chdbvol:/var/lib/clickhouse +# shm_size: '2gb' + +volumes: + test-data: + external: true + pgvolume1: + chdbvol: + +networks: + xdbc-net: + external: true \ No newline at end of file diff --git a/main.cpp b/main.cpp index 2f22b30..45f3d67 100755 --- a/main.cpp +++ b/main.cpp @@ -13,21 +13,22 @@ using namespace std; namespace po = boost::program_options; -void handleCMDParams(int ac, char *av[], RuntimeEnv &env) { +void handleCMDParams(int ac, char *av[], RuntimeEnv &env) +{ // Declare the supported options. po::options_description desc("Usage: ./xdbc-server [options]\n\nAllowed options"); desc.add_options()("help,h", "Produce this help message.")("system,y", po::value()->default_value("csv"), "Set system: \nDefault:\n csv\nOther:\n postgres, clickhouse")( - "compression-type,c", po::value()->default_value("nocomp"), - "Set Compression algorithm: \nDefault:\n nocomp\nOther:\n zstd\n snappy\n lzo\n lz4\n zlib\n cols")( - "intermediate-format,f", po::value()->default_value(1), - "Set intermediate-format: \nDefault:\n 1 (row)\nOther:\n 2 (col)")("buffer-size,b", - po::value()->default_value(64), - "Set buffer-size of buffers (in KiB).\nDefault: 64")( - "bufferpool-size,p", po::value()->default_value(4096), - "Set bufferpool memory size (in KiB).\nDefault: 4096") - //("tuple-size,t", po::value()->default_value(48), "Set the tuple size.\nDefault: 48") - ("sleep-time,s", po::value()->default_value(5), "Set a sleep-time in milli seconds.\nDefault: 5ms")( + "compression-type,c", po::value()->default_value("nocomp"), + "Set Compression algorithm: \nDefault:\n nocomp\nOther:\n zstd\n snappy\n lzo\n lz4\n zlib\n cols")( + "intermediate-format,f", po::value()->default_value(1), + "Set intermediate-format: \nDefault:\n 1 (row)\nOther:\n 2 (col)")("buffer-size,b", + po::value()->default_value(64), + "Set buffer-size of buffers (in KiB).\nDefault: 64")( + "bufferpool-size,p", po::value()->default_value(4096), + "Set bufferpool memory size (in KiB).\nDefault: 4096") + //("tuple-size,t", po::value()->default_value(48), "Set the tuple size.\nDefault: 48") + ("sleep-time,s", po::value()->default_value(5), "Set a sleep-time in milli seconds.\nDefault: 5ms")( "read-parallelism,rp", po::value()->default_value(4), "Set the read parallelism grade.\nDefault: 4")( "read-partitions,rpp", po::value()->default_value(1), "Set the number of read partitions.\nDefault: 1")("deser-parallelism,dp", @@ -41,7 +42,7 @@ void handleCMDParams(int ac, char *av[], RuntimeEnv &env) { "Set the transfer id.\nDefault: 0")("profiling-interval", po::value()->default_value(1000), "Set profiling interval.\nDefault: 1000")("skip-deserializer", po::value()->default_value( - false), + false), "Skip deserialization (0/1).\nDefault: false")( "spawn-source", po::value()->default_value(0), "Set spawn source (0 or 1).\nDefault: 0"); @@ -53,30 +54,36 @@ void handleCMDParams(int ac, char *av[], RuntimeEnv &env) { po::store(po::command_line_parser(ac, av).options(desc).positional(p).run(), vm); po::notify(vm); - if (vm.count("help")) { + if (vm.count("help")) + { cout << desc << "\n"; exit(0); } - if (vm.count("system")) { + if (vm.count("system")) + { spdlog::get("XDBC.SERVER")->info("system: {0}", vm["system"].as()); env.system = vm["system"].as(); } - if (vm.count("intermediate-format")) { + if (vm.count("intermediate-format")) + { spdlog::get("XDBC.SERVER")->info("Intermediate format: {0}", vm["intermediate-format"].as()); env.iformat = vm["intermediate-format"].as(); } - if (vm.count("compression-type")) { + if (vm.count("compression-type")) + { spdlog::get("XDBC.SERVER")->info("Compression algorithm: {0}", vm["compression-type"].as()); env.compression_algorithm = vm["compression-type"].as(); } - if (vm.count("buffer-size")) { + if (vm.count("buffer-size")) + { spdlog::get("XDBC.SERVER")->info("Buffer-size: {0} KiB", vm["buffer-size"].as()); env.buffer_size = vm["buffer-size"].as(); } - if (vm.count("bufferpool-size")) { + if (vm.count("bufferpool-size")) + { spdlog::get("XDBC.SERVER")->info("Bufferpool-size: {0} KiB", vm["bufferpool-size"].as()); env.buffers_in_bufferpool = vm["bufferpool-size"].as() / vm["buffer-size"].as(); spdlog::get("XDBC.SERVER")->info("Buffers in Bufferpool: {0}", env.buffers_in_bufferpool); @@ -85,43 +92,53 @@ void handleCMDParams(int ac, char *av[], RuntimeEnv &env) { spdlog::get("XDBC.SERVER")->info("Tuple size: {0}", vm["tuple-size"].as()); env.tuple_size = vm["tuple-size"].as(); }*/ - if (vm.count("sleep-time")) { + if (vm.count("sleep-time")) + { spdlog::get("XDBC.SERVER")->info("Sleep time: {0}ms", vm["sleep-time"].as()); env.sleep_time = std::chrono::milliseconds(vm["sleep-time"].as()); } - if (vm.count("read-parallelism")) { + if (vm.count("read-parallelism")) + { spdlog::get("XDBC.SERVER")->info("Read parallelism: {0}", vm["read-parallelism"].as()); env.read_parallelism = vm["read-parallelism"].as(); } - if (vm.count("read-partitions")) { + if (vm.count("read-partitions")) + { spdlog::get("XDBC.SERVER")->info("Read partitions: {0}", vm["read-partitions"].as()); env.read_partitions = vm["read-partitions"].as(); } - if (vm.count("network-parallelism")) { + if (vm.count("network-parallelism")) + { spdlog::get("XDBC.SERVER")->info("Network parallelism: {0}", vm["network-parallelism"].as()); env.network_parallelism = vm["network-parallelism"].as(); } - if (vm.count("deser-parallelism")) { + if (vm.count("deser-parallelism")) + { spdlog::get("XDBC.SERVER")->info("Deserialization parallelism: {0}", vm["deser-parallelism"].as()); env.deser_parallelism = vm["deser-parallelism"].as(); } - if (vm.count("compression-parallelism")) { + if (vm.count("compression-parallelism")) + { spdlog::get("XDBC.SERVER")->info("Compression parallelism: {0}", vm["compression-parallelism"].as()); env.compression_parallelism = vm["compression-parallelism"].as(); } - if (vm.count("transfer-id")) { + if (vm.count("transfer-id")) + { spdlog::get("XDBC.SERVER")->info("Transfer id: {0}", vm["transfer-id"].as()); env.transfer_id = vm["transfer-id"].as(); } - if (vm.count("profiling-interval")) { + if (vm.count("profiling-interval")) + { spdlog::get("XDBC.SERVER")->info("Profiling interval: {0}", vm["profiling-interval"].as()); env.profilingInterval = vm["profiling-interval"].as(); } - if (vm.count("skip-deserializer")) { + if (vm.count("skip-deserializer")) + { spdlog::get("XDBC.SERVER")->info("Skip serializer: {0}", vm["skip-deserializer"].as()); env.skip_deserializer = vm["skip-deserializer"].as(); } - if (vm.count("spawn-source")) { + if (vm.count("spawn-source")) + { spdlog::get("XDBC.SERVER")->info("Spawn source: {0}", vm["spawn-source"].as()); env.spawn_source = vm["spawn-source"].as(); } @@ -131,16 +148,19 @@ void handleCMDParams(int ac, char *av[], RuntimeEnv &env) { env.max_threads = env.buffers_in_bufferpool; } -nlohmann::json metrics_convert(RuntimeEnv &env) { +nlohmann::json metrics_convert(RuntimeEnv &env) +{ nlohmann::json metrics_json = nlohmann::json::object(); // Use a JSON object // auto env_pts = env->pts->copyAll(); - if ((env.pts) && (env.enable_updation == 1)) { + if ((env.pts) && (env.enable_updation == 1)) + { std::vector env_pts; env_pts = env.pts->copy_newElements(); auto component_metrics_ = calculate_metrics(env_pts, env.buffer_size); - for (const auto &pair: component_metrics_) { + for (const auto &pair : component_metrics_) + { nlohmann::json metric_object = nlohmann::json::object(); const Metrics &metric = pair.second; @@ -157,7 +177,8 @@ nlohmann::json metrics_convert(RuntimeEnv &env) { return metrics_json; } -nlohmann::json additional_msg(RuntimeEnv &env) { +nlohmann::json additional_msg(RuntimeEnv &env) +{ nlohmann::json metrics_json = nlohmann::json::object(); // Use a JSON object metrics_json["totalTime_ms"] = env.tf_paras.elapsed_time; // metrics_json["freeBufferQ_load"] = std::get<0>(env.tf_paras.latest_queueSizes); @@ -167,8 +188,10 @@ nlohmann::json additional_msg(RuntimeEnv &env) { return metrics_json; } -void env_convert(RuntimeEnv &env, const nlohmann::json &env_json) { - try { +void env_convert(RuntimeEnv &env, const nlohmann::json &env_json) +{ + try + { // env.transfer_id = std::stoll(env_json.at("transferID").get()); // env.system = env_json.at("system").get(); // env.compression_algorithm = env_json.at("compressionType").get(); @@ -182,19 +205,22 @@ void env_convert(RuntimeEnv &env, const nlohmann::json &env_json) { // env.network_parallelism = std::stoi(env_json.at("netParallelism").get()); // env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); - if (env.enable_updation == 1) { + if (env.enable_updation == 1) + { - env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); - env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); - env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); + // env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); + // env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); + // env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); } } - catch (const std::exception &e) { + catch (const std::exception &e) + { std::cerr << "Error converting env JSON: " << e.what() << std::endl; } } -int main(int argc, char *argv[]) { +int main(int argc, char *argv[]) +{ auto console = spdlog::stdout_color_mt("XDBC.SERVER"); @@ -205,14 +231,15 @@ int main(int argc, char *argv[]) { xdbcEnv.enable_updation = 1; std::thread io_thread; WebSocketClient ws_client("xdbc-controller", "8003"); - if (xdbcEnv.spawn_source == 1) { + if (xdbcEnv.spawn_source == 1) + { ws_client.start(); - io_thread = std::thread([&]() { - ws_client.run( - std::bind(&metrics_convert, std::ref(xdbcEnv)), std::bind(&additional_msg, std::ref(xdbcEnv)), - std::bind(&env_convert, std::ref(xdbcEnv), std::placeholders::_1)); - }); - while (!ws_client.is_active()) { + io_thread = std::thread([&]() + { ws_client.run( + std::bind(&metrics_convert, std::ref(xdbcEnv)), std::bind(&additional_msg, std::ref(xdbcEnv)), + std::bind(&env_convert, std::ref(xdbcEnv), std::placeholders::_1)); }); + while (!ws_client.is_active()) + { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } } @@ -239,9 +266,11 @@ int main(int argc, char *argv[]) { std::ostringstream totalThroughput; std::ostringstream perBufferThroughput; - for (const auto &[component, metrics]: component_metrics) { + for (const auto &[component, metrics] : component_metrics) + { - if (!component.empty()) { + if (!component.empty()) + { totalTimes << component << ":\t" << metrics.overall_time_ms << "ms, "; procTimes << component << ":\t" << metrics.processing_time_ms << "ms, "; waitingTimes << component << ":\t" << metrics.waiting_time_ms << "ms, "; @@ -250,9 +279,7 @@ int main(int argc, char *argv[]) { } } - spdlog::get("XDBC.SERVER")->info("xdbc server | \n all:\t {} \n proc:\t{} \n wait:\t{} \n thr:\t {} \n thr/b:\t {}", - totalTimes.str(), procTimes.str(), waitingTimes.str(), totalThroughput.str(), - perBufferThroughput.str()); + spdlog::get("XDBC.SERVER")->info("xdbc server | \n all:\t {} \n proc:\t{} \n wait:\t{} \n thr:\t {} \n thr/b:\t {}", totalTimes.str(), procTimes.str(), waitingTimes.str(), totalThroughput.str(), perBufferThroughput.str()); auto loads = printAndReturnAverageLoad(xdbcEnv); @@ -299,9 +326,11 @@ int main(int argc, char *argv[]) { << std::get<3>(loads) << "\n"; csv_file.close(); - if (xdbcEnv.spawn_source == 1) { + if (xdbcEnv.spawn_source == 1) + { ws_client.stop(); - if (io_thread.joinable()) { + if (io_thread.joinable()) + { io_thread.join(); } } diff --git a/xdbcserver.cpp b/xdbcserver.cpp index 2bc885e..b8ddfbb 100755 --- a/xdbcserver.cpp +++ b/xdbcserver.cpp @@ -15,370 +15,405 @@ #include "DataSources/PQReader/PQReader.h" #include "spdlog/spdlog.h" - using namespace std; using namespace boost::asio; using ip::tcp; -size_t compute_crc(const void *data, size_t size) { - boost::crc_32_type crc; - crc.process_bytes(data, size); - return crc.checksum(); +size_t compute_crc(const void *data, size_t size) +{ + boost::crc_32_type crc; + crc.process_bytes(data, size); + return crc.checksum(); } -uint16_t compute_checksum(const uint8_t *data, std::size_t size) { - uint16_t checksum = 0; - for (std::size_t i = 0; i < size; ++i) { - checksum ^= data[i]; - } - return checksum; +uint16_t compute_checksum(const uint8_t *data, std::size_t size) +{ + uint16_t checksum = 0; + for (std::size_t i = 0; i < size; ++i) + { + checksum ^= data[i]; + } + return checksum; } - -string read_(tcp::socket &socket) { - boost::asio::streambuf buf; - try { - size_t b = boost::asio::read_until(socket, buf, "\n"); - //spdlog::get("XDBC.SERVER")->info("Got bytes: {0} ", b); - } - catch (const boost::system::system_error &e) { - spdlog::get("XDBC.SERVER")->warn("Boost error while reading: {0} ", e.what()); - } - - - string data = boost::asio::buffer_cast(buf.data()); - return data; +string read_(tcp::socket &socket) +{ + boost::asio::streambuf buf; + try + { + size_t b = boost::asio::read_until(socket, buf, "\n"); + // spdlog::get("XDBC.SERVER")->info("Got bytes: {0} ", b); + } + catch (const boost::system::system_error &e) + { + spdlog::get("XDBC.SERVER")->warn("Boost error while reading: {0} ", e.what()); + } + + string data = boost::asio::buffer_cast(buf.data()); + return data; } - XDBCServer::XDBCServer(RuntimeEnv &xdbcEnv) - : bp(), - xdbcEnv(&xdbcEnv), - totalSentBuffers(0), - tableName() { - - PTQ_ptr pq(new customQueue); - xdbcEnv.pts = pq; - - //initialize read thread status - xdbcEnv.finishedReadThreads.store(0); - - //initialize free queue - xdbcEnv.freeBufferPtr = std::make_shared>(); - - //initially all buffers are put in the free buffer queue - for (int i = 0; i < xdbcEnv.buffers_in_bufferpool; i++) - xdbcEnv.freeBufferPtr->push(i); - - - //initialize partitions queue - xdbcEnv.partPtr = std::make_shared>(); - - int total_workers = xdbcEnv.read_parallelism + xdbcEnv.deser_parallelism + - xdbcEnv.compression_parallelism + xdbcEnv.network_parallelism; - - //each producer thread always needs a buffer from the free ones - int available_buffers_for_queues = xdbcEnv.buffers_in_bufferpool - total_workers; - - if (xdbcEnv.buffers_in_bufferpool < total_workers || - available_buffers_for_queues < total_workers) { - - spdlog::get("XDBC.SERVER")->error( - "Buffer allocation error: Total buffers: {0}. " - "\nRequired buffers: Total: {1}," - "\nAvailable for queues: {2}. " - "\nIncrease the buffer pool size to at least {1}.", - xdbcEnv.buffers_in_bufferpool, - total_workers, - available_buffers_for_queues); - - } - - int queueCapacityPerComp = available_buffers_for_queues / 4; - int deserQueueCapacity = queueCapacityPerComp + available_buffers_for_queues % 4; - - //initialize deser queue(s) - xdbcEnv.deserBufferPtr = std::make_shared>(); - xdbcEnv.deserBufferPtr->setCapacity(deserQueueCapacity); - xdbcEnv.finishedDeserThreads.store(0); - - //initialize compression queue - xdbcEnv.compBufferPtr = std::make_shared>(); - xdbcEnv.compBufferPtr->setCapacity(queueCapacityPerComp); - xdbcEnv.finishedCompThreads.store(0); - - //initialize send queue - xdbcEnv.sendBufferPtr = std::make_shared>(); - xdbcEnv.sendBufferPtr->setCapacity(queueCapacityPerComp); - xdbcEnv.finishedSendThreads.store(0); - - spdlog::get("XDBC.SERVER")->info("Initialized queues, " - "freeBuffersQ: {0}, " - "deserQ:{1}, " - "compQ: {2}, " - "sendQ: {2}", - xdbcEnv.buffers_in_bufferpool, deserQueueCapacity, queueCapacityPerComp); - - - //initialize send thread flags - for (int i = 0; i < xdbcEnv.network_parallelism; i++) { - FBQ_ptr q1(new customQueue); - xdbcEnv.sendThreadReady.push_back(q1); - } - - xdbcEnv.bpPtr = &bp; - - spdlog::get("XDBC.SERVER")->info("Created XDBC Server with BPS: {0} KiB, buffers, BS: {1} KiB", - xdbcEnv.buffer_size * xdbcEnv.buffers_in_bufferpool, xdbcEnv.buffer_size); - + : bp(), + xdbcEnv(&xdbcEnv), + totalSentBuffers(0), + tableName() +{ + + PTQ_ptr pq(new customQueue); + xdbcEnv.pts = pq; + + // initialize read thread status + xdbcEnv.finishedReadThreads.store(0); + + // initialize free queue + xdbcEnv.freeBufferPtr = std::make_shared>(); + + // initially all buffers are put in the free buffer queue + for (int i = 0; i < xdbcEnv.buffers_in_bufferpool; i++) + xdbcEnv.freeBufferPtr->push(i); + + // initialize partitions queue + xdbcEnv.partPtr = std::make_shared>(); + + int total_workers = xdbcEnv.read_parallelism + xdbcEnv.deser_parallelism + + xdbcEnv.compression_parallelism + xdbcEnv.network_parallelism; + + // each producer thread always needs a buffer from the free ones + int available_buffers_for_queues = xdbcEnv.buffers_in_bufferpool - total_workers; + + if (xdbcEnv.buffers_in_bufferpool < total_workers || + available_buffers_for_queues < total_workers) + { + + spdlog::get("XDBC.SERVER")->error("Buffer allocation error: Total buffers: {0}. " + "\nRequired buffers: Total: {1}," + "\nAvailable for queues: {2}. " + "\nIncrease the buffer pool size to at least {1}.", + xdbcEnv.buffers_in_bufferpool, total_workers, available_buffers_for_queues); + } + + int queueCapacityPerComp = available_buffers_for_queues / 4; + int deserQueueCapacity = queueCapacityPerComp + available_buffers_for_queues % 4; + + // initialize deser queue(s) + xdbcEnv.deserBufferPtr = std::make_shared>(); + xdbcEnv.deserBufferPtr->setCapacity(deserQueueCapacity); + xdbcEnv.finishedDeserThreads.store(0); + + // initialize compression queue + xdbcEnv.compBufferPtr = std::make_shared>(); + xdbcEnv.compBufferPtr->setCapacity(queueCapacityPerComp); + xdbcEnv.finishedCompThreads.store(0); + + // initialize send queue + xdbcEnv.sendBufferPtr = std::make_shared>(); + xdbcEnv.sendBufferPtr->setCapacity(queueCapacityPerComp); + xdbcEnv.finishedSendThreads.store(0); + + spdlog::get("XDBC.SERVER")->info("Initialized queues, " + "freeBuffersQ: {0}, " + "deserQ:{1}, " + "compQ: {2}, " + "sendQ: {2}", + xdbcEnv.buffers_in_bufferpool, deserQueueCapacity, queueCapacityPerComp); + + // initialize send thread flags + for (int i = 0; i < xdbcEnv.network_parallelism; i++) + { + FBQ_ptr q1(new customQueue); + xdbcEnv.sendThreadReady.push_back(q1); + } + + xdbcEnv.bpPtr = &bp; + + spdlog::get("XDBC.SERVER")->info("Created XDBC Server with BPS: {0} KiB, buffers, BS: {1} KiB", xdbcEnv.buffer_size * xdbcEnv.buffers_in_bufferpool, xdbcEnv.buffer_size); } -void XDBCServer::monitorQueues() { +void XDBCServer::monitorQueues() +{ - long long curTimeInterval = xdbcEnv->profilingInterval / 1000; + long long curTimeInterval = xdbcEnv->profilingInterval / 1000; - while (xdbcEnv->monitor) { - //auto now = std::chrono::high_resolution_clock::now(); - //auto timestamp = std::chrono::duration_cast(now.time_since_epoch()).count(); + while (xdbcEnv->monitor) + { + // auto now = std::chrono::high_resolution_clock::now(); + // auto timestamp = std::chrono::duration_cast(now.time_since_epoch()).count(); - // Calculate the total size of all queues in each category - size_t readBufferTotalSize = xdbcEnv->freeBufferPtr->size(); + // Calculate the total size of all queues in each category + size_t readBufferTotalSize = xdbcEnv->freeBufferPtr->size(); - size_t deserBufferTotalSize = xdbcEnv->deserBufferPtr->size(); + size_t deserBufferTotalSize = xdbcEnv->deserBufferPtr->size(); - size_t compressedBufferTotalSize = xdbcEnv->compBufferPtr->size(); - size_t sendBufferTotalSize = xdbcEnv->sendBufferPtr->size(); + size_t compressedBufferTotalSize = xdbcEnv->compBufferPtr->size(); + size_t sendBufferTotalSize = xdbcEnv->sendBufferPtr->size(); - // Store the measurement as a tuple - xdbcEnv->queueSizes.emplace_back(curTimeInterval, readBufferTotalSize, deserBufferTotalSize, - compressedBufferTotalSize, sendBufferTotalSize); + // Store the measurement as a tuple + xdbcEnv->queueSizes.emplace_back(curTimeInterval, readBufferTotalSize, deserBufferTotalSize, + compressedBufferTotalSize, sendBufferTotalSize); - std::this_thread::sleep_for(std::chrono::milliseconds(xdbcEnv->profilingInterval)); - curTimeInterval += xdbcEnv->profilingInterval / 1000; - } + std::this_thread::sleep_for(std::chrono::milliseconds(xdbcEnv->profilingInterval)); + curTimeInterval += xdbcEnv->profilingInterval / 1000; + } } -int XDBCServer::send(int thr, DataSource &dataReader) { +int XDBCServer::send(int thr, DataSource &dataReader) +{ - xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "send", "start"}); - //spdlog::get("XDBC.SERVER")->info("Entered send thread: {0}", thr); - int port = 1234 + thr + 1; - boost::asio::io_context ioContext; - boost::asio::ip::tcp::acceptor listenerAcceptor(ioContext, - boost::asio::ip::tcp::endpoint(boost::asio::ip::tcp::v4(), - port)); - boost::asio::ip::tcp::socket socket(ioContext); + xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "send", "start"}); + // spdlog::get("XDBC.SERVER")->info("Entered send thread: {0}", thr); + int port = 1234 + thr + 1; + boost::asio::io_context ioContext; + boost::asio::ip::tcp::acceptor listenerAcceptor(ioContext, + boost::asio::ip::tcp::endpoint(boost::asio::ip::tcp::v4(), + port)); + boost::asio::ip::tcp::socket socket(ioContext); - //let main thread know socket is ready - xdbcEnv->sendThreadReady[thr]->push(1); + // let main thread know socket is ready + xdbcEnv->sendThreadReady[thr]->push(1); - listenerAcceptor.accept(socket); + listenerAcceptor.accept(socket); - spdlog::get("XDBC.SERVER")->info("Send thread {0} accepting on port: {1}", thr, port); - //get client - string readThreadId = read_(socket); - readThreadId.erase(std::remove(readThreadId.begin(), readThreadId.end(), '\n'), readThreadId.cend()); + spdlog::get("XDBC.SERVER")->info("Send thread {0} accepting on port: {1}", thr, port); + // get client + string readThreadId = read_(socket); + readThreadId.erase(std::remove(readThreadId.begin(), readThreadId.end(), '\n'), readThreadId.cend()); - spdlog::get("XDBC.SERVER")->info("Send thread {0} paired with Client rcv thread {1}", thr, readThreadId); + spdlog::get("XDBC.SERVER")->info("Send thread {0} paired with Client rcv thread {1}", thr, readThreadId); - int bufferId; - size_t totalSentBytes = 0; - int threadSentBuffers = 0; + int bufferId; + size_t totalSentBytes = 0; + int threadSentBuffers = 0; - boost::asio::const_buffer sendBuffer; + boost::asio::const_buffer sendBuffer; - bool boostError = false; - int emptyCtr = 0; + bool boostError = false; + int emptyCtr = 0; - while (emptyCtr < 1 && !boostError) { + while (emptyCtr < 1 && !boostError) + { - auto start_wait = std::chrono::high_resolution_clock::now(); + auto start_wait = std::chrono::high_resolution_clock::now(); - bufferId = xdbcEnv->sendBufferPtr->pop(); - xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "send", "pop"}); + bufferId = xdbcEnv->sendBufferPtr->pop(); + xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "send", "pop"}); - if (bufferId == -1) - emptyCtr++; - else { + if (bufferId == -1) + emptyCtr++; + else + { - Header *headerPtr = reinterpret_cast
(bp[bufferId].data()); - /*spdlog::get("XDBC.SERVER")->warn("buffer {0} compression: {1}, totalSize: {2}", bufferId, - headerPtr->compressionType, headerPtr->totalSize);*/ - sendBuffer = boost::asio::buffer(bp[bufferId], headerPtr->totalSize + sizeof(Header)); + Header *headerPtr = reinterpret_cast
(bp[bufferId].data()); + /*spdlog::get("XDBC.SERVER")->warn("buffer {0} compression: {1}, totalSize: {2}", bufferId, + headerPtr->compressionType, headerPtr->totalSize);*/ + sendBuffer = boost::asio::buffer(bp[bufferId], headerPtr->totalSize + sizeof(Header)); - try { - totalSentBytes += boost::asio::write(socket, sendBuffer); - threadSentBuffers++; + try + { + totalSentBytes += boost::asio::write(socket, sendBuffer); + threadSentBuffers++; - totalSentBuffers.fetch_add(1); + totalSentBuffers.fetch_add(1); - xdbcEnv->pts->push( - ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "send", "push"}); + xdbcEnv->pts->push( + ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "send", "push"}); - xdbcEnv->freeBufferPtr->push(bufferId); + xdbcEnv->freeBufferPtr->push(bufferId); + } + catch (const boost::system::system_error &e) + { + spdlog::get("XDBC.SERVER")->error("Error writing to socket: {0} ", e.what()); + boostError = true; + // Handle the error... + } + } + } + xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "send", "end"}); - } catch (const boost::system::system_error &e) { - spdlog::get("XDBC.SERVER")->error("Error writing to socket: {0} ", e.what()); - boostError = true; - // Handle the error... - } - } - } - xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "send", "end"}); + spdlog::get("XDBC.SERVER")->info("Send thread {0} finished. Bytes {1}, #buffers {2} ", thr, totalSentBytes, threadSentBuffers); - spdlog::get("XDBC.SERVER")->info("Send thread {0} finished. Bytes {1}, #buffers {2} ", - thr, totalSentBytes, threadSentBuffers); + boost::system::error_code ec; + socket.shutdown(boost::asio::ip::tcp::socket::shutdown_both, ec); + if (ec) + { + spdlog::get("XDBC.SERVER")->error("Server send thread {0} shut down error: {1}", thr, ec.message()); + } - boost::system::error_code ec; - socket.shutdown(boost::asio::ip::tcp::socket::shutdown_both, ec); - if (ec) { - spdlog::get("XDBC.SERVER")->error("Server send thread {0} shut down error: {1}", thr, ec.message()); - } + socket.close(ec); + if (ec) + { + spdlog::get("XDBC.SERVER")->error("Server send thread {0} close error: {1}", thr, ec.message()); + } - socket.close(ec); - if (ec) { - spdlog::get("XDBC.SERVER")->error("Server send thread {0} close error: {1}", thr, ec.message()); - } - - return 1; + return 1; } - -int XDBCServer::serve() { - - - boost::asio::io_context ioContext; - boost::asio::ip::tcp::acceptor acceptor(ioContext, - boost::asio::ip::tcp::endpoint(boost::asio::ip::tcp::v4(), 1234)); - boost::asio::ip::tcp::socket baseSocket(ioContext); - acceptor.accept(baseSocket); - - //read operation - - std::uint32_t dataSize = 0; - size_t len = boost::asio::read(baseSocket, boost::asio::buffer(&dataSize, sizeof(dataSize))); - std::vector tableNameStr(dataSize); - boost::asio::read(baseSocket, boost::asio::buffer(tableNameStr.data(), dataSize)); - tableName = std::string(tableNameStr.begin(), tableNameStr.end()); - //tableName = read_(baseSocket); - - //tableName.erase(std::remove(tableName.begin(), tableName.end(), '\n'), tableName.cend()); - - spdlog::get("XDBC.SERVER")->info("Client wants to read table {0} ", tableName); - - dataSize = 0; - len = boost::asio::read(baseSocket, boost::asio::buffer(&dataSize, sizeof(dataSize))); - std::vector schemaJSONstr(dataSize); - len = boost::asio::read(baseSocket, boost::asio::buffer(schemaJSONstr.data(), dataSize)); - xdbcEnv->schemaJSON = std::string(schemaJSONstr.begin(), schemaJSONstr.end()); - - //spdlog::get("XDBC.SERVER")->info("Got schema {0}", xdbcEnv->schemaJSON); - - - std::vector net_threads(xdbcEnv->network_parallelism); - std::vector comp_threads(xdbcEnv->compression_parallelism); - std::thread t1; - std::unique_ptr ds; - - if (xdbcEnv->system == "postgres") { - ds = std::make_unique(*xdbcEnv, tableName); - } else if (xdbcEnv->system == "clickhouse") { - ds = std::make_unique(*xdbcEnv, tableName); - } else if (xdbcEnv->system == "csv") { - ds = std::make_unique(*xdbcEnv, tableName); - } else if (xdbcEnv->system == "parquet") { - ds = std::make_unique(*xdbcEnv, tableName); - } - - xdbcEnv->tuple_size = std::accumulate(xdbcEnv->schema.begin(), xdbcEnv->schema.end(), 0, - [](int acc, const SchemaAttribute &attr) { - return acc + attr.size; - }); - xdbcEnv->tuples_per_buffer = (xdbcEnv->buffer_size * 1024 / xdbcEnv->tuple_size); - - bp.resize(xdbcEnv->buffers_in_bufferpool, - std::vector(xdbcEnv->tuples_per_buffer * xdbcEnv->tuple_size + sizeof(Header))); - spdlog::get("XDBC.SERVER")->info("Tuples per buffer: {0}", xdbcEnv->tuples_per_buffer); - spdlog::get("XDBC.SERVER")->info("Input table tuple size: {0} with schema:\n{1}", - xdbcEnv->tuple_size, ds->formatSchema(xdbcEnv->schema)); - - xdbcEnv->monitor.store(true); - - _monitorThread = std::thread(&XDBCServer::monitorQueues, this); - - t1 = std::thread([&ds]() { - ds->readData(); - }); - - spdlog::get("XDBC.SERVER")->info("Created {0} read threads", xdbcEnv->system); - - std::unique_ptr compressorPtr; - compressorPtr = std::make_unique(*xdbcEnv); - - for (int i = 0; i < xdbcEnv->compression_parallelism; i++) { - comp_threads[i] = std::thread(&Compressor::compress, compressorPtr.get(), i, xdbcEnv->compression_algorithm); - } - - spdlog::get("XDBC.SERVER")->info("Created compress threads: {0} ", xdbcEnv->compression_parallelism); - - for (int i = 0; i < xdbcEnv->network_parallelism; i++) { - net_threads[i] = std::thread(&XDBCServer::send, this, i, std::ref(*ds)); - } - //check that sockets are ready - int acc = 0; - int sendThreadReadyQ = 0; - while (acc != xdbcEnv->network_parallelism) { - acc += xdbcEnv->sendThreadReady[sendThreadReadyQ]->pop(); - spdlog::get("XDBC.SERVER")->info("Send threads ready: {0}/{1} ", acc, xdbcEnv->sendThreadReady.size()); - sendThreadReadyQ = (sendThreadReadyQ + 1) % xdbcEnv->network_parallelism; - } - - spdlog::get("XDBC.SERVER")->info("Created send threads: {0} ", xdbcEnv->network_parallelism); - - - const std::string msg = "Server ready\n"; - boost::system::error_code error; - size_t bs = boost::asio::write(baseSocket, boost::asio::buffer(msg), error); - if (error) { - spdlog::get("XDBC.SERVER")->warn("Boost error while writing: ", error.message()); - } - - //spdlog::get("XDBC.SERVER")->info("Basesocket signaled with bytes: {0} ", bs); - - - // Join all the threads - for (auto &thread: comp_threads) { - if (thread.joinable()) { - thread.join(); - } - } - - for (auto &thread: net_threads) { - if (thread.joinable()) { - thread.join(); - } - } - - xdbcEnv->monitor.store(false); - _monitorThread.join(); - - t1.join(); - boost::system::error_code ec; - baseSocket.shutdown(boost::asio::ip::tcp::socket::shutdown_both, ec); - if (ec) { - spdlog::get("XDBC.SERVER")->error("Base socket shut down error: {0}", ec.message()); - } - - baseSocket.close(ec); - if (ec) { - spdlog::get("XDBC.SERVER")->error("Base socket close error: {0}", ec.message()); - } - - return 1; +int XDBCServer::serve() +{ + xdbcEnv->env_manager.start(); + boost::asio::io_context ioContext; + boost::asio::ip::tcp::acceptor acceptor(ioContext, + boost::asio::ip::tcp::endpoint(boost::asio::ip::tcp::v4(), 1234)); + boost::asio::ip::tcp::socket baseSocket(ioContext); + acceptor.accept(baseSocket); + + // read operation + + std::uint32_t dataSize = 0; + size_t len = boost::asio::read(baseSocket, boost::asio::buffer(&dataSize, sizeof(dataSize))); + std::vector tableNameStr(dataSize); + boost::asio::read(baseSocket, boost::asio::buffer(tableNameStr.data(), dataSize)); + tableName = std::string(tableNameStr.begin(), tableNameStr.end()); + // tableName = read_(baseSocket); + + // tableName.erase(std::remove(tableName.begin(), tableName.end(), '\n'), tableName.cend()); + + spdlog::get("XDBC.SERVER")->info("Client wants to read table {0} ", tableName); + + dataSize = 0; + len = boost::asio::read(baseSocket, boost::asio::buffer(&dataSize, sizeof(dataSize))); + std::vector schemaJSONstr(dataSize); + len = boost::asio::read(baseSocket, boost::asio::buffer(schemaJSONstr.data(), dataSize)); + xdbcEnv->schemaJSON = std::string(schemaJSONstr.begin(), schemaJSONstr.end()); + + // spdlog::get("XDBC.SERVER")->info("Got schema {0}", xdbcEnv->schemaJSON); + + std::vector net_threads(xdbcEnv->network_parallelism); + std::vector comp_threads(xdbcEnv->compression_parallelism); + std::thread t1; + std::unique_ptr ds; + + if (xdbcEnv->system == "postgres") + { + ds = std::make_unique(*xdbcEnv, tableName); + } + else if (xdbcEnv->system == "clickhouse") + { + ds = std::make_unique(*xdbcEnv, tableName); + } + else if (xdbcEnv->system == "csv") + { + ds = std::make_unique(*xdbcEnv, tableName); + } + else if (xdbcEnv->system == "parquet") + { + ds = std::make_unique(*xdbcEnv, tableName); + } + + xdbcEnv->tuple_size = std::accumulate(xdbcEnv->schema.begin(), xdbcEnv->schema.end(), 0, + [](int acc, const SchemaAttribute &attr) + { + return acc + attr.size; + }); + xdbcEnv->tuples_per_buffer = (xdbcEnv->buffer_size * 1024 / xdbcEnv->tuple_size); + + bp.resize(xdbcEnv->buffers_in_bufferpool, + std::vector(xdbcEnv->tuples_per_buffer * xdbcEnv->tuple_size + sizeof(Header))); + spdlog::get("XDBC.SERVER")->info("Tuples per buffer: {0}", xdbcEnv->tuples_per_buffer); + spdlog::get("XDBC.SERVER")->info("Input table tuple size: {0} with schema:\n{1}", xdbcEnv->tuple_size, ds->formatSchema(xdbcEnv->schema)); + + xdbcEnv->monitor.store(true); + + _monitorThread = std::thread(&XDBCServer::monitorQueues, this); + + t1 = std::thread([&ds]() + { ds->readData(); }); + + spdlog::get("XDBC.SERVER")->info("Created {0} read threads", xdbcEnv->system); + + std::unique_ptr compressorPtr; + compressorPtr = std::make_unique(*xdbcEnv); + + //*** Create threads for compress operation + xdbcEnv->env_manager.registerOperation("compress", [&](int thr) + { try { + if (thr >= xdbcEnv->max_threads) { + spdlog::get("XCLIENT")->error("No of threads exceed limit"); + return; + } + compressorPtr->compress(thr, xdbcEnv->compression_algorithm); + } catch (const std::exception& e) { + spdlog::get("XCLIENT")->error("Exception in thread {}: {}", thr, e.what()); + } catch (...) { + spdlog::get("XCLIENT")->error("Unknown exception in thread {}", thr); + } }, xdbcEnv->compBufferPtr); + + xdbcEnv->env_manager.configureThreads("compress", xdbcEnv->compression_parallelism); // start serial component threads + //*** Finish creating threads for compress operation + + spdlog::get("XDBC.SERVER")->info("Created compress threads: {0} ", xdbcEnv->compression_parallelism); + + // for (int i = 0; i < xdbcEnv->network_parallelism; i++) + // { + // net_threads[i] = std::thread(&XDBCServer::send, this, i, std::ref(*ds)); + // } + // check that sockets are ready + + //*** Create threads for send operation + xdbcEnv->env_manager.registerOperation("send", [&](int thr) + { try { + if (thr >= xdbcEnv->max_threads) { + spdlog::get("XCLIENT")->error("No of threads exceed limit"); + return; + } + send(thr, *ds); + } catch (const std::exception& e) { + spdlog::get("XCLIENT")->error("Exception in thread {}: {}", thr, e.what()); + } catch (...) { + spdlog::get("XCLIENT")->error("Unknown exception in thread {}", thr); + } }, xdbcEnv->sendBufferPtr); + + xdbcEnv->env_manager.configureThreads("send", xdbcEnv->network_parallelism); // start serial component threads + //*** Finish creating threads for send operation + + int acc = 0; + int sendThreadReadyQ = 0; + while (acc != xdbcEnv->network_parallelism) + { + acc += xdbcEnv->sendThreadReady[sendThreadReadyQ]->pop(); + spdlog::get("XDBC.SERVER")->info("Send threads ready: {0}/{1} ", acc, xdbcEnv->sendThreadReady.size()); + sendThreadReadyQ = (sendThreadReadyQ + 1) % xdbcEnv->network_parallelism; + } + + spdlog::get("XDBC.SERVER")->info("Created send threads: {0} ", xdbcEnv->network_parallelism); + + const std::string msg = "Server ready\n"; + boost::system::error_code error; + size_t bs = boost::asio::write(baseSocket, boost::asio::buffer(msg), error); + if (error) + { + spdlog::get("XDBC.SERVER")->warn("Boost error while writing: ", error.message()); + } + + // spdlog::get("XDBC.SERVER")->info("Basesocket signaled with bytes: {0} ", bs); + + // Join all the threads + xdbcEnv->env_manager.joinThreads("compress"); + xdbcEnv->env_manager.joinThreads("send"); + // for (auto &thread : net_threads) + // { + // if (thread.joinable()) + // { + // thread.join(); + // } + // } + + xdbcEnv->monitor.store(false); + _monitorThread.join(); + + t1.join(); + boost::system::error_code ec; + baseSocket.shutdown(boost::asio::ip::tcp::socket::shutdown_both, ec); + if (ec) + { + spdlog::get("XDBC.SERVER")->error("Base socket shut down error: {0}", ec.message()); + } + + baseSocket.close(ec); + if (ec) + { + spdlog::get("XDBC.SERVER")->error("Base socket close error: {0}", ec.message()); + } + xdbcEnv->env_manager.stop(); // *** Stop Reconfigurration handler + return 1; } - - - - - - diff --git a/xdbcserver.h b/xdbcserver.h index fe26b61..69d432d 100755 --- a/xdbcserver.h +++ b/xdbcserver.h @@ -1,7 +1,6 @@ #ifndef XDBCSERVER_H #define XDBCSERVER_H - #include #include #include @@ -15,7 +14,8 @@ using namespace boost::asio; using ip::tcp; constexpr size_t MAX_ATTRIBUTES = 230; -struct Header { +struct Header +{ size_t compressionType; size_t totalSize; @@ -25,14 +25,15 @@ struct Header { size_t crc; size_t attributeSize[MAX_ATTRIBUTES]; size_t attributeComp[MAX_ATTRIBUTES]; - }; -class XDBCServer { +class XDBCServer +{ public: explicit XDBCServer(RuntimeEnv &env); int serve(); + // int finishserve(); int send(int threadno, DataSource &dataReader); @@ -46,5 +47,4 @@ class XDBCServer { void monitorQueues(); }; - -#endif //XDBCSERVER_H +#endif // XDBCSERVER_H From 5d7dddf6f64c40183590a50ad183dc33c7947632 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 2 Mar 2025 14:40:14 +0100 Subject: [PATCH 03/19] Use Env Manager for creating threads of csv deser by adding a new env_manager2 object --- Compression/Compressor.cpp | 8 +- DataSources/CSVReader/CSVReader.cpp | 315 +++++++++++++++++----------- DataSources/CSVReader/CSVReader.h | 11 +- DataSources/DataSource.h | 3 +- xdbcserver.cpp | 47 ++--- 5 files changed, 215 insertions(+), 169 deletions(-) diff --git a/Compression/Compressor.cpp b/Compression/Compressor.cpp index f57c41f..506cfd4 100644 --- a/Compression/Compressor.cpp +++ b/Compression/Compressor.cpp @@ -110,10 +110,10 @@ void Compressor::compress(int thr, const std::string &compName) { //notify that we finished xdbcEnv->finishedCompThreads.fetch_add(1); - if (xdbcEnv->finishedCompThreads == xdbcEnv->compression_parallelism) { - for (int i = 0; i < xdbcEnv->network_parallelism; i++) - xdbcEnv->sendBufferPtr->push(-1); - } + // if (xdbcEnv->finishedCompThreads == xdbcEnv->compression_parallelism) { + // for (int i = 0; i < xdbcEnv->network_parallelism; i++) + // xdbcEnv->sendBufferPtr->push(-1); + // } } diff --git a/DataSources/CSVReader/CSVReader.cpp b/DataSources/CSVReader/CSVReader.cpp index c8371ef..6426e40 100644 --- a/DataSources/CSVReader/CSVReader.cpp +++ b/DataSources/CSVReader/CSVReader.cpp @@ -14,17 +14,20 @@ #include size_t finalizeAndWriteRecordBatchToMemory( - const std::vector> &builders, - const std::shared_ptr &schema, - void *memoryPtr, - size_t availableBufferSize, - size_t numRows) { + const std::vector> &builders, + const std::shared_ptr &schema, + void *memoryPtr, + size_t availableBufferSize, + size_t numRows) +{ std::vector> arrays; - for (const auto &builder: builders) { + for (const auto &builder : builders) + { std::shared_ptr array; auto status = builder->Finish(&array); - if (!status.ok()) { + if (!status.ok()) + { throw std::runtime_error("Failed to finalize Arrow builder: " + status.message()); } arrays.push_back(array); @@ -35,83 +38,93 @@ size_t finalizeAndWriteRecordBatchToMemory( // Create a MutableBuffer auto buffer = std::make_shared( - static_cast(memoryPtr), availableBufferSize); + static_cast(memoryPtr), availableBufferSize); auto bufferWriter = std::make_shared(buffer); // Serialize the RecordBatch using MakeFileWriter auto fileWriterResult = arrow::ipc::MakeFileWriter(bufferWriter, schema); - if (!fileWriterResult.ok()) { + if (!fileWriterResult.ok()) + { throw std::runtime_error("Failed to create FileWriter: " + fileWriterResult.status().message()); } auto fileWriter = fileWriterResult.ValueOrDie(); auto status = fileWriter->WriteRecordBatch(*recordBatch); - if (!status.ok()) { + if (!status.ok()) + { throw std::runtime_error("Failed to write RecordBatch: " + status.message()); } status = fileWriter->Close(); - if (!status.ok()) { + if (!status.ok()) + { throw std::runtime_error("Failed to close FileWriter: " + status.message()); } // Calculate the serialized size and add padding for 8-byte alignment size_t serializedSize = bufferWriter->Tell().ValueOrDie(); - if (serializedSize > availableBufferSize) { + if (serializedSize > availableBufferSize) + { throw std::runtime_error("Serialized data exceeds available buffer size"); } return serializedSize; } - -void handle_error(const char *msg) { +void handle_error(const char *msg) +{ perror(msg); exit(255); } -static uintmax_t wc(char const *fname) { +static uintmax_t wc(char const *fname) +{ static const auto BUFFER_SIZE = 16 * 1024; int fd = open(fname, O_RDONLY); - if (fd == -1) { + if (fd == -1) + { handle_error("open"); spdlog::get("XDBC.SERVER")->error("File does not exist: {0}", fname); } - posix_fadvise(fd, 0, 0, 1); // FDADVICE_SEQUENTIAL + posix_fadvise(fd, 0, 0, 1); // FDADVICE_SEQUENTIAL char buf[BUFFER_SIZE + 1]; uintmax_t lines = 0; - while (size_t bytes_read = read(fd, buf, BUFFER_SIZE)) { - if (bytes_read == (size_t) -1) + while (size_t bytes_read = read(fd, buf, BUFFER_SIZE)) + { + if (bytes_read == (size_t)-1) handle_error("read failed"); if (!bytes_read) break; - for (char *p = buf; (p = (char *) memchr(p, '\n', (buf + bytes_read) - p)); ++p) + for (char *p = buf; (p = (char *)memchr(p, '\n', (buf + bytes_read) - p)); ++p) ++lines; } return lines; } - -CSVReader::CSVReader(RuntimeEnv &xdbcEnv, const std::string &tableName) : - DataSource(xdbcEnv, tableName), - bp(*xdbcEnv.bpPtr), - finishedReading(false), - totalReadBuffers(0), - xdbcEnv(&xdbcEnv) { +CSVReader::CSVReader(RuntimeEnv &xdbcEnv, const std::string &tableName) : DataSource(xdbcEnv, tableName), + bp(*xdbcEnv.bpPtr), + finishedReading(false), + totalReadBuffers(0), + xdbcEnv(&xdbcEnv) +{ spdlog::get("XDBC.SERVER")->info("CSV Constructor called with table: {0}", tableName); } -void CSVReader::readData() { +void CSVReader::readData() +{ + xdbcEnv->env_manager2.start(); auto start_read = std::chrono::steady_clock::now(); - int threadWrittenTuples[xdbcEnv->deser_parallelism]; - int threadWrittenBuffers[xdbcEnv->deser_parallelism]; + std::vector threadWrittenTuples(xdbcEnv->deser_parallelism, 0); // Initialize all elements to 0 + std::vector threadWrittenBuffers(xdbcEnv->deser_parallelism, 0); // Initialize all elements to 0 + // int threadWrittenTuples[xdbcEnv->read_parallelism]; + // int threadWrittenBuffers[xdbcEnv->read_parallelism]; std::thread readThreads[xdbcEnv->read_parallelism]; std::thread deSerThreads[xdbcEnv->deser_parallelism]; @@ -128,7 +141,8 @@ void CSVReader::readData() { if (partSizeDiv.rem > 0) partSize++; - for (int i = partNum - 1; i >= 0; i--) { + for (int i = partNum - 1; i >= 0; i--) + { Part p{}; p.id = i; p.startOff = i * partSize; @@ -139,68 +153,85 @@ void CSVReader::readData() { xdbcEnv->partPtr->push(p); - spdlog::get("XDBC.SERVER")->info("Partition {0} [{1},{2}] pushed into queue", - p.id, p.startOff, p.endOff); - + spdlog::get("XDBC.SERVER")->info("Partition {0} [{1},{2}] pushed into queue", p.id, p.startOff, p.endOff); } - //final partition + // final partition Part fP{}; fP.id = -1; - for (int i = 0; i < xdbcEnv->read_parallelism; i++) { + for (int i = 0; i < xdbcEnv->read_parallelism; i++) + { xdbcEnv->partPtr->push(fP); readThreads[i] = std::thread(&CSVReader::readCSV, this, i); } - auto start_deser = std::chrono::steady_clock::now(); - for (int i = 0; i < xdbcEnv->deser_parallelism; i++) { - threadWrittenTuples[i] = 0; - threadWrittenBuffers[i] = 0; - - deSerThreads[i] = std::thread(&CSVReader::deserializeCSV, - this, i, - std::ref(threadWrittenTuples[i]), std::ref(threadWrittenBuffers[i]) - ); + // for (int i = 0; i < xdbcEnv->deser_parallelism; i++) + // { + + // deSerThreads[i] = std::thread(&CSVReader::deserializeCSV, this, i); + // std::ref(threadWrittenTuples[i]), std::ref(threadWrittenBuffers[i])); + // } + + //*** Create threads for deserialize operation + xdbcEnv->env_manager2.registerOperation("deserialize", [&](int thr) + { try { + if (thr >= xdbcEnv->deser_parallelism) { + spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); + return; } + // this->deserializeCSV(thr, threadWrittenTuples[thr], threadWrittenBuffers[thr]); + deserializeCSV(thr); + } catch (const std::exception& e) { + spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); + } catch (...) { + spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); + } }, xdbcEnv->deserBufferPtr); + xdbcEnv->env_manager2.configureThreads("deserialize", xdbcEnv->deser_parallelism); // start compress component threads + //*** Finish creating threads for deserialize operation int totalTuples = 0; int totalBuffers = 0; - for (int i = 0; i < xdbcEnv->deser_parallelism; i++) { - deSerThreads[i].join(); + // totalTuples = std::accumulate(threadWrittenTuples.begin(), threadWrittenTuples.end(), 0); + + xdbcEnv->env_manager2.joinThreads("deserialize"); + for (int i = 0; i < xdbcEnv->deser_parallelism; i++) + { + // deSerThreads[i].join(); totalTuples += threadWrittenTuples[i]; totalBuffers += threadWrittenBuffers[i]; } - - for (int i = 0; i < xdbcEnv->read_parallelism; i++) { + for (int i = 0; i < xdbcEnv->read_parallelism; i++) + { readThreads[i].join(); } finishedReading.store(true); auto total_deser_time = std::chrono::duration_cast( - std::chrono::steady_clock::now() - start_deser).count(); - - spdlog::get("XDBC.SERVER")->info("Read+Deser | Elapsed time: {0} ms for #tuples: {1}, #buffers: {2}", - total_deser_time / 1000, - totalTuples, totalBuffers); + std::chrono::steady_clock::now() - start_deser) + .count(); + xdbcEnv->env_manager2.stop(); // *** Stop Reconfigurration handler + spdlog::get("XDBC.SERVER")->info("Read+Deser | Elapsed time: {0} ms for #tuples: {1}, #buffers: {2}", total_deser_time / 1000, totalTuples, totalBuffers); } -int CSVReader::readCSV(int thr) { +int CSVReader::readCSV(int thr) +{ xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "start"}); - //auto fileName = "/dev/shm/" + tableName + "_" + thrStrNum + ".csv"; + // auto fileName = "/dev/shm/" + tableName + "_" + thrStrNum + ".csv"; auto fileName = "/dev/shm/" + tableName + ".csv"; std::ifstream file(fileName); - if (!file.is_open()) { + if (!file.is_open()) + { spdlog::get("XDBC.SERVER")->error("CSV thread {0} error opening file", thr); return 0; } - //spdlog::get("XDBC.SERVER")->info("CSV thread {0}: Entered to read file {1}", thr, fileName); + // spdlog::get("XDBC.SERVER")->info("CSV thread {0}: Entered to read file {1}", thr, fileName); int curBid = xdbcEnv->freeBufferPtr->pop(); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "pop"}); @@ -215,18 +246,22 @@ int CSVReader::readCSV(int thr) { size_t tuplesRead = 0; size_t tuplesWritten = 0; - while (curPart.id != -1) { - //skip to our starting offset - while (currentLine < curPart.startOff && std::getline(file, line)) { + while (curPart.id != -1) + { + // skip to our starting offset + while (currentLine < curPart.startOff && std::getline(file, line)) + { currentLine++; } - while (currentLine < curPart.endOff && std::getline(file, line)) { + while (currentLine < curPart.endOff && std::getline(file, line)) + { line += "\n"; tuplesRead++; size_t lineSize = line.size(); - if ((writePtr - bp[curBid].data() + lineSize) > (bp[curBid].size() - sizeof(Header))) { + if ((writePtr - bp[curBid].data() + lineSize) > (bp[curBid].size() - sizeof(Header))) + { // Buffer is full, send it and fetch a new buffer Header head{}; @@ -243,7 +278,7 @@ int CSVReader::readCSV(int thr) { curBid = xdbcEnv->freeBufferPtr->pop(); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "pop"}); - //spdlog::get("XDBC.SERVER")->info("CSV thread {0}: got buff {1} ", thr, curBid); + // spdlog::get("XDBC.SERVER")->info("CSV thread {0}: got buff {1} ", thr, curBid); writePtr = bp[curBid].data() + sizeof(Header); buffersRead++; @@ -259,14 +294,12 @@ int CSVReader::readCSV(int thr) { currentLine = 0; curPart = xdbcEnv->partPtr->pop(); - - } Header head{}; head.totalSize = sizeWritten; head.totalTuples = tuplesWritten; - //send the last buffer & notify the end + // send the last buffer & notify the end std::memcpy(bp[curBid].data(), &head, sizeof(Header)); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "push"}); @@ -276,7 +309,8 @@ int CSVReader::readCSV(int thr) { xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "end"}); xdbcEnv->finishedReadThreads.fetch_add(1); - if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) { + if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + { for (int i = 0; i < xdbcEnv->deser_parallelism; i++) xdbcEnv->deserBufferPtr->push(-1); } @@ -284,18 +318,23 @@ int CSVReader::readCSV(int thr) { file.close(); spdlog::get("XDBC.SERVER")->info("Read thr {0} finished reading", thr); - spdlog::get("XDBC.SERVER")->info("Read thread {0} finished. #tuples: {1}, #buffers {2}", - thr, tuplesRead, buffersRead); + spdlog::get("XDBC.SERVER")->info("Read thread {0} finished. #tuples: {1}, #buffers {2}", thr, tuplesRead, buffersRead); return 1; } -int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) { +// int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) +int CSVReader::deserializeCSV(int thr) +{ - xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "start"}); + int totalThreadWrittenTuples; + int totalThreadWrittenBuffers; + xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "start"}); - if (xdbcEnv->skip_deserializer) { - while (true) { + if (xdbcEnv->skip_deserializer) + { + while (true) + { int inBid = xdbcEnv->deserBufferPtr->pop(); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "pop"}); @@ -306,7 +345,9 @@ int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &total xdbcEnv->compBufferPtr->push(inBid); totalThreadWrittenBuffers++; } - } else { + } + else + { int outBid; size_t readOffset = 0; @@ -321,77 +362,89 @@ int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &total size_t schemaSize = xdbcEnv->schema.size(); - //TODO: add deserializer based on requirements + // TODO: add deserializer based on requirements std::vector sizes(schemaSize); std::vector schemaChars(schemaSize); using DeserializeFunc = void (*)(const char *src, const char *end, void *dest, int attSize, size_t len); std::vector deserializers(schemaSize); std::vector, int, - size_t)>> arrowDeserializers(schemaSize); + size_t)>> + arrowDeserializers(schemaSize); std::vector> arrowBuilders(schemaSize); std::vector> fields; - for (size_t i = 0; i < schemaSize; ++i) { + for (size_t i = 0; i < schemaSize; ++i) + { std::shared_ptr dataType; - if (xdbcEnv->schema[i].tpe[0] == 'I') { + if (xdbcEnv->schema[i].tpe[0] == 'I') + { sizes[i] = 4; // sizeof(int) schemaChars[i] = 'I'; deserializers[i] = deserialize; arrowDeserializers[i] = deserialize_arrow; arrowBuilders[i] = std::make_shared(); dataType = arrow::int32(); - } else if (xdbcEnv->schema[i].tpe[0] == 'D') { + } + else if (xdbcEnv->schema[i].tpe[0] == 'D') + { sizes[i] = 8; // sizeof(double) schemaChars[i] = 'D'; deserializers[i] = deserialize; arrowDeserializers[i] = deserialize_arrow; arrowBuilders[i] = std::make_shared(); dataType = arrow::float64(); - } else if (xdbcEnv->schema[i].tpe[0] == 'C') { + } + else if (xdbcEnv->schema[i].tpe[0] == 'C') + { sizes[i] = 1; // sizeof(char) schemaChars[i] = 'C'; deserializers[i] = deserialize; arrowDeserializers[i] = deserialize_arrow; arrowBuilders[i] = std::make_shared( - arrow::fixed_size_binary(1)); + arrow::fixed_size_binary(1)); dataType = arrow::fixed_size_binary(1); - } else if (xdbcEnv->schema[i].tpe[0] == 'S') { + } + else if (xdbcEnv->schema[i].tpe[0] == 'S') + { sizes[i] = xdbcEnv->schema[i].size; schemaChars[i] = 'S'; deserializers[i] = deserialize; arrowDeserializers[i] = deserialize_arrow; arrowBuilders[i] = std::make_shared( - arrow::fixed_size_binary(sizes[i])); + arrow::fixed_size_binary(sizes[i])); dataType = arrow::fixed_size_binary(sizes[i]); } fields.push_back(arrow::field(xdbcEnv->schema[i].name, dataType)); } auto arrowSchema = std::make_shared(fields); - //spdlog::info("Arrow Schema: {}", arrowSchema->ToString()); + // spdlog::info("Arrow Schema: {}", arrowSchema->ToString()); outBid = xdbcEnv->freeBufferPtr->pop(); int inBid = xdbcEnv->deserBufferPtr->pop(); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "pop"}); - while (true) { + while (true) + { const std::vector &curReadBufferRef = bp[inBid]; const auto *header = reinterpret_cast(curReadBufferRef.data()); const std::byte *dataAfterHeader = curReadBufferRef.data() + sizeof(Header); - while (readOffset < header->totalSize) { + while (readOffset < header->totalSize) + { startReadPtr = reinterpret_cast(dataAfterHeader + readOffset); startWritePtr = bp[outBid].data() + sizeof(Header); bytesInTuple = 0; - for (int attPos = 0; attPos < schemaSize; attPos++) { + for (int attPos = 0; attPos < schemaSize; attPos++) + { - //spdlog::get("XDBC.SERVER")->info("CSV Deser thread {0} processing schema", thr); + // spdlog::get("XDBC.SERVER")->info("CSV Deser thread {0} processing schema", thr); auto &attribute = xdbcEnv->schema[attPos]; @@ -404,16 +457,19 @@ int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &total const char *tmpEnd = tmpPtr + len; startReadPtr = endPtr + 1; - if (xdbcEnv->iformat == 1 || xdbcEnv->iformat == 2) { + if (xdbcEnv->iformat == 1 || xdbcEnv->iformat == 2) + { // Determine the write pointer based on row or column format void *write = (xdbcEnv->iformat == 1) - ? startWritePtr + bufferTupleId * xdbcEnv->tuple_size + bytesInTuple - : startWritePtr + bytesInTuple * xdbcEnv->tuples_per_buffer + - bufferTupleId * sizes[attPos]; + ? startWritePtr + bufferTupleId * xdbcEnv->tuple_size + bytesInTuple + : startWritePtr + bytesInTuple * xdbcEnv->tuples_per_buffer + + bufferTupleId * sizes[attPos]; // Use CSV deserializers deserializers[attPos](tmpPtr, tmpEnd, write, attribute.size, len); - } else if (xdbcEnv->iformat == 3) { + } + else if (xdbcEnv->iformat == 3) + { // Format 3: Arrow arrowDeserializers[attPos](tmpPtr, tmpEnd, arrowBuilders[attPos], sizes[attPos], len); @@ -421,13 +477,13 @@ int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &total bytesInTuple += attribute.size; readOffset += len + 1; - } bufferTupleId++; totalThreadWrittenTuples++; - if (bufferTupleId == xdbcEnv->tuples_per_buffer && (xdbcEnv->iformat == 1 || xdbcEnv->iformat == 2)) { + if (bufferTupleId == xdbcEnv->tuples_per_buffer && (xdbcEnv->iformat == 1 || xdbcEnv->iformat == 2)) + { Header head{}; head.totalTuples = bufferTupleId; head.totalSize = head.totalTuples * xdbcEnv->tuple_size; @@ -438,19 +494,20 @@ int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &total totalThreadWrittenBuffers++; xdbcEnv->pts->push( - ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "push"}); + ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "push"}); xdbcEnv->compBufferPtr->push(outBid); outBid = xdbcEnv->freeBufferPtr->pop(); - - } else if (bufferTupleId == xdbcEnv->tuples_per_buffer - 1000 && xdbcEnv->iformat == 3) { + } + else if (bufferTupleId == xdbcEnv->tuples_per_buffer - 1000 && xdbcEnv->iformat == 3) + { size_t serializedSize = finalizeAndWriteRecordBatchToMemory( - arrowBuilders, // Pass the existing builders - arrowSchema, // Pass the schema - startWritePtr, // Pointer to the memory region - xdbcEnv->buffer_size * 1024 - sizeof(Header), // Available buffer space after the Header - bufferTupleId // Number of rows in the current batch + arrowBuilders, // Pass the existing builders + arrowSchema, // Pass the schema + startWritePtr, // Pointer to the memory region + xdbcEnv->buffer_size * 1024 - sizeof(Header), // Available buffer space after the Header + bufferTupleId // Number of rows in the current batch ); Header head{}; @@ -463,34 +520,34 @@ int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &total totalThreadWrittenBuffers++; xdbcEnv->pts->push( - ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "push"}); + ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "push"}); xdbcEnv->compBufferPtr->push(outBid); - for (auto &builder: arrowBuilders) { + for (auto &builder : arrowBuilders) + { builder->Reset(); } outBid = xdbcEnv->freeBufferPtr->pop(); } } - //we are done with reading the incoming buffer contents, return it and get a new one + // we are done with reading the incoming buffer contents, return it and get a new one readOffset = 0; xdbcEnv->freeBufferPtr->push(inBid); inBid = xdbcEnv->deserBufferPtr->pop(); if (inBid == -1) break; xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "pop"}); - } - //remaining tuples + // remaining tuples if (bufferTupleId > 0 && bufferTupleId != xdbcEnv->tuples_per_buffer && - (xdbcEnv->iformat == 1 || xdbcEnv->iformat == 2)) { - spdlog::get("XDBC.SERVER")->info("CSV Deser thread {0} has {1} remaining tuples", - thr, xdbcEnv->tuples_per_buffer - bufferTupleId); + (xdbcEnv->iformat == 1 || xdbcEnv->iformat == 2)) + { + spdlog::get("XDBC.SERVER")->info("CSV Deser thread {0} has {1} remaining tuples", thr, xdbcEnv->tuples_per_buffer - bufferTupleId); - //write tuple count to tmp header + // write tuple count to tmp header Header head{}; head.totalTuples = bufferTupleId; head.totalSize = head.totalTuples * xdbcEnv->tuple_size; @@ -505,13 +562,15 @@ int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &total xdbcEnv->compBufferPtr->push(outBid); totalThreadWrittenBuffers++; - } else if (bufferTupleId > 0 && bufferTupleId != xdbcEnv->tuples_per_buffer && xdbcEnv->iformat == 3) { + } + else if (bufferTupleId > 0 && bufferTupleId != xdbcEnv->tuples_per_buffer && xdbcEnv->iformat == 3) + { size_t serializedSize = finalizeAndWriteRecordBatchToMemory( - arrowBuilders, // Pass the existing builders - arrowSchema, // Pass the schema - startWritePtr, // Pointer to the memory region - xdbcEnv->buffer_size * 1024 - sizeof(Header), // Available buffer space after the Header - bufferTupleId // Number of rows in the current batch + arrowBuilders, // Pass the existing builders + arrowSchema, // Pass the schema + startWritePtr, // Pointer to the memory region + xdbcEnv->buffer_size * 1024 - sizeof(Header), // Available buffer space after the Header + bufferTupleId // Number of rows in the current batch ); Header head{}; @@ -523,22 +582,21 @@ int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &total totalThreadWrittenBuffers++; xdbcEnv->compBufferPtr->push(outBid); - for (auto &builder: arrowBuilders) { + for (auto &builder : arrowBuilders) + { builder->Reset(); } - } } - /*else spdlog::get("XDBC.SERVER")->info("CSV thread {0} has no remaining tuples", thr);*/ - spdlog::get("XDBC.SERVER")->info("CSV Deser thread {0} finished. buffers: {1}, tuples {2}", - thr, totalThreadWrittenBuffers, totalThreadWrittenTuples); + spdlog::get("XDBC.SERVER")->info("CSV Deser thread {0} finished. buffers: {1}, tuples {2}", thr, totalThreadWrittenBuffers, totalThreadWrittenTuples); xdbcEnv->finishedDeserThreads.fetch_add(1); - if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) { + if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) + { for (int i = 0; i < xdbcEnv->compression_parallelism; i++) xdbcEnv->compBufferPtr->push(-1); } @@ -548,11 +606,12 @@ int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &total return 1; } - -int CSVReader::getTotalReadBuffers() const { +int CSVReader::getTotalReadBuffers() const +{ return totalReadBuffers; } -bool CSVReader::getFinishedReading() const { +bool CSVReader::getFinishedReading() const +{ return finishedReading; } diff --git a/DataSources/CSVReader/CSVReader.h b/DataSources/CSVReader/CSVReader.h index d2337c6..e6ce87b 100644 --- a/DataSources/CSVReader/CSVReader.h +++ b/DataSources/CSVReader/CSVReader.h @@ -4,8 +4,8 @@ #include #include "../DataSource.h" - -class CSVReader : public DataSource { +class CSVReader : public DataSource +{ public: CSVReader(RuntimeEnv &xdbcEnv, const std::string &tableName); @@ -17,17 +17,14 @@ class CSVReader : public DataSource { void readData() override; private: - int readCSV(int thr); - int deserializeCSV(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers); + int deserializeCSV(int thr); std::atomic finishedReading; std::atomic totalReadBuffers; std::vector> &bp; RuntimeEnv *xdbcEnv; - }; - -#endif //XDBC_SERVER_CSVREADER_H +#endif // XDBC_SERVER_CSVREADER_H diff --git a/DataSources/DataSource.h b/DataSources/DataSource.h index 4f78b7d..d549dd0 100644 --- a/DataSources/DataSource.h +++ b/DataSources/DataSource.h @@ -85,7 +85,8 @@ struct RuntimeEnv std::atomic enable_updation = 0; transfer_details tf_paras; int max_threads = 16; - EnvironmentManager env_manager; + EnvironmentManager env_manager1; + EnvironmentManager env_manager2; PTQ_ptr pts; }; diff --git a/xdbcserver.cpp b/xdbcserver.cpp index b8ddfbb..264c802 100755 --- a/xdbcserver.cpp +++ b/xdbcserver.cpp @@ -250,7 +250,7 @@ int XDBCServer::send(int thr, DataSource &dataReader) int XDBCServer::serve() { - xdbcEnv->env_manager.start(); + xdbcEnv->env_manager1.start(); boost::asio::io_context ioContext; boost::asio::ip::tcp::acceptor acceptor(ioContext, boost::asio::ip::tcp::endpoint(boost::asio::ip::tcp::v4(), 1234)); @@ -325,47 +325,42 @@ int XDBCServer::serve() compressorPtr = std::make_unique(*xdbcEnv); //*** Create threads for compress operation - xdbcEnv->env_manager.registerOperation("compress", [&](int thr) - { try { + xdbcEnv->env_manager1.registerOperation("compress", [&](int thr) + { try { if (thr >= xdbcEnv->max_threads) { - spdlog::get("XCLIENT")->error("No of threads exceed limit"); + spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); return; } compressorPtr->compress(thr, xdbcEnv->compression_algorithm); } catch (const std::exception& e) { - spdlog::get("XCLIENT")->error("Exception in thread {}: {}", thr, e.what()); + spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); } catch (...) { - spdlog::get("XCLIENT")->error("Unknown exception in thread {}", thr); + spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); } }, xdbcEnv->compBufferPtr); - xdbcEnv->env_manager.configureThreads("compress", xdbcEnv->compression_parallelism); // start serial component threads + xdbcEnv->env_manager1.configureThreads("compress", xdbcEnv->compression_parallelism); // start compress component threads //*** Finish creating threads for compress operation spdlog::get("XDBC.SERVER")->info("Created compress threads: {0} ", xdbcEnv->compression_parallelism); - // for (int i = 0; i < xdbcEnv->network_parallelism; i++) - // { - // net_threads[i] = std::thread(&XDBCServer::send, this, i, std::ref(*ds)); - // } - // check that sockets are ready - //*** Create threads for send operation - xdbcEnv->env_manager.registerOperation("send", [&](int thr) - { try { + xdbcEnv->env_manager1.registerOperation("send", [&](int thr) + { try { if (thr >= xdbcEnv->max_threads) { - spdlog::get("XCLIENT")->error("No of threads exceed limit"); + spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); return; } send(thr, *ds); } catch (const std::exception& e) { - spdlog::get("XCLIENT")->error("Exception in thread {}: {}", thr, e.what()); + spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); } catch (...) { - spdlog::get("XCLIENT")->error("Unknown exception in thread {}", thr); + spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); } }, xdbcEnv->sendBufferPtr); - xdbcEnv->env_manager.configureThreads("send", xdbcEnv->network_parallelism); // start serial component threads + xdbcEnv->env_manager1.configureThreads("send", xdbcEnv->network_parallelism); // start send component threads //*** Finish creating threads for send operation + // check that sockets are ready int acc = 0; int sendThreadReadyQ = 0; while (acc != xdbcEnv->network_parallelism) @@ -388,15 +383,9 @@ int XDBCServer::serve() // spdlog::get("XDBC.SERVER")->info("Basesocket signaled with bytes: {0} ", bs); // Join all the threads - xdbcEnv->env_manager.joinThreads("compress"); - xdbcEnv->env_manager.joinThreads("send"); - // for (auto &thread : net_threads) - // { - // if (thread.joinable()) - // { - // thread.join(); - // } - // } + xdbcEnv->env_manager1.joinThreads("compress"); + xdbcEnv->env_manager1.configureThreads("send", 0); + xdbcEnv->env_manager1.joinThreads("send"); xdbcEnv->monitor.store(false); _monitorThread.join(); @@ -414,6 +403,6 @@ int XDBCServer::serve() { spdlog::get("XDBC.SERVER")->error("Base socket close error: {0}", ec.message()); } - xdbcEnv->env_manager.stop(); // *** Stop Reconfigurration handler + xdbcEnv->env_manager1.stop(); // *** Stop Reconfigurration handler return 1; } From fabe9cf1adf5aa93abd72458890c03db64191307 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 2 Mar 2025 16:13:30 +0100 Subject: [PATCH 04/19] Use ENv manager for creating threads of CSV components --- DataSources/CSVReader/CSVReader.cpp | 88 ++++++++++++++--------------- DataSources/CSVReader/CSVReader.h | 2 +- DataSources/DataSource.h | 4 +- xdbcserver.cpp | 25 ++++---- 4 files changed, 57 insertions(+), 62 deletions(-) diff --git a/DataSources/CSVReader/CSVReader.cpp b/DataSources/CSVReader/CSVReader.cpp index 6426e40..2088289 100644 --- a/DataSources/CSVReader/CSVReader.cpp +++ b/DataSources/CSVReader/CSVReader.cpp @@ -118,13 +118,11 @@ CSVReader::CSVReader(RuntimeEnv &xdbcEnv, const std::string &tableName) : DataSo void CSVReader::readData() { - xdbcEnv->env_manager2.start(); + xdbcEnv->env_manager_DS.start(); auto start_read = std::chrono::steady_clock::now(); - std::vector threadWrittenTuples(xdbcEnv->deser_parallelism, 0); // Initialize all elements to 0 - std::vector threadWrittenBuffers(xdbcEnv->deser_parallelism, 0); // Initialize all elements to 0 - // int threadWrittenTuples[xdbcEnv->read_parallelism]; - // int threadWrittenBuffers[xdbcEnv->read_parallelism]; + std::vector threadWrittenTuples(xdbcEnv->max_threads, 0); // Initialize all elements to 0 + std::vector threadWrittenBuffers(xdbcEnv->max_threads, 0); // Initialize all elements to 0 std::thread readThreads[xdbcEnv->read_parallelism]; std::thread deSerThreads[xdbcEnv->deser_parallelism]; @@ -160,60 +158,60 @@ void CSVReader::readData() Part fP{}; fP.id = -1; - for (int i = 0; i < xdbcEnv->read_parallelism; i++) - { - xdbcEnv->partPtr->push(fP); - readThreads[i] = std::thread(&CSVReader::readCSV, this, i); + //*** Create threads for read operation + xdbcEnv->env_manager_DS.registerOperation("read", [&](int thr) + { try { + if (thr >= xdbcEnv->max_threads) { + spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); + return; } + xdbcEnv->partPtr->push(fP); + readCSV(thr); + } catch (const std::exception& e) { + spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); + } catch (...) { + spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); + } }, xdbcEnv->freeBufferPtr); + xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); // start read component threads + //*** Finish creating threads for read operation auto start_deser = std::chrono::steady_clock::now(); - // for (int i = 0; i < xdbcEnv->deser_parallelism; i++) - // { - - // deSerThreads[i] = std::thread(&CSVReader::deserializeCSV, this, i); - // std::ref(threadWrittenTuples[i]), std::ref(threadWrittenBuffers[i])); - // } //*** Create threads for deserialize operation - xdbcEnv->env_manager2.registerOperation("deserialize", [&](int thr) - { try { - if (thr >= xdbcEnv->deser_parallelism) { + xdbcEnv->env_manager_DS.registerOperation("deserialize", [&](int thr) + { try { + if (thr >= xdbcEnv->max_threads) { spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); return; } - // this->deserializeCSV(thr, threadWrittenTuples[thr], threadWrittenBuffers[thr]); - deserializeCSV(thr); + deserializeCSV(thr, threadWrittenTuples[thr], threadWrittenBuffers[thr]); } catch (const std::exception& e) { spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); } catch (...) { spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); } }, xdbcEnv->deserBufferPtr); - xdbcEnv->env_manager2.configureThreads("deserialize", xdbcEnv->deser_parallelism); // start compress component threads + xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); // start deserialize component threads //*** Finish creating threads for deserialize operation + // Wait for read to finish and then kill deserialize + xdbcEnv->env_manager_DS.joinThreads("read"); + xdbcEnv->env_manager_DS.configureThreads("deserialize", 0); + xdbcEnv->env_manager_DS.joinThreads("deserialize"); + int totalTuples = 0; int totalBuffers = 0; - // totalTuples = std::accumulate(threadWrittenTuples.begin(), threadWrittenTuples.end(), 0); - - xdbcEnv->env_manager2.joinThreads("deserialize"); - for (int i = 0; i < xdbcEnv->deser_parallelism; i++) + for (int i = 0; i < xdbcEnv->max_threads; i++) { - // deSerThreads[i].join(); totalTuples += threadWrittenTuples[i]; totalBuffers += threadWrittenBuffers[i]; } - for (int i = 0; i < xdbcEnv->read_parallelism; i++) - { - readThreads[i].join(); - } - finishedReading.store(true); auto total_deser_time = std::chrono::duration_cast( std::chrono::steady_clock::now() - start_deser) .count(); - xdbcEnv->env_manager2.stop(); // *** Stop Reconfigurration handler + xdbcEnv->env_manager_DS.stop(); // *** Stop Reconfigurration handler spdlog::get("XDBC.SERVER")->info("Read+Deser | Elapsed time: {0} ms for #tuples: {1}, #buffers: {2}", total_deser_time / 1000, totalTuples, totalBuffers); } @@ -309,11 +307,11 @@ int CSVReader::readCSV(int thr) xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "end"}); xdbcEnv->finishedReadThreads.fetch_add(1); - if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) - { - for (int i = 0; i < xdbcEnv->deser_parallelism; i++) - xdbcEnv->deserBufferPtr->push(-1); - } + // if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + // { + // for (int i = 0; i < xdbcEnv->deser_parallelism; i++) + // xdbcEnv->deserBufferPtr->push(-1); + // } file.close(); spdlog::get("XDBC.SERVER")->info("Read thr {0} finished reading", thr); @@ -322,13 +320,9 @@ int CSVReader::readCSV(int thr) return 1; } -// int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) -int CSVReader::deserializeCSV(int thr) +int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) { - int totalThreadWrittenTuples; - int totalThreadWrittenBuffers; - xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "start"}); if (xdbcEnv->skip_deserializer) @@ -595,11 +589,11 @@ int CSVReader::deserializeCSV(int thr) spdlog::get("XDBC.SERVER")->info("CSV Deser thread {0} finished. buffers: {1}, tuples {2}", thr, totalThreadWrittenBuffers, totalThreadWrittenTuples); xdbcEnv->finishedDeserThreads.fetch_add(1); - if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) - { - for (int i = 0; i < xdbcEnv->compression_parallelism; i++) - xdbcEnv->compBufferPtr->push(-1); - } + // if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) + // { + // for (int i = 0; i < xdbcEnv->compression_parallelism; i++) + // xdbcEnv->compBufferPtr->push(-1); + // } xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "end"}); diff --git a/DataSources/CSVReader/CSVReader.h b/DataSources/CSVReader/CSVReader.h index e6ce87b..824ffa0 100644 --- a/DataSources/CSVReader/CSVReader.h +++ b/DataSources/CSVReader/CSVReader.h @@ -19,7 +19,7 @@ class CSVReader : public DataSource private: int readCSV(int thr); - int deserializeCSV(int thr); + int deserializeCSV(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers); std::atomic finishedReading; std::atomic totalReadBuffers; diff --git a/DataSources/DataSource.h b/DataSources/DataSource.h index d549dd0..319a53c 100644 --- a/DataSources/DataSource.h +++ b/DataSources/DataSource.h @@ -85,8 +85,8 @@ struct RuntimeEnv std::atomic enable_updation = 0; transfer_details tf_paras; int max_threads = 16; - EnvironmentManager env_manager1; - EnvironmentManager env_manager2; + EnvironmentManager env_manager_xServer; + EnvironmentManager env_manager_DS; PTQ_ptr pts; }; diff --git a/xdbcserver.cpp b/xdbcserver.cpp index 264c802..9857579 100755 --- a/xdbcserver.cpp +++ b/xdbcserver.cpp @@ -250,7 +250,7 @@ int XDBCServer::send(int thr, DataSource &dataReader) int XDBCServer::serve() { - xdbcEnv->env_manager1.start(); + xdbcEnv->env_manager_xServer.start(); boost::asio::io_context ioContext; boost::asio::ip::tcp::acceptor acceptor(ioContext, boost::asio::ip::tcp::endpoint(boost::asio::ip::tcp::v4(), 1234)); @@ -325,8 +325,8 @@ int XDBCServer::serve() compressorPtr = std::make_unique(*xdbcEnv); //*** Create threads for compress operation - xdbcEnv->env_manager1.registerOperation("compress", [&](int thr) - { try { + xdbcEnv->env_manager_xServer.registerOperation("compress", [&](int thr) + { try { if (thr >= xdbcEnv->max_threads) { spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); return; @@ -338,14 +338,14 @@ int XDBCServer::serve() spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); } }, xdbcEnv->compBufferPtr); - xdbcEnv->env_manager1.configureThreads("compress", xdbcEnv->compression_parallelism); // start compress component threads + xdbcEnv->env_manager_xServer.configureThreads("compress", xdbcEnv->compression_parallelism); // start compress component threads //*** Finish creating threads for compress operation spdlog::get("XDBC.SERVER")->info("Created compress threads: {0} ", xdbcEnv->compression_parallelism); //*** Create threads for send operation - xdbcEnv->env_manager1.registerOperation("send", [&](int thr) - { try { + xdbcEnv->env_manager_xServer.registerOperation("send", [&](int thr) + { try { if (thr >= xdbcEnv->max_threads) { spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); return; @@ -357,7 +357,7 @@ int XDBCServer::serve() spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); } }, xdbcEnv->sendBufferPtr); - xdbcEnv->env_manager1.configureThreads("send", xdbcEnv->network_parallelism); // start send component threads + xdbcEnv->env_manager_xServer.configureThreads("send", xdbcEnv->network_parallelism); // start send component threads //*** Finish creating threads for send operation // check that sockets are ready @@ -383,14 +383,15 @@ int XDBCServer::serve() // spdlog::get("XDBC.SERVER")->info("Basesocket signaled with bytes: {0} ", bs); // Join all the threads - xdbcEnv->env_manager1.joinThreads("compress"); - xdbcEnv->env_manager1.configureThreads("send", 0); - xdbcEnv->env_manager1.joinThreads("send"); + t1.join(); + xdbcEnv->env_manager_xServer.configureThreads("compress", 0); + xdbcEnv->env_manager_xServer.joinThreads("compress"); + xdbcEnv->env_manager_xServer.configureThreads("send", 0); + xdbcEnv->env_manager_xServer.joinThreads("send"); xdbcEnv->monitor.store(false); _monitorThread.join(); - t1.join(); boost::system::error_code ec; baseSocket.shutdown(boost::asio::ip::tcp::socket::shutdown_both, ec); if (ec) @@ -403,6 +404,6 @@ int XDBCServer::serve() { spdlog::get("XDBC.SERVER")->error("Base socket close error: {0}", ec.message()); } - xdbcEnv->env_manager1.stop(); // *** Stop Reconfigurration handler + xdbcEnv->env_manager_xServer.stop(); // *** Stop Reconfigurration handler return 1; } From 00b5e9aec5046a7d8480137b091d31d60c442dda Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 2 Mar 2025 16:38:28 +0100 Subject: [PATCH 05/19] Use Env Manager to create threads in CHReader, PGReader and PQReader --- DataSources/CHReader/CHReader.cpp | 189 +++++++++++--------- DataSources/PGReader/PGReader.cpp | 277 ++++++++++++++++++------------ DataSources/PQReader/PQReader.cpp | 236 +++++++++++++++---------- 3 files changed, 415 insertions(+), 287 deletions(-) diff --git a/DataSources/CHReader/CHReader.cpp b/DataSources/CHReader/CHReader.cpp index a4fd297..76eff14 100644 --- a/DataSources/CHReader/CHReader.cpp +++ b/DataSources/CHReader/CHReader.cpp @@ -13,42 +13,44 @@ using namespace std; using namespace boost::asio; using ip::tcp; -//TODO: refactor for new buffer_size -> tuples_per_buffer and deserialization method +// TODO: refactor for new buffer_size -> tuples_per_buffer and deserialization method -CHReader::CHReader(RuntimeEnv &xdbcEnv, const std::string tableName) : - DataSource(xdbcEnv, tableName), - bp(*xdbcEnv.bpPtr), - totalReadBuffers(0), - finishedReading(false), - xdbcEnv(&xdbcEnv), - tableName(tableName) { +CHReader::CHReader(RuntimeEnv &xdbcEnv, const std::string tableName) : DataSource(xdbcEnv, tableName), + bp(*xdbcEnv.bpPtr), + totalReadBuffers(0), + finishedReading(false), + xdbcEnv(&xdbcEnv), + tableName(tableName) +{ spdlog::get("XDBC.SERVER")->info("CH Reader, table schema:\n{0}", formatSchema(xdbcEnv.schema)); - } -int CHReader::getTotalReadBuffers() const { +int CHReader::getTotalReadBuffers() const +{ return totalReadBuffers; } -bool CHReader::getFinishedReading() const { +bool CHReader::getFinishedReading() const +{ return finishedReading; } -void CHReader::readData() { +void CHReader::readData() +{ + xdbcEnv->env_manager_DS.start(); auto start = std::chrono::steady_clock::now(); int totalCnt = 0; spdlog::get("XDBC.SERVER")->info("Using CH cpp lib, parallelism: {0}", xdbcEnv->read_parallelism); - int threadWrittenTuples[xdbcEnv->read_parallelism]; - int threadWrittenBuffers[xdbcEnv->read_parallelism]; + std::vector threadWrittenTuples(xdbcEnv->max_threads, 0); // Initialize all elements to 0 + std::vector threadWrittenBuffers(xdbcEnv->max_threads, 0); // Initialize all elements to 0 thread threads[xdbcEnv->read_parallelism]; // TODO: throw something when table does not exist int maxRowNum = getMaxRowNum(tableName); - int partNum = xdbcEnv->read_partitions; div_t partSizeDiv = div(maxRowNum, partNum); @@ -57,8 +59,8 @@ void CHReader::readData() { if (partSizeDiv.rem > 0) partSize++; - - for (int i = partNum - 1; i >= 0; i--) { + for (int i = partNum - 1; i >= 0; i--) + { Part p; p.id = i; p.startOff = i * partSize; @@ -68,22 +70,30 @@ void CHReader::readData() { p.endOff = UINT32_MAX; partStack.push(p); - } - for (int i = 0; i < xdbcEnv->read_parallelism; i++) { - - threads[i] = std::thread(&CHReader::chWriteToBp, - this, i, - std::ref(threadWrittenTuples[i]), std::ref(threadWrittenBuffers[i]) - ); - threadWrittenTuples[i] = 0; - threadWrittenBuffers[i] = 0; - + //*** Create threads for deserialize operation + xdbcEnv->env_manager_DS.registerOperation("read", [&](int thr) + { try { + if (thr >= xdbcEnv->max_threads) { + spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); + return; } + chWriteToBp(thr, threadWrittenTuples[thr], threadWrittenBuffers[thr]); + } catch (const std::exception& e) { + spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); + } catch (...) { + spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); + } }, xdbcEnv->freeBufferPtr); + xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); // start deserialize component threads + //*** Finish creating threads for deserialize operation + + // Wait for read to finish + xdbcEnv->env_manager_DS.joinThreads("read"); + int total = 0; - for (int i = 0; i < xdbcEnv->read_parallelism; i++) { - threads[i].join(); + for (int i = 0; i < xdbcEnv->max_threads; i++) + { total += threadWrittenTuples[i]; } @@ -91,17 +101,17 @@ void CHReader::readData() { totalCnt += total; auto end = std::chrono::steady_clock::now(); - spdlog::get("XDBC.SERVER")->info("Read | Elapsed time: {0} ms for #tuples: {1}", - std::chrono::duration_cast(end - start).count(), - totalCnt); + xdbcEnv->env_manager_DS.stop(); // *** Stop Reconfigurration handler + spdlog::get("XDBC.SERVER")->info("Read | Elapsed time: {0} ms for #tuples: {1}", std::chrono::duration_cast(end - start).count(), totalCnt); Client client(ClientOptions().SetHost("ch").SetPort(9000)); client.Execute("DROP VIEW tmp_view"); - //return 0; + // return 0; } -int CHReader::getMaxRowNum(const string &tableName) { +int CHReader::getMaxRowNum(const string &tableName) +{ spdlog::get("XDBC.SERVER")->info("CH getMaxRowNum"); // TODO: check connection properly @@ -115,20 +125,20 @@ int CHReader::getMaxRowNum(const string &tableName) { int max = 0; string q = "SELECT CAST(max(rowNumberInAllBlocks()) AS Int32) AS maxrid FROM " + tableName; - client.Select(q, [&max](const Block &block) { + client.Select(q, [&max](const Block &block) + { for (size_t i = 0; i < block.GetRowCount(); ++i) { max = block[0]->As()->At(i); - } - } - ); + } }); spdlog::get("XDBC.SERVER")->info("CH getMaxNumRow: {0}, query: {1} ", max, q); return max; } -int CHReader::chWriteToBp(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) { +int CHReader::chWriteToBp(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) +{ int minBId = thr * (xdbcEnv->buffers_in_bufferpool / xdbcEnv->read_parallelism); int maxBId = (thr + 1) * (xdbcEnv->buffers_in_bufferpool / xdbcEnv->read_parallelism); @@ -140,24 +150,27 @@ int CHReader::chWriteToBp(int thr, int &totalThreadWrittenTuples, int &totalThre int curBid = xdbcEnv->freeBufferPtr->pop(); int bufferTupleId = 0; - while (true) { + while (true) + { std::unique_lock lock(partStackMutex); - if (!partStack.empty()) { + if (!partStack.empty()) + { Part part = partStack.top(); partStack.pop(); lock.unlock(); - //TODO: fix dynamic schema - //TODO: fix clickhouse partitioning + // TODO: fix dynamic schema + // TODO: fix clickhouse partitioning std::string qStr = - "SELECT " + getAttributesAsStr(xdbcEnv->schema) + - //" FROM (SELECT rowNumberInAllBlocks() as row_no,* FROM " + tableName + - //" ORDER BY l_orderkey, l_partkey, l_suppkey)" + - " FROM tmp_view" - " WHERE row_no >= " + std::to_string(part.startOff) + - " AND row_no < " + std::to_string(part.endOff); + "SELECT " + getAttributesAsStr(xdbcEnv->schema) + + //" FROM (SELECT rowNumberInAllBlocks() as row_no,* FROM " + tableName + + //" ORDER BY l_orderkey, l_partkey, l_suppkey)" + + " FROM tmp_view" + " WHERE row_no >= " + + std::to_string(part.startOff) + + " AND row_no < " + std::to_string(part.endOff); spdlog::get("XDBC.SERVER")->info("CH thread {0} runs query: {1}", thr, qStr); /*std::string qStr = "SELECT rowNumberInAllBlocks() as row_no,* FROM " + tableName + @@ -167,73 +180,82 @@ int CHReader::chWriteToBp(int thr, int &totalThreadWrittenTuples, int &totalThre client.Select(qStr, [this, &curBid, &totalThreadWrittenBuffers, &bufferTupleId, &totalThreadWrittenTuples, &thr, &schemaSize]( - const Block &block) { - - - for (size_t i = 0; i < block.GetRowCount(); i++) { + const Block &block) + { + for (size_t i = 0; i < block.GetRowCount(); i++) + { auto bpPtr = bp[curBid].data(); int ti = 0; int bytesInTuple = 0; - for (int attPos = 0; attPos < schemaSize; attPos++) { + for (int attPos = 0; attPos < schemaSize; attPos++) + { auto &attribute = xdbcEnv->schema[attPos]; void *writePtr; - if (xdbcEnv->iformat == 1) { + if (xdbcEnv->iformat == 1) + { writePtr = bpPtr + bufferTupleId * xdbcEnv->tuple_size + bytesInTuple; - } else if (xdbcEnv->iformat == 2) { + } + else if (xdbcEnv->iformat == 2) + { writePtr = bpPtr + bytesInTuple * xdbcEnv->buffer_size + bufferTupleId * attribute.size; } - if (attribute.tpe == "INT") { + if (attribute.tpe == "INT") + { memcpy(writePtr, &block[ti]->As()->At(i), 4); bytesInTuple += attribute.size; - } else if (attribute.tpe == "DOUBLE") { + } + else if (attribute.tpe == "DOUBLE") + { // TODO: fix decimal/double column auto col = block[ti]->As(); - auto val = (double) col->At(i) * 0.01; + auto val = (double)col->At(i) * 0.01; memcpy(writePtr, &val, 8); bytesInTuple += attribute.size; } ti++; - } totalThreadWrittenTuples++; bufferTupleId++; - if (bufferTupleId == xdbcEnv->buffer_size) { - //cout << "wrote buffer " << bufferId << endl; + if (bufferTupleId == xdbcEnv->buffer_size) + { + // cout << "wrote buffer " << bufferId << endl; bufferTupleId = 0; - //totalReadBuffers.fetch_add(1); + // totalReadBuffers.fetch_add(1); totalThreadWrittenBuffers++; xdbcEnv->compBufferPtr->push(curBid); curBid = xdbcEnv->freeBufferPtr->pop(); - } } - } - ); + }); - //remaining tuples - if (totalReadBuffers > 0 && bufferTupleId != xdbcEnv->buffer_size) { - spdlog::get("XDBC.SERVER")->info("CH thread {0} has {1} remaining tuples", - thr, xdbcEnv->buffer_size - bufferTupleId); + // remaining tuples + if (totalReadBuffers > 0 && bufferTupleId != xdbcEnv->buffer_size) + { + spdlog::get("XDBC.SERVER")->info("CH thread {0} has {1} remaining tuples", thr, xdbcEnv->buffer_size - bufferTupleId); - //TODO: remove dirty fix, potentially with buffer header or resizable buffers + // TODO: remove dirty fix, potentially with buffer header or resizable buffers int mone = -1; - for (int i = bufferTupleId; i < xdbcEnv->buffer_size; i++) { + for (int i = bufferTupleId; i < xdbcEnv->buffer_size; i++) + { void *writePtr; - if (xdbcEnv->iformat == 1) { + if (xdbcEnv->iformat == 1) + { writePtr = bp[curBid].data() + bufferTupleId * xdbcEnv->tuple_size; - } else if (xdbcEnv->iformat == 2) { + } + else if (xdbcEnv->iformat == 2) + { writePtr = bp[curBid].data() + bufferTupleId * xdbcEnv->schema[0].size; } @@ -244,19 +266,20 @@ int CHReader::chWriteToBp(int thr, int &totalThreadWrittenTuples, int &totalThre totalReadBuffers.fetch_add(1); totalThreadWrittenBuffers++; } - spdlog::get("XDBC.SERVER")->info("CH thread {0} wrote buffers: {1}, tuples {2}", - thr, totalThreadWrittenBuffers, totalThreadWrittenTuples); - } else { + spdlog::get("XDBC.SERVER")->info("CH thread {0} wrote buffers: {1}, tuples {2}", thr, totalThreadWrittenBuffers, totalThreadWrittenTuples); + } + else + { break; } - } - //notify that we finished + // notify that we finished xdbcEnv->finishedReadThreads.fetch_add(1); - if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) { - for (int i = 0; i < xdbcEnv->compression_parallelism; i++) - xdbcEnv->compBufferPtr->push(-1); - } + // if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + // { + // for (int i = 0; i < xdbcEnv->compression_parallelism; i++) + // xdbcEnv->compBufferPtr->push(-1); + // } return 1; } diff --git a/DataSources/PGReader/PGReader.cpp b/DataSources/PGReader/PGReader.cpp index 198ad6d..95e5a90 100644 --- a/DataSources/PGReader/PGReader.cpp +++ b/DataSources/PGReader/PGReader.cpp @@ -19,14 +19,16 @@ using namespace pqxx; using namespace boost::asio; using ip::tcp; -//TODO: refactor for new buffer_size -> tuples_per_buffer and deserialization method +// TODO: refactor for new buffer_size -> tuples_per_buffer and deserialization method -std::vector splitStr(std::string const &original, char separator) { +std::vector splitStr(std::string const &original, char separator) +{ std::vector results; std::string::const_iterator start = original.begin(); std::string::const_iterator end = original.end(); std::string::const_iterator next = std::find(start, end, separator); - while (next != end) { + while (next != end) + { results.emplace_back(start, next); start = next + 1; next = std::find(start, end, separator); @@ -35,64 +37,79 @@ std::vector splitStr(std::string const &original, char separator) { return results; } -int fast_atoi(const char *str) { +int fast_atoi(const char *str) +{ int val = 0; - while (*str) { + while (*str) + { val = val * 10 + (*str++ - '0'); } return val; } -unsigned int naive(const char *p) { +unsigned int naive(const char *p) +{ unsigned int x = 0; - while (*p != '\0') { + while (*p != '\0') + { x = (x * 10) + (*p - '0'); ++p; } return x; } -enum STR2INT_ERROR { - SUCCESS, OVERFLOW, UNDERFLOW, INCONVERTIBLE +enum STR2INT_ERROR +{ + SUCCESS, + OVERFLOW, + UNDERFLOW, + INCONVERTIBLE }; -STR2INT_ERROR str2int(int &i, char const *s, int base = 0) { +STR2INT_ERROR str2int(int &i, char const *s, int base = 0) +{ char *end; long l; errno = 0; l = strtol(s, &end, base); - if ((errno == ERANGE && l == LONG_MAX) || l > INT_MAX) { + if ((errno == ERANGE && l == LONG_MAX) || l > INT_MAX) + { return OVERFLOW; } - if ((errno == ERANGE && l == LONG_MIN) || l < INT_MIN) { + if ((errno == ERANGE && l == LONG_MIN) || l < INT_MIN) + { return UNDERFLOW; } - if (*s == '\0' || *end != '\0') { + if (*s == '\0' || *end != '\0') + { return INCONVERTIBLE; } i = l; return SUCCESS; } -PGReader::PGReader(RuntimeEnv &xdbcEnv, const std::string &tableName) : - DataSource(xdbcEnv, tableName), - bp(*xdbcEnv.bpPtr), - totalReadBuffers(0), - finishedReading(false), - xdbcEnv(&xdbcEnv) { +PGReader::PGReader(RuntimeEnv &xdbcEnv, const std::string &tableName) : DataSource(xdbcEnv, tableName), + bp(*xdbcEnv.bpPtr), + totalReadBuffers(0), + finishedReading(false), + xdbcEnv(&xdbcEnv) +{ spdlog::get("XDBC.SERVER")->info("PG Constructor called with table {0}", tableName); } -int PGReader::getTotalReadBuffers() const { +int PGReader::getTotalReadBuffers() const +{ return totalReadBuffers; } -bool PGReader::getFinishedReading() const { +bool PGReader::getFinishedReading() const +{ return finishedReading; } -int PGReader::getMaxCtId(const std::string &tableName) { +int PGReader::getMaxCtId(const std::string &tableName) +{ const char *conninfo; PGconn *connection = NULL; @@ -102,7 +119,7 @@ int PGReader::getMaxCtId(const std::string &tableName) { PGresult *res; std::string qStr = "SELECT (MAX(ctid)::text::point)[0]::bigint AS maxctid FROM " + tableName; - //spdlog::get("XDBC.SERVER")->info("Getting max(ctid) with: {}", qStr); + // spdlog::get("XDBC.SERVER")->info("Getting max(ctid) with: {}", qStr); res = PQexec(connection, qStr.c_str()); int fnum = PQfnumber(res, "maxctid"); @@ -115,7 +132,8 @@ int PGReader::getMaxCtId(const std::string &tableName) { return maxCtId; } -void PGReader::readData() { +void PGReader::readData() +{ auto start = std::chrono::steady_clock::now(); int totalCnt = 0; @@ -123,19 +141,18 @@ void PGReader::readData() { totalCnt = read_pq_copy(); auto end = std::chrono::steady_clock::now(); - spdlog::get("XDBC.SERVER")->info("PGReader | Elapsed time: {0} ms for #tuples: {1}", - std::chrono::duration_cast(end - start).count(), - totalCnt); + spdlog::get("XDBC.SERVER")->info("PGReader | Elapsed time: {0} ms for #tuples: {1}", std::chrono::duration_cast(end - start).count(), totalCnt); } -int PGReader::read_pq_copy() { - +int PGReader::read_pq_copy() +{ + xdbcEnv->env_manager_DS.start(); auto start_read = std::chrono::steady_clock::now(); spdlog::get("XDBC.SERVER")->info("Using pglib with COPY, parallelism: {0}", xdbcEnv->read_parallelism); - int threadWrittenTuples[xdbcEnv->deser_parallelism]; - int threadWrittenBuffers[xdbcEnv->deser_parallelism]; + std::vector threadWrittenTuples(xdbcEnv->max_threads, 0); // Initialize all elements to 0 + std::vector threadWrittenBuffers(xdbcEnv->max_threads, 0); // Initialize all elements to 0 thread readThreads[xdbcEnv->read_parallelism]; thread deSerThreads[xdbcEnv->deser_parallelism]; @@ -151,7 +168,8 @@ int PGReader::read_pq_copy() { if (partSizeDiv.rem > 0) partSize++; - for (int i = partNum - 1; i >= 0; i--) { + for (int i = partNum - 1; i >= 0; i--) + { Part p{}; p.id = i; p.startOff = i * partSize; @@ -162,59 +180,75 @@ int PGReader::read_pq_copy() { xdbcEnv->partPtr->push(p); - spdlog::get("XDBC.SERVER")->info("Partition {0} [{1},{2}] pushed into queue ", - p.id, p.startOff, p.endOff); + spdlog::get("XDBC.SERVER")->info("Partition {0} [{1},{2}] pushed into queue ", p.id, p.startOff, p.endOff); } - //final partition + // final partition Part fP{}; fP.id = -1; - for (int i = 0; i < xdbcEnv->read_parallelism; i++) { - xdbcEnv->partPtr->push(fP); - readThreads[i] = std::thread(&PGReader::readPG, this, i); + //*** Create threads for read operation + xdbcEnv->env_manager_DS.registerOperation("read", [&](int thr) + { try { + if (thr >= xdbcEnv->max_threads) { + spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); + return; } - + xdbcEnv->partPtr->push(fP); + readPG(thr); + } catch (const std::exception& e) { + spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); + } catch (...) { + spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); + } }, xdbcEnv->freeBufferPtr); + xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); // start read component threads + //*** Finish creating threads for read operation auto start_deser = std::chrono::steady_clock::now(); - for (int i = 0; i < xdbcEnv->deser_parallelism; i++) { - threadWrittenTuples[i] = 0; - threadWrittenBuffers[i] = 0; - - deSerThreads[i] = std::thread(&PGReader::deserializePG, - this, i, - std::ref(threadWrittenTuples[i]), std::ref(threadWrittenBuffers[i]) - ); + //*** Create threads for deserialize operation + xdbcEnv->env_manager_DS.registerOperation("deserialize", [&](int thr) + { try { + if (thr >= xdbcEnv->max_threads) { + spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); + return; } + deserializePG(thr, threadWrittenTuples[thr], threadWrittenBuffers[thr]); + } catch (const std::exception& e) { + spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); + } catch (...) { + spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); + } }, xdbcEnv->deserBufferPtr); + xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); // start deserialize component threads + //*** Finish creating threads for deserialize operation + + // Wait for read to finish and then kill deserialize + xdbcEnv->env_manager_DS.joinThreads("read"); + xdbcEnv->env_manager_DS.configureThreads("deserialize", 0); + xdbcEnv->env_manager_DS.joinThreads("deserialize"); int totalTuples = 0; int totalBuffers = 0; - for (int i = 0; i < xdbcEnv->deser_parallelism; i++) { - deSerThreads[i].join(); + for (int i = 0; i < xdbcEnv->max_threads; i++) + { totalTuples += threadWrittenTuples[i]; totalBuffers += threadWrittenBuffers[i]; } - for (int i = 0; i < xdbcEnv->read_parallelism; i++) { - readThreads[i].join(); - } - finishedReading.store(true); auto end = std::chrono::steady_clock::now(); auto total_read_time = std::chrono::duration_cast(end - start_read).count(); auto total_deser_time = std::chrono::duration_cast(end - start_deser).count(); - - spdlog::get("XDBC.SERVER")->info("PG Read+Deser | Elapsed time: {0} ms for #tuples: {1}, #buffers: {2}", - total_deser_time / 1000, - totalTuples, totalBuffers); + xdbcEnv->env_manager_DS.stop(); // *** Stop Reconfigurration handler + spdlog::get("XDBC.SERVER")->info("PG Read+Deser | Elapsed time: {0} ms for #tuples: {1}, #buffers: {2}", total_deser_time / 1000, totalTuples, totalBuffers); return totalTuples; } -int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) { +int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) +{ xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "start"}); @@ -237,20 +271,28 @@ int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalTh using DeserializeFunc = void (*)(const char *src, const char *end, void *dest, int attSize, size_t len); std::vector deserializers(schemaSize); - for (size_t i = 0; i < schemaSize; ++i) { - if (xdbcEnv->schema[i].tpe[0] == 'I') { + for (size_t i = 0; i < schemaSize; ++i) + { + if (xdbcEnv->schema[i].tpe[0] == 'I') + { sizes[i] = 4; // sizeof(int) schemaChars[i] = 'I'; deserializers[i] = deserialize; - } else if (xdbcEnv->schema[i].tpe[0] == 'D') { + } + else if (xdbcEnv->schema[i].tpe[0] == 'D') + { sizes[i] = 8; // sizeof(double) schemaChars[i] = 'D'; deserializers[i] = deserialize; - } else if (xdbcEnv->schema[i].tpe[0] == 'C') { + } + else if (xdbcEnv->schema[i].tpe[0] == 'C') + { sizes[i] = 1; // sizeof(char) schemaChars[i] = 'C'; deserializers[i] = deserialize; - } else if (xdbcEnv->schema[i].tpe[0] == 'S') { + } + else if (xdbcEnv->schema[i].tpe[0] == 'S') + { sizes[i] = xdbcEnv->schema[i].size; schemaChars[i] = 'S'; deserializers[i] = deserialize; @@ -260,13 +302,15 @@ int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalTh outBid = xdbcEnv->freeBufferPtr->pop(); int inBid = xdbcEnv->deserBufferPtr->pop(); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "pop"}); - while (true) { + while (true) + { const std::vector &curReadBufferRef = bp[inBid]; const auto *header = reinterpret_cast(curReadBufferRef.data()); const std::byte *dataAfterHeader = curReadBufferRef.data() + sizeof(Header); - while (readOffset < header->totalSize) { + while (readOffset < header->totalSize) + { startReadPtr = reinterpret_cast(dataAfterHeader + readOffset); //+sizeof(Header) for temp header (totalTuples) @@ -274,9 +318,10 @@ int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalTh bytesInTuple = 0; - for (int attPos = 0; attPos < schemaSize; attPos++) { + for (int attPos = 0; attPos < schemaSize; attPos++) + { - //spdlog::get("XDBC.SERVER")->info("PG Deser thread {0} processing schema", thr); + // spdlog::get("XDBC.SERVER")->info("PG Deser thread {0} processing schema", thr); auto &attribute = xdbcEnv->schema[attPos]; @@ -289,9 +334,12 @@ int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalTh const char *tmpEnd = tmpPtr + len; startReadPtr = endPtr + 1; - if (xdbcEnv->iformat == 1) { + if (xdbcEnv->iformat == 1) + { write = startWritePtr + bufferTupleId * xdbcEnv->tuple_size + bytesInTuple; - } else if (xdbcEnv->iformat == 2) { + } + else if (xdbcEnv->iformat == 2) + { write = startWritePtr + bytesInTuple * xdbcEnv->tuples_per_buffer + bufferTupleId * attribute.size; } @@ -299,13 +347,13 @@ int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalTh bytesInTuple += attribute.size; readOffset += len + 1; - } bufferTupleId++; totalThreadWrittenTuples++; - if (bufferTupleId == xdbcEnv->tuples_per_buffer) { + if (bufferTupleId == xdbcEnv->tuples_per_buffer) + { Header head{}; head.totalTuples = bufferTupleId; head.totalSize = head.totalTuples * xdbcEnv->tuple_size; @@ -316,15 +364,14 @@ int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalTh totalThreadWrittenBuffers++; xdbcEnv->pts->push( - ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "push"}); + ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "push"}); xdbcEnv->compBufferPtr->push(outBid); outBid = xdbcEnv->freeBufferPtr->pop(); - } } - //we are done with reading the incoming buffer contents, return it and get a new one + // we are done with reading the incoming buffer contents, return it and get a new one xdbcEnv->freeBufferPtr->push(inBid); inBid = xdbcEnv->deserBufferPtr->pop(); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "pop"}); @@ -333,13 +380,12 @@ int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalTh break; } + // remaining tuples + if (bufferTupleId > 0 && bufferTupleId != xdbcEnv->tuples_per_buffer) + { + spdlog::get("XDBC.SERVER")->info("PG Deser thread {0} has {1} remaining tuples", thr, xdbcEnv->tuples_per_buffer - bufferTupleId); - //remaining tuples - if (bufferTupleId > 0 && bufferTupleId != xdbcEnv->tuples_per_buffer) { - spdlog::get("XDBC.SERVER")->info("PG Deser thread {0} has {1} remaining tuples", - thr, xdbcEnv->tuples_per_buffer - bufferTupleId); - - //write tuple count to tmp header + // write tuple count to tmp header Header head{}; head.totalTuples = bufferTupleId; head.totalSize = head.totalTuples * xdbcEnv->tuple_size; @@ -354,21 +400,22 @@ int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalTh totalThreadWrittenBuffers++; } - spdlog::get("XDBC.SERVER")->info("PG Deser thread {0} finished. buffers: {1}, tuples {2}", - thr, totalThreadWrittenBuffers, totalThreadWrittenTuples); + spdlog::get("XDBC.SERVER")->info("PG Deser thread {0} finished. buffers: {1}, tuples {2}", thr, totalThreadWrittenBuffers, totalThreadWrittenTuples); xdbcEnv->finishedDeserThreads.fetch_add(1); - if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) { - for (int i = 0; i < xdbcEnv->compression_parallelism; i++) - xdbcEnv->compBufferPtr->push(-1); - } + // if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) + // { + // for (int i = 0; i < xdbcEnv->compression_parallelism; i++) + // xdbcEnv->compBufferPtr->push(-1); + // } xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "end"}); return 1; } -int PGReader::readPG(int thr) { +int PGReader::readPG(int thr) +{ xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "start"}); @@ -391,7 +438,8 @@ int PGReader::readPG(int thr) { conninfo = "dbname = db1 user = postgres password = 123456 host = pg1 port = 5432"; connection = PQconnectdb(conninfo); - while (curPart.id != -1) { + while (curPart.id != -1) + { char *receiveBuffer = NULL; int receiveLength = 0; @@ -399,10 +447,10 @@ int PGReader::readPG(int thr) { PGresult *res; std::string qStr = - "COPY (SELECT " + getAttributesAsStr(xdbcEnv->schema) + " FROM " + tableName + - " WHERE ctid BETWEEN '(" + - std::to_string(curPart.startOff) + ",0)'::tid AND '(" + - std::to_string(curPart.endOff) + ",0)'::tid) TO STDOUT WITH (FORMAT text, DELIMITER '|')"; + "COPY (SELECT " + getAttributesAsStr(xdbcEnv->schema) + " FROM " + tableName + + " WHERE ctid BETWEEN '(" + + std::to_string(curPart.startOff) + ",0)'::tid AND '(" + + std::to_string(curPart.endOff) + ",0)'::tid) TO STDOUT WITH (FORMAT text, DELIMITER '|')"; spdlog::get("XDBC.SERVER")->info("PG thread {0} runs query: {1}", thr, qStr); @@ -414,13 +462,14 @@ int PGReader::readPG(int thr) { receiveLength = PQgetCopyData(connection, &receiveBuffer, asynchronous); - spdlog::get("XDBC.SERVER")->info("PG Read thread {0}: Entering PQgetCopyData loop with rcvlen: {1}", thr, - receiveLength); + spdlog::get("XDBC.SERVER")->info("PG Read thread {0}: Entering PQgetCopyData loop with rcvlen: {1}", thr, receiveLength); - while (receiveLength > 0) { + while (receiveLength > 0) + { // Buffer is full, send it and fetch a new buffer - if (((writePtr - bp[curBid].data() + receiveLength) > xdbcEnv->buffer_size * 1024)) { + if (((writePtr - bp[curBid].data() + receiveLength) > xdbcEnv->buffer_size * 1024)) + { Header head{}; head.totalSize = sizeWritten; head.totalTuples = tuplesPerBuffer; @@ -452,27 +501,35 @@ int PGReader::readPG(int thr) { spdlog::get("XDBC.SERVER")->info("PG thread {0}: Exiting PQgetCopyData loop, tupleNo: {1}", thr, tuplesRead); // we now check the last received length returned by copy data - if (receiveLength == 0) { + if (receiveLength == 0) + { // we cannot read more data without blocking spdlog::get("XDBC.SERVER")->warn("PG Reader received 0"); - } else if (receiveLength == -1) { + } + else if (receiveLength == -1) + { /* received copy done message */ PGresult *result = PQgetResult(connection); ExecStatusType resultStatus = PQresultStatus(result); - if (resultStatus != PGRES_COMMAND_OK) { + if (resultStatus != PGRES_COMMAND_OK) + { spdlog::get("XDBC.SERVER")->warn("PG thread {0} Copy failed", thr); - } PQclear(result); - } else if (receiveLength == -2) { + } + else if (receiveLength == -2) + { /* received an error */ spdlog::get("XDBC.SERVER")->warn("PG thread {0} Copy failed bc -2", thr); - } else if (receiveLength < 0) { + } + else if (receiveLength < 0) + { /* if copy out completed, make sure we drain all results from libpq */ PGresult *result = PQgetResult(connection); - while (result != NULL) { + while (result != NULL) + { PQclear(result); result = PQgetResult(connection); } @@ -483,7 +540,7 @@ int PGReader::readPG(int thr) { PQfinish(connection); - //send the last buffer & notify the end + // send the last buffer & notify the end Header head{}; head.totalSize = sizeWritten; head.totalTuples = tuplesPerBuffer; @@ -494,16 +551,16 @@ int PGReader::readPG(int thr) { xdbcEnv->deserBufferPtr->push(curBid); xdbcEnv->finishedReadThreads.fetch_add(1); - if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) { - for (int i = 0; i < xdbcEnv->deser_parallelism; i++) - xdbcEnv->deserBufferPtr->push(-1); - } + // if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + // { + // for (int i = 0; i < xdbcEnv->deser_parallelism; i++) + // xdbcEnv->deserBufferPtr->push(-1); + // } int deserFinishedCounter = 0; xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "end"}); - spdlog::get("XDBC.SERVER")->info("PG read thread {0} finished. #tuples: {1}, #buffers {2}", - thr, tuplesRead, buffersRead); + spdlog::get("XDBC.SERVER")->info("PG read thread {0} finished. #tuples: {1}, #buffers {2}", thr, tuplesRead, buffersRead); return 1; } diff --git a/DataSources/PQReader/PQReader.cpp b/DataSources/PQReader/PQReader.cpp index bc11a44..0ee9ccd 100644 --- a/DataSources/PQReader/PQReader.cpp +++ b/DataSources/PQReader/PQReader.cpp @@ -9,30 +9,28 @@ #include #include -PQReader::PQReader(RuntimeEnv &xdbcEnv, const std::string &tableName) : - DataSource(xdbcEnv, tableName), - bp(*xdbcEnv.bpPtr), - finishedReading(false), - totalReadBuffers(0), - xdbcEnv(&xdbcEnv) { +PQReader::PQReader(RuntimeEnv &xdbcEnv, const std::string &tableName) : DataSource(xdbcEnv, tableName), + bp(*xdbcEnv.bpPtr), + finishedReading(false), + totalReadBuffers(0), + xdbcEnv(&xdbcEnv) +{ spdlog::get("XDBC.SERVER")->info("Parquet Constructor called with table: {0}", tableName); - } - -void PQReader::readData() { +void PQReader::readData() +{ + xdbcEnv->env_manager_DS.start(); auto start_read = std::chrono::steady_clock::now(); - int threadWrittenTuples[xdbcEnv->deser_parallelism]; - int threadWrittenBuffers[xdbcEnv->deser_parallelism]; + std::vector threadWrittenTuples(xdbcEnv->max_threads, 0); // Initialize all elements to 0 + std::vector threadWrittenBuffers(xdbcEnv->max_threads, 0); // Initialize all elements to 0 std::thread readThreads[xdbcEnv->read_parallelism]; std::thread deSerThreads[xdbcEnv->deser_parallelism]; - size_t numFiles = std::distance(std::filesystem::directory_iterator("/dev/shm/" + tableName), std::filesystem::directory_iterator{}); - spdlog::get("XDBC.SERVER")->info("Parquet files: {0}", numFiles); int partNum = xdbcEnv->read_parallelism; @@ -43,7 +41,8 @@ void PQReader::readData() { if (partSizeDiv.rem > 0) partSize++; - for (int i = partNum - 1; i >= 0; i--) { + for (int i = partNum - 1; i >= 0; i--) + { Part p{}; p.id = i; p.startOff = i * partSize; @@ -54,63 +53,79 @@ void PQReader::readData() { xdbcEnv->partPtr->push(p); - spdlog::get("XDBC.SERVER")->info("Partition {0} [{1},{2}] pushed into queue", - p.id, p.startOff, p.endOff); - + spdlog::get("XDBC.SERVER")->info("Partition {0} [{1},{2}] pushed into queue", p.id, p.startOff, p.endOff); } - //final partition + // final partition Part fP{}; fP.id = -1; - for (int i = 0; i < xdbcEnv->read_parallelism; i++) { - xdbcEnv->partPtr->push(fP); - readThreads[i] = std::thread(&PQReader::readPQ, this, i); + //*** Create threads for read operation + xdbcEnv->env_manager_DS.registerOperation("read", [&](int thr) + { try { + if (thr >= xdbcEnv->max_threads) { + spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); + return; } - + xdbcEnv->partPtr->push(fP); + readPQ(thr); + } catch (const std::exception& e) { + spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); + } catch (...) { + spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); + } }, xdbcEnv->freeBufferPtr); + xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); // start read component threads + //*** Finish creating threads for read operation auto start_deser = std::chrono::steady_clock::now(); - for (int i = 0; i < xdbcEnv->deser_parallelism; i++) { - threadWrittenTuples[i] = 0; - threadWrittenBuffers[i] = 0; - - deSerThreads[i] = std::thread(&PQReader::deserializePQ, - this, i, - std::ref(threadWrittenTuples[i]), std::ref(threadWrittenBuffers[i]) - ); + //*** Create threads for deserialize operation + xdbcEnv->env_manager_DS.registerOperation("deserialize", [&](int thr) + { try { + if (thr >= xdbcEnv->max_threads) { + spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); + return; } + deserializePQ(thr, threadWrittenTuples[thr], threadWrittenBuffers[thr]); + } catch (const std::exception& e) { + spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); + } catch (...) { + spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); + } }, xdbcEnv->deserBufferPtr); + xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); // start deserialize component threads + //*** Finish creating threads for deserialize operation + + // Wait for read to finish and then kill deserialize + xdbcEnv->env_manager_DS.joinThreads("read"); + xdbcEnv->env_manager_DS.configureThreads("deserialize", 0); + xdbcEnv->env_manager_DS.joinThreads("deserialize"); int totalTuples = 0; int totalBuffers = 0; - for (int i = 0; i < xdbcEnv->deser_parallelism; i++) { - deSerThreads[i].join(); + for (int i = 0; i < xdbcEnv->max_threads; i++) + { totalTuples += threadWrittenTuples[i]; totalBuffers += threadWrittenBuffers[i]; } - - for (int i = 0; i < xdbcEnv->read_parallelism; i++) { - readThreads[i].join(); - } - finishedReading.store(true); auto total_deser_time = std::chrono::duration_cast( - std::chrono::steady_clock::now() - start_deser).count(); - - spdlog::get("XDBC.SERVER")->info("Read+Deser | Elapsed time: {0} ms for #tuples: {1}, #buffers: {2}", - total_deser_time / 1000, - totalTuples, totalBuffers); - + std::chrono::steady_clock::now() - start_deser) + .count(); + xdbcEnv->env_manager_DS.stop(); // *** Stop Reconfigurration handler + spdlog::get("XDBC.SERVER")->info("Read+Deser | Elapsed time: {0} ms for #tuples: {1}, #buffers: {2}", total_deser_time / 1000, totalTuples, totalBuffers); } -int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) { +int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) +{ xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "start"}); - if (xdbcEnv->skip_deserializer) { - while (true) { + if (xdbcEnv->skip_deserializer) + { + while (true) + { int inBid = xdbcEnv->deserBufferPtr->pop(); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "pop"}); @@ -121,7 +136,9 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh xdbcEnv->compBufferPtr->push(inBid); totalThreadWrittenBuffers++; } - } else { + } + else + { // Pop a buffer from deserBufferPtr auto deserBuff = xdbcEnv->deserBufferPtr->pop(); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "pop"}); @@ -137,25 +154,35 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh // Precompute column offsets, sizes, and deserializers std::vector columnOffsets(schemaSize); std::vector columnSizes(schemaSize); - std::vector> deserializers(schemaSize); + std::vector> deserializers(schemaSize); size_t rowSize = 0; - for (size_t i = 0; i < schemaSize; ++i) { + for (size_t i = 0; i < schemaSize; ++i) + { const auto &attr = xdbcEnv->schema[i]; columnOffsets[i] = rowSize; - if (attr.tpe[0] == 'I') { - columnSizes[i] = 4; // sizeof(int) + if (attr.tpe[0] == 'I') + { + columnSizes[i] = 4; // sizeof(int) deserializers[i] = deserialize; - } else if (attr.tpe[0] == 'D') { + } + else if (attr.tpe[0] == 'D') + { columnSizes[i] = 8; deserializers[i] = deserialize; - } else if (attr.tpe[0] == 'C') { - columnSizes[i] = 1; // sizeof(char) + } + else if (attr.tpe[0] == 'C') + { + columnSizes[i] = 1; // sizeof(char) deserializers[i] = deserialize; - } else if (attr.tpe[0] == 'S') { + } + else if (attr.tpe[0] == 'S') + { columnSizes[i] = attr.size; deserializers[i] = deserialize; - } else { + } + else + { throw std::runtime_error("Unsupported column type: " + attr.tpe); } rowSize += columnSizes[i]; @@ -163,14 +190,17 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh // Preallocate fixed-size buffers for string attributes std::vector stringBuffers(schemaSize); - for (int colIdx = 0; colIdx < schemaSize; ++colIdx) { + for (int colIdx = 0; colIdx < schemaSize; ++colIdx) + { const auto &attr = xdbcEnv->schema[colIdx]; - if (attr.tpe[0] == 'S') { // STRING or CHAR + if (attr.tpe[0] == 'S') + { // STRING or CHAR stringBuffers[colIdx].resize(attr.size, '\0'); // Fixed size with null padding } } - while (true) { + while (true) + { // Get buffer data from deserBuff const auto *bufferPtr = bp[deserBuff].data() + sizeof(Header); @@ -188,28 +218,36 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh // Deserialize data using StreamReader - while (!stream.eof()) { + while (!stream.eof()) + { - for (int colIdx = 0; colIdx < schemaSize; ++colIdx) { + for (int colIdx = 0; colIdx < schemaSize; ++colIdx) + { const auto &attr = xdbcEnv->schema[colIdx]; void *dest = nullptr; - if (xdbcEnv->iformat == 1) { + if (xdbcEnv->iformat == 1) + { dest = writeBuffPtr + numRows * xdbcEnv->tuple_size + columnOffsets[colIdx]; - } else if (xdbcEnv->iformat == 2) { + } + else if (xdbcEnv->iformat == 2) + { dest = writeBuffPtr + columnOffsets[colIdx] * xdbcEnv->tuples_per_buffer + numRows * attr.size; } - //TODO: check if we can pass the preallocated strings to our deserializers - if (attr.tpe[0] == 'S') { - //std::string buffer; + // TODO: check if we can pass the preallocated strings to our deserializers + if (attr.tpe[0] == 'S') + { + // std::string buffer; auto &buffer = stringBuffers[colIdx]; stream >> buffer; std::memset(dest, 0, attr.size); std::memcpy(dest, buffer.data(), buffer.size()); - } else { + } + else + { // Use deserializer for other types deserializers[colIdx](stream, dest, attr.size); } @@ -220,7 +258,8 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh ++numRows; ++totalThreadWrittenTuples; - if (numRows == xdbcEnv->tuples_per_buffer) { + if (numRows == xdbcEnv->tuples_per_buffer) + { // Write header and push buffer Header head{}; head.totalTuples = numRows; @@ -228,7 +267,7 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh head.intermediateFormat = xdbcEnv->iformat; std::memcpy(bp[writeBuff].data(), &head, sizeof(Header)); - ///test + /// test /*const char *dataPtr = reinterpret_cast(bp[writeBuff].data() + sizeof(Header)); spdlog::get("XDBC.SERVER")->info("First row values:"); @@ -260,16 +299,16 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh spdlog::get("XDBC.SERVER")->info(oss.str()); }*/ - //test + // test xdbcEnv->pts->push( - ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "push"}); + ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "push"}); xdbcEnv->compBufferPtr->push(writeBuff); writeBuff = xdbcEnv->freeBufferPtr->pop(); xdbcEnv->pts->push( - ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "pop"}); + ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "pop"}); writeBuffPtr = bp[writeBuff].data() + sizeof(Header); totalThreadWrittenBuffers++; @@ -285,7 +324,8 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh } // Handle remaining rows - if (numRows > 0) { + if (numRows > 0) + { spdlog::get("XDBC.SERVER")->info("PQ Deser thread {0} has {1} remaining tuples", thr, numRows); Header head{}; head.totalTuples = numRows; @@ -306,17 +346,19 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh // Notify completion xdbcEnv->finishedDeserThreads.fetch_add(1); - if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) { - for (int i = 0; i < xdbcEnv->compression_parallelism; ++i) { - xdbcEnv->compBufferPtr->push(-1); - } - } + // if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) + // { + // for (int i = 0; i < xdbcEnv->compression_parallelism; ++i) + // { + // xdbcEnv->compBufferPtr->push(-1); + // } + // } return 0; } - -int PQReader::readPQ(int thr) { +int PQReader::readPQ(int thr) +{ xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "start"}); @@ -325,15 +367,18 @@ int PQReader::readPQ(int thr) { // Fetch the next partition to process Part curPart = xdbcEnv->partPtr->pop(); - while (curPart.id != -1) { + while (curPart.id != -1) + { // Iterate over the range of partitions - for (int partitionId = curPart.startOff; partitionId < curPart.endOff; ++partitionId) { + for (int partitionId = curPart.startOff; partitionId < curPart.endOff; ++partitionId) + { // Construct the file name for the current partition std::string fileName = baseDir + tableName + "_part" + std::to_string(partitionId) + ".parquet"; // Open the Parquet file std::ifstream parquetFile(fileName, std::ios::binary | std::ios::in); - if (!parquetFile.is_open()) { + if (!parquetFile.is_open()) + { throw std::runtime_error("Failed to open Parquet file: " + fileName); } @@ -356,16 +401,18 @@ int PQReader::readPQ(int thr) { std::memcpy(writeBuffPtr, &head, sizeof(Header)); // Ensure the buffer is large enough - if (fileSize > xdbcEnv->tuples_per_buffer * xdbcEnv->tuple_size) { + if (fileSize > xdbcEnv->tuples_per_buffer * xdbcEnv->tuple_size) + { throw std::runtime_error("Parquet file is larger than the buffer size."); } parquetFile.read(reinterpret_cast(writeBuffPtr + sizeof(Header)), fileSize); - if (parquetFile.gcount() != fileSize) { + if (parquetFile.gcount() != fileSize) + { throw std::runtime_error("Failed to read the entire Parquet file."); } - //spdlog::get("XDBC.SERVER")->info("Reader thr {} writing buffer {} with size {}", thr, writeBuff, fileSize); + // spdlog::get("XDBC.SERVER")->info("Reader thr {} writing buffer {} with size {}", thr, writeBuff, fileSize); // Push the buffer to the next stage xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "push"}); @@ -376,24 +423,25 @@ int PQReader::readPQ(int thr) { // Fetch the next partition curPart = xdbcEnv->partPtr->pop(); - } xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "end"}); xdbcEnv->finishedReadThreads.fetch_add(1); - if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) { - for (int i = 0; i < xdbcEnv->deser_parallelism; i++) - xdbcEnv->deserBufferPtr->push(-1); - } + // if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + // { + // for (int i = 0; i < xdbcEnv->deser_parallelism; i++) + // xdbcEnv->deserBufferPtr->push(-1); + // } return 0; } - -int PQReader::getTotalReadBuffers() const { +int PQReader::getTotalReadBuffers() const +{ return totalReadBuffers; } -bool PQReader::getFinishedReading() const { +bool PQReader::getFinishedReading() const +{ return finishedReading; } From 203d7317b9f2f4a10e3cc03600458ce8781e6346 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 2 Mar 2025 19:14:33 +0100 Subject: [PATCH 06/19] Enable thread reconfiguration using controller --- DataSources/CSVReader/CSVReader.cpp | 23 +++++++++++++++++------ DataSources/DataSource.h | 3 ++- main.cpp | 12 +++++++----- xdbcserver.cpp | 9 +++++++++ xdbcserver.h | 1 - 5 files changed, 35 insertions(+), 13 deletions(-) diff --git a/DataSources/CSVReader/CSVReader.cpp b/DataSources/CSVReader/CSVReader.cpp index 2088289..ba78310 100644 --- a/DataSources/CSVReader/CSVReader.cpp +++ b/DataSources/CSVReader/CSVReader.cpp @@ -191,8 +191,17 @@ void CSVReader::readData() spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); } }, xdbcEnv->deserBufferPtr); xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); // start deserialize component threads - //*** Finish creating threads for deserialize operation + //*** Finish creating threads for deserialize operation + if (xdbcEnv->spawn_source == 1) + { + xdbcEnv->enable_updation_DS = 1; + } + while (xdbcEnv->enable_updation_DS == 1) // Reconfigure threads as long as it is allowed + { + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); + } // Wait for read to finish and then kill deserialize xdbcEnv->env_manager_DS.joinThreads("read"); xdbcEnv->env_manager_DS.configureThreads("deserialize", 0); @@ -307,11 +316,13 @@ int CSVReader::readCSV(int thr) xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "end"}); xdbcEnv->finishedReadThreads.fetch_add(1); - // if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) - // { - // for (int i = 0; i < xdbcEnv->deser_parallelism; i++) - // xdbcEnv->deserBufferPtr->push(-1); - // } + if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + { + xdbcEnv->enable_updation_DS = 0; + xdbcEnv->enable_updation_xServe = 0; + // for (int i = 0; i < xdbcEnv->deser_parallelism; i++) + // xdbcEnv->deserBufferPtr->push(-1); + } file.close(); spdlog::get("XDBC.SERVER")->info("Read thr {0} finished reading", thr); diff --git a/DataSources/DataSource.h b/DataSources/DataSource.h index 319a53c..630344a 100644 --- a/DataSources/DataSource.h +++ b/DataSources/DataSource.h @@ -82,7 +82,8 @@ struct RuntimeEnv int profilingInterval; int spawn_source; - std::atomic enable_updation = 0; + std::atomic enable_updation_xServe = 0; + std::atomic enable_updation_DS = 0; transfer_details tf_paras; int max_threads = 16; EnvironmentManager env_manager_xServer; diff --git a/main.cpp b/main.cpp index 45f3d67..1cb4927 100755 --- a/main.cpp +++ b/main.cpp @@ -153,7 +153,7 @@ nlohmann::json metrics_convert(RuntimeEnv &env) nlohmann::json metrics_json = nlohmann::json::object(); // Use a JSON object // auto env_pts = env->pts->copyAll(); - if ((env.pts) && (env.enable_updation == 1)) + if ((env.pts) && (env.enable_updation_DS == 1) && (env.enable_updation_xServe == 1)) { std::vector env_pts; env_pts = env.pts->copy_newElements(); @@ -205,12 +205,15 @@ void env_convert(RuntimeEnv &env, const nlohmann::json &env_json) // env.network_parallelism = std::stoi(env_json.at("netParallelism").get()); // env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); - if (env.enable_updation == 1) + if (env.enable_updation_DS == 1) { // env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); - // env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); - // env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); + env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); + } + if (env.enable_updation_xServe == 1) + { + env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); } } catch (const std::exception &e) @@ -228,7 +231,6 @@ int main(int argc, char *argv[]) handleCMDParams(argc, argv, xdbcEnv); // ***Setup websocket interface for controller*** - xdbcEnv.enable_updation = 1; std::thread io_thread; WebSocketClient ws_client("xdbc-controller", "8003"); if (xdbcEnv.spawn_source == 1) diff --git a/xdbcserver.cpp b/xdbcserver.cpp index 9857579..ffdcd2f 100755 --- a/xdbcserver.cpp +++ b/xdbcserver.cpp @@ -382,6 +382,15 @@ int XDBCServer::serve() // spdlog::get("XDBC.SERVER")->info("Basesocket signaled with bytes: {0} ", bs); + if (xdbcEnv->spawn_source == 1) + { + xdbcEnv->enable_updation_xServe = 1; + } + while (xdbcEnv->enable_updation_xServe == 1) // Reconfigure threads as long as it is allowed + { + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + xdbcEnv->env_manager_xServer.configureThreads("compress", xdbcEnv->compression_parallelism); + } // Join all the threads t1.join(); xdbcEnv->env_manager_xServer.configureThreads("compress", 0); diff --git a/xdbcserver.h b/xdbcserver.h index 69d432d..11f15b7 100755 --- a/xdbcserver.h +++ b/xdbcserver.h @@ -33,7 +33,6 @@ class XDBCServer explicit XDBCServer(RuntimeEnv &env); int serve(); - // int finishserve(); int send(int threadno, DataSource &dataReader); From 72ca6aaf8886cdd317b1ef616d96b65575d78882 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 2 Mar 2025 22:11:13 +0100 Subject: [PATCH 07/19] Remove commented codes --- Compression/Compressor.cpp | 337 ++++++++++-------- DataSources/CHReader/CHReader.cpp | 21 +- DataSources/CSVReader/CSVReader.cpp | 7 - DataSources/PGReader/PGReader.cpp | 25 +- DataSources/PQReader/PQReader.cpp | 27 +- EnvironmentReconfigure/EnvironmentManager.cpp | 4 +- main.cpp | 21 +- xdbcserver.cpp | 1 + 8 files changed, 235 insertions(+), 208 deletions(-) diff --git a/Compression/Compressor.cpp b/Compression/Compressor.cpp index 506cfd4..d44d1d4 100644 --- a/Compression/Compressor.cpp +++ b/Compression/Compressor.cpp @@ -10,13 +10,13 @@ #include #include -Compressor::Compressor(RuntimeEnv &xdbcEnv) : - xdbcEnv(&xdbcEnv), - bp(*xdbcEnv.bpPtr) { - +Compressor::Compressor(RuntimeEnv &xdbcEnv) : xdbcEnv(&xdbcEnv), + bp(*xdbcEnv.bpPtr) +{ } -void Compressor::compress(int thr, const std::string &compName) { +void Compressor::compress(int thr, const std::string &compName) +{ xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "comp", "start"}); @@ -24,7 +24,8 @@ void Compressor::compress(int thr, const std::string &compName) { int outBufferId; long compressedBuffers = 0; - while (true) { + while (true) + { inBufferId = xdbcEnv->compBufferPtr->pop(); @@ -33,45 +34,50 @@ void Compressor::compress(int thr, const std::string &compName) { xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "comp", "pop"}); - if (!Compressor::getCompId(xdbcEnv->compression_algorithm)) { - //nothing to do, forward buffer + if (!Compressor::getCompId(xdbcEnv->compression_algorithm)) + { + // nothing to do, forward buffer xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "comp", "push"}); xdbcEnv->sendBufferPtr->push(inBufferId); - } else { + } + else + { outBufferId = xdbcEnv->freeBufferPtr->pop(); - //TODO: replace function with a hashmap or similar - //0 nocomp, 1 zstd, 2 snappy, 3 lzo, 4 lz4, 5 zlib, 6 cols + // TODO: replace function with a hashmap or similar + // 0 nocomp, 1 zstd, 2 snappy, 3 lzo, 4 lz4, 5 zlib, 6 cols size_t compId = Compressor::getCompId(xdbcEnv->compression_algorithm); - //spdlog::get("XDBC.SERVER")->warn("Send thread {0} entering compression", thr); + // spdlog::get("XDBC.SERVER")->warn("Send thread {0} entering compression", thr); auto headIn = reinterpret_cast
(bp[inBufferId].data()); auto decompressedPtr = bp[inBufferId].data() + sizeof(Header); std::array compressed_sizes = Compressor::compress_buffer( - xdbcEnv->compression_algorithm, decompressedPtr, bp[outBufferId].data() + sizeof(Header), - headIn->totalSize, - xdbcEnv->tuples_per_buffer, xdbcEnv->schema); + xdbcEnv->compression_algorithm, decompressedPtr, bp[outBufferId].data() + sizeof(Header), + headIn->totalSize, + xdbcEnv->tuples_per_buffer, xdbcEnv->schema); size_t totalSize = 0; - //TODO: check if schema larger than MAX_ATTRIBUTES + // TODO: check if schema larger than MAX_ATTRIBUTES if (compId < 6) totalSize = compressed_sizes[0]; - else { - for (int i = 0; i < xdbcEnv->schema.size(); i++) { + else + { + for (int i = 0; i < xdbcEnv->schema.size(); i++) + { totalSize += compressed_sizes[i]; } } - if (totalSize >= xdbcEnv->buffer_size * 1024 || totalSize <= 0) { - spdlog::get("XDBC.SERVER")->error("Compress thread {} with comp {} invalid size {}/{}", - thr, compId, totalSize, xdbcEnv->buffer_size * 1024); + if (totalSize >= xdbcEnv->buffer_size * 1024 || totalSize <= 0) + { + spdlog::get("XDBC.SERVER")->error("Compress thread {} with comp {} invalid size {}/{}", thr, compId, totalSize, xdbcEnv->buffer_size * 1024); compId = 0; } - //TODO: create more sophisticated header with checksum etc + // TODO: create more sophisticated header with checksum etc Header head{}; head.totalTuples = headIn->totalTuples; @@ -79,45 +85,43 @@ void Compressor::compress(int thr, const std::string &compName) { head.totalSize = totalSize; head.uncompressedSize = headIn->totalSize; head.intermediateFormat = headIn->intermediateFormat; - //head.crc = compute_crc(bp[bufferId].data(), totalSize); - //head.attributeComp; - //head.attributeSize = compressed_sizes; + // head.crc = compute_crc(bp[bufferId].data(), totalSize); + // head.attributeComp; + // head.attributeSize = compressed_sizes; std::copy(compressed_sizes.begin(), compressed_sizes.end(), head.attributeSize); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "comp", "push"}); - if (compId == 0) { - //comp unsuccessful + if (compId == 0) + { + // comp unsuccessful std::memcpy(bp[inBufferId].data(), &head, sizeof(Header)); - //forward in buffer with new header + // forward in buffer with new header xdbcEnv->sendBufferPtr->push(inBufferId); - //release out buffer + // release out buffer xdbcEnv->freeBufferPtr->push(outBufferId); - } else { + } + else + { /*spdlog::get("XDBC.SERVER")->warn("Entering compid {}, totalSize {}, tuples {}, freeQ size {}", compId, head.totalSize, head.totalTuples, xdbcEnv->freeBufferPtr->size());*/ std::memcpy(bp[outBufferId].data(), &head, sizeof(Header)); xdbcEnv->sendBufferPtr->push(outBufferId); xdbcEnv->freeBufferPtr->push(inBufferId); - //spdlog::get("XDBC.SERVER")->warn("Exiting, free size {}", xdbcEnv->freeBufferPtr->size()); - + // spdlog::get("XDBC.SERVER")->warn("Exiting, free size {}", xdbcEnv->freeBufferPtr->size()); } compressedBuffers++; } } xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "comp", "end"}); - //notify that we finished + // notify that we finished xdbcEnv->finishedCompThreads.fetch_add(1); - // if (xdbcEnv->finishedCompThreads == xdbcEnv->compression_parallelism) { - // for (int i = 0; i < xdbcEnv->network_parallelism; i++) - // xdbcEnv->sendBufferPtr->push(-1); - // } - } -size_t Compressor::getCompId(const std::string &name) { +size_t Compressor::getCompId(const std::string &name) +{ if (name == "nocomp") return 0; @@ -136,13 +140,15 @@ size_t Compressor::getCompId(const std::string &name) { return 0; } -//TODO: recycle compression resources like context, temp buffer etc -//TODO: check that we dont exceed size by calculating the max compressed size funcs of the libs +// TODO: recycle compression resources like context, temp buffer etc +// TODO: check that we dont exceed size by calculating the max compressed size funcs of the libs -size_t Compressor::compress_zstd(void *data, void *dst, size_t size) { +size_t Compressor::compress_zstd(void *data, void *dst, size_t size) +{ size_t compressedSize = ZSTD_compress(dst, size, data, size, 1); - if (ZSTD_isError(compressedSize)) { + if (ZSTD_isError(compressedSize)) + { spdlog::get("XDBC.SERVER")->warn("Zstd Compression error: {0}", std::string(ZSTD_getErrorName(compressedSize))); return size; } @@ -150,13 +156,14 @@ size_t Compressor::compress_zstd(void *data, void *dst, size_t size) { return compressedSize; } - -size_t Compressor::compress_snappy(void *data, void *dst, size_t size) { - //size_t maxCompressedSize = snappy::MaxCompressedLength(size); +size_t Compressor::compress_snappy(void *data, void *dst, size_t size) +{ + // size_t maxCompressedSize = snappy::MaxCompressedLength(size); size_t compressedSize; snappy::RawCompress(static_cast(data), size, static_cast(dst), &compressedSize); - if (compressedSize >= size) { + if (compressedSize >= size) + { spdlog::get("XDBC.SERVER")->info("Snappy compression not effective. Returning original size."); return size; } @@ -164,13 +171,14 @@ size_t Compressor::compress_snappy(void *data, void *dst, size_t size) { return compressedSize; } +size_t Compressor::compress_lzo(void *src, void *dst, size_t size) +{ + // size_t maxCompressedSize = size + (size / 16) + 64 + 3; -size_t Compressor::compress_lzo(void *src, void *dst, size_t size) { - //size_t maxCompressedSize = size + (size / 16) + 64 + 3; - - //allocate the work memory required by LZO - lzo_voidp wrkmem = (lzo_voidp) malloc(LZO1X_1_MEM_COMPRESS); - if (!wrkmem) { + // allocate the work memory required by LZO + lzo_voidp wrkmem = (lzo_voidp)malloc(LZO1X_1_MEM_COMPRESS); + if (!wrkmem) + { spdlog::get("XDBC.SERVER")->warn("lzo compression error: failed to allocate work memory."); return size; } @@ -179,34 +187,38 @@ size_t Compressor::compress_lzo(void *src, void *dst, size_t size) { int result = lzo1x_1_compress(static_cast(src), size, static_cast(dst), &compressedSize, wrkmem); - //free the work memory + // free the work memory free(wrkmem); - if (result != LZO_E_OK) { + if (result != LZO_E_OK) + { spdlog::get("XDBC.SERVER")->warn("lzo compression error: result code {0}.", result); return size; } - if (compressedSize > size) { - spdlog::get("XDBC.SERVER")->warn("lzo compression error: compressed size exceeds maximum size: {0}/{1}", - size, compressedSize); + if (compressedSize > size) + { + spdlog::get("XDBC.SERVER")->warn("lzo compression error: compressed size exceeds maximum size: {0}/{1}", size, compressedSize); return size; } return compressedSize; } -size_t Compressor::compress_lz4(void *src, void *dst, size_t size) { - //int maxCompressedSize = LZ4_compressBound(size); +size_t Compressor::compress_lz4(void *src, void *dst, size_t size) +{ + // int maxCompressedSize = LZ4_compressBound(size); int compressedSize = LZ4_compress_default(static_cast(src), static_cast(dst), size, size); - if (compressedSize <= 0) { + if (compressedSize <= 0) + { spdlog::get("XDBC.SERVER")->warn("lz4 compression error: {0}", compressedSize); return size; } - if (compressedSize >= static_cast(size)) { + if (compressedSize >= static_cast(size)) + { spdlog::get("XDBC.SERVER")->info("lz4 compression not effective. Returning original size."); return size; } @@ -214,24 +226,26 @@ size_t Compressor::compress_lz4(void *src, void *dst, size_t size) { return compressedSize; } - -size_t Compressor::compress_zlib(void *src, void *dst, size_t size) { +size_t Compressor::compress_zlib(void *src, void *dst, size_t size) +{ uLongf maxCompressedSize = compressBound(size); uLongf compressedSize = maxCompressedSize; - //maximum compression level + // maximum compression level int compression_level = 9; int result = compress2(static_cast(dst), &compressedSize, static_cast(src), size, compression_level); - if (result != Z_OK) { + if (result != Z_OK) + { spdlog::get("XDBC.SERVER")->warn("ZLIB compression error: {0}", zError(result)); return size; } - if (compressedSize >= size) { + if (compressedSize >= size) + { spdlog::get("XDBC.SERVER")->info("ZLIB compression not effective, returning original size."); return size; } @@ -239,86 +253,97 @@ size_t Compressor::compress_zlib(void *src, void *dst, size_t size) { return compressedSize; } - std::array Compressor::compress_buffer(const std::string &method, void *src, void *dst, size_t size, size_t buff_size, - const std::vector &schema) { + const std::vector &schema) +{ - - //1 zstd - //2 snappy - //3 lzo - //4 lz4 - //5 zlib - //6 cols + // 1 zstd + // 2 snappy + // 3 lzo + // 4 lz4 + // 5 zlib + // 6 cols std::array ret{}; for (size_t i = 0; i < MAX_ATTRIBUTES; i++) ret[i] = 0; auto compMeth = getCompId(method); - switch (compMeth) { - case 0: { - ret[0] = size; - break; - } - case 1: { - ret[0] = compress_zstd(src, dst, size); - break; - } - case 2: { - ret[0] = compress_snappy(src, dst, size); - break; - } - case 3: { - ret[0] = compress_lzo(src, dst, size); - break; - } - case 4: { - ret[0] = compress_lz4(src, dst, size); - break; - } - case 5: { - ret[0] = compress_zlib(src, dst, size); - break; - } - case 6: { - ret = compress_cols(src, dst, size, buff_size, schema); - break; - } - default: { - std::memcpy(dst, src, size); - ret[0] = size; - break; - } + switch (compMeth) + { + case 0: + { + ret[0] = size; + break; + } + case 1: + { + ret[0] = compress_zstd(src, dst, size); + break; + } + case 2: + { + ret[0] = compress_snappy(src, dst, size); + break; + } + case 3: + { + ret[0] = compress_lzo(src, dst, size); + break; + } + case 4: + { + ret[0] = compress_lz4(src, dst, size); + break; + } + case 5: + { + ret[0] = compress_zlib(src, dst, size); + break; + } + case 6: + { + ret = compress_cols(src, dst, size, buff_size, schema); + break; + } + default: + { + std::memcpy(dst, src, size); + ret[0] = size; + break; + } } - return ret; } -//compress floating-point data -static size_t compress(FPZ *fpz, const void *data) { +// compress floating-point data +static size_t compress(FPZ *fpz, const void *data) +{ size_t size; /* write header */ - if (!fpzip_write_header(fpz)) { + if (!fpzip_write_header(fpz)) + { fprintf(stderr, "cannot write header: %s\n", fpzip_errstr[fpzip_errno]); return 0; } - //perform actual compression + // perform actual compression size = fpzip_write(fpz, data); - if (!size) { + if (!size) + { spdlog::get("XDBC.SERVER")->error("Compressor: fpzip failed, error: {0}", fpzip_errstr[fpzip_errno]); return 0; } return size; } -size_t compressIntColumn(uint32_t *in, void *out, size_t buff_size) { +size_t compressIntColumn(uint32_t *in, void *out, size_t buff_size) +{ - //memcpy(out, in, buffer_size * 48); - //return buffer_size; - //spdlog::get("XDBC.SERVER")->error("Compressor: Entered compressIntCol, input size: {0}", size); + // memcpy(out, in, buffer_size * 48); + // return buffer_size; + // spdlog::get("XDBC.SERVER")->error("Compressor: Entered compressIntCol, input size: {0}", size); std::vector compressedData(buff_size); @@ -327,19 +352,19 @@ size_t compressIntColumn(uint32_t *in, void *out, size_t buff_size) { IntegerCODEC &codec = *factory.getFromName("simdfastpfor256"); - //std::vector compressed_output(buffer_size + 1024); + // std::vector compressed_output(buffer_size + 1024); size_t compressedsize = buff_size + 1024; codec.encodeArray(in, buff_size, reinterpret_cast(out), compressedsize); - //compressed_output.resize(compressedsize); - //compressed_output.shrink_to_fit(); + // compressed_output.resize(compressedsize); + // compressed_output.shrink_to_fit(); - //memcpy(out, compressedData.data(), compressedsize * 4); + // memcpy(out, compressedData.data(), compressedsize * 4); - //spdlog::get("XDBC.SERVER")->error("compressedsize: {0}, in first value: {1}", compressedsize, uint_data[0]); - //TODO: remove code for decompression - //auto testData = reinterpret_cast(out); + // spdlog::get("XDBC.SERVER")->error("compressedsize: {0}, in first value: {1}", compressedsize, uint_data[0]); + // TODO: remove code for decompression + // auto testData = reinterpret_cast(out); /*std::vector mydataback(buffer_size); size_t recoveredsize = mydataback.size(); @@ -351,21 +376,21 @@ size_t compressIntColumn(uint32_t *in, void *out, size_t buff_size) { if (mydataback[0] != testData[0]) { spdlog::get("XDBC.SERVER")->error("Unequal values: {0}!={1}", mydataback[0], testData[0]); }*/ - //spdlog::get("XDBC.SERVER")->error("Compressor: compressed input: {0}, output: {1}", buffer_size, compressedsize); - //spdlog::get("XDBC.SERVER")->error("Decompressed data: {0}", mydataback[0]); + // spdlog::get("XDBC.SERVER")->error("Compressor: compressed input: {0}, output: {1}", buffer_size, compressedsize); + // spdlog::get("XDBC.SERVER")->error("Decompressed data: {0}", mydataback[0]); return compressedsize; } -size_t compressDoubleColumn(const double *in, void *out, size_t size) { - +size_t compressDoubleColumn(const double *in, void *out, size_t size) +{ int status; size_t inbytes = size * sizeof(double); size_t bufbytes = 1024 + inbytes; size_t outbytes = 0; - //void *buffer = malloc(bufbytes); + // void *buffer = malloc(bufbytes); /* compress to memory */ FPZ *fpz = fpzip_write_to_buffer(out, bufbytes); @@ -379,16 +404,16 @@ size_t compressDoubleColumn(const double *in, void *out, size_t size) { status = (0 < outbytes && outbytes <= bufbytes); fpzip_write_close(fpz); - return outbytes; } std::array Compressor::compress_cols(void *src, void *dst, size_t size, size_t buff_size, - const std::vector &schema) { + const std::vector &schema) +{ std::array compressedColumns{}; - //TODO: get schema size automatically + // TODO: get schema size automatically std::vector outputBuf(size); auto compressedPtr = outputBuf.data(); @@ -396,51 +421,51 @@ std::array Compressor::compress_cols(void *src, void *ds size_t totalSize = 0; int bytesWritten = 0; - for (const auto &attribute: schema) { + for (const auto &attribute : schema) + { - //spdlog::get("XDBC.SERVER")->warn("handling attribute: {0} with attributeNum: {1}", std::get<0>(attribute), attributeNum); + // spdlog::get("XDBC.SERVER")->warn("handling attribute: {0} with attributeNum: {1}", std::get<0>(attribute), attributeNum); size_t compressedDataSize = 0; - if (attribute.tpe == "INT") { + if (attribute.tpe == "INT") + { uint32_t *decompressedPtr = reinterpret_cast(reinterpret_cast(src) + buff_size * bytesWritten); size_t compressedDataSizeElements = compressIntColumn(decompressedPtr, compressedPtr, buff_size); compressedDataSize = compressedDataSizeElements * attribute.size; bytesWritten += 4; - //compressedDataSize += 4 * buffer_size; - //spdlog::get("XDBC.SERVER")->warn("compressedDataSize: {0}, added {1} ", compressedDataSize, 4*buffer_size); - } else if (attribute.tpe == "DOUBLE") { - //TODO: refactor compress fpzip in other function + // compressedDataSize += 4 * buffer_size; + // spdlog::get("XDBC.SERVER")->warn("compressedDataSize: {0}, added {1} ", compressedDataSize, 4*buffer_size); + } + else if (attribute.tpe == "DOUBLE") + { + // TODO: refactor compress fpzip in other function double *decompressedPtr = reinterpret_cast(reinterpret_cast(src) + buff_size * bytesWritten); - //compressedDataSize = compress_zstd(decompressedPtr, compressedPtr, buff_size * 8); + // compressedDataSize = compress_zstd(decompressedPtr, compressedPtr, buff_size * 8); compressedDataSize = compressDoubleColumn(decompressedPtr, compressedPtr, buff_size); bytesWritten += 8; - //auto startPtr = reinterpret_cast(data) + buff_size * attributeNum; - + // auto startPtr = reinterpret_cast(data) + buff_size * attributeNum; - //compressedDataSize += 8 * buffer_size; + // compressedDataSize += 8 * buffer_size; } - //TODO: add more attributes (CHAR, STRING) + // TODO: add more attributes (CHAR, STRING) compressedColumns[attributeNum] = compressedDataSize; totalSize += compressedDataSize; attributeNum++; compressedPtr += compressedDataSize; - //spdlog::get("XDBC.SERVER")->warn("compressedDataSize: {0}", compressedDataSize); + // spdlog::get("XDBC.SERVER")->warn("compressedDataSize: {0}", compressedDataSize); } - if (totalSize >= size) { + if (totalSize >= size) + { spdlog::get("XDBC.SERVER")->warn("Compressor: buffer overflow {0}/{1}", totalSize, size); - //set first entry to total size, handle to send uncompressed + // set first entry to total size, handle to send uncompressed compressedColumns[0] = size; - } else + } + else std::memcpy(dst, outputBuf.data(), totalSize); return compressedColumns; } - - - - - diff --git a/DataSources/CHReader/CHReader.cpp b/DataSources/CHReader/CHReader.cpp index 76eff14..2b183dc 100644 --- a/DataSources/CHReader/CHReader.cpp +++ b/DataSources/CHReader/CHReader.cpp @@ -88,7 +88,16 @@ void CHReader::readData() xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); // start deserialize component threads //*** Finish creating threads for deserialize operation - // Wait for read to finish + if (xdbcEnv->spawn_source == 1) + { + xdbcEnv->enable_updation_DS = 1; + } + // while (xdbcEnv->enable_updation_DS == 1) // Reconfigure threads as long as it is allowed + // { + // std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + // xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); + // } + xdbcEnv->env_manager_DS.joinThreads("read"); int total = 0; @@ -275,11 +284,11 @@ int CHReader::chWriteToBp(int thr, int &totalThreadWrittenTuples, int &totalThre } // notify that we finished xdbcEnv->finishedReadThreads.fetch_add(1); - // if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) - // { - // for (int i = 0; i < xdbcEnv->compression_parallelism; i++) - // xdbcEnv->compBufferPtr->push(-1); - // } + if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + { + xdbcEnv->enable_updation_DS = 0; + xdbcEnv->enable_updation_xServe = 0; + } return 1; } diff --git a/DataSources/CSVReader/CSVReader.cpp b/DataSources/CSVReader/CSVReader.cpp index ba78310..15f6b31 100644 --- a/DataSources/CSVReader/CSVReader.cpp +++ b/DataSources/CSVReader/CSVReader.cpp @@ -320,8 +320,6 @@ int CSVReader::readCSV(int thr) { xdbcEnv->enable_updation_DS = 0; xdbcEnv->enable_updation_xServe = 0; - // for (int i = 0; i < xdbcEnv->deser_parallelism; i++) - // xdbcEnv->deserBufferPtr->push(-1); } file.close(); @@ -600,11 +598,6 @@ int CSVReader::deserializeCSV(int thr, int &totalThreadWrittenTuples, int &total spdlog::get("XDBC.SERVER")->info("CSV Deser thread {0} finished. buffers: {1}, tuples {2}", thr, totalThreadWrittenBuffers, totalThreadWrittenTuples); xdbcEnv->finishedDeserThreads.fetch_add(1); - // if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) - // { - // for (int i = 0; i < xdbcEnv->compression_parallelism; i++) - // xdbcEnv->compBufferPtr->push(-1); - // } xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "end"}); diff --git a/DataSources/PGReader/PGReader.cpp b/DataSources/PGReader/PGReader.cpp index 95e5a90..939d1df 100644 --- a/DataSources/PGReader/PGReader.cpp +++ b/DataSources/PGReader/PGReader.cpp @@ -222,6 +222,16 @@ int PGReader::read_pq_copy() xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); // start deserialize component threads //*** Finish creating threads for deserialize operation + if (xdbcEnv->spawn_source == 1) + { + xdbcEnv->enable_updation_DS = 1; + } + while (xdbcEnv->enable_updation_DS == 1) // Reconfigure threads as long as it is allowed + { + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); + } + // Wait for read to finish and then kill deserialize xdbcEnv->env_manager_DS.joinThreads("read"); xdbcEnv->env_manager_DS.configureThreads("deserialize", 0); @@ -403,11 +413,6 @@ int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalTh spdlog::get("XDBC.SERVER")->info("PG Deser thread {0} finished. buffers: {1}, tuples {2}", thr, totalThreadWrittenBuffers, totalThreadWrittenTuples); xdbcEnv->finishedDeserThreads.fetch_add(1); - // if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) - // { - // for (int i = 0; i < xdbcEnv->compression_parallelism; i++) - // xdbcEnv->compBufferPtr->push(-1); - // } xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "end"}); @@ -551,11 +556,11 @@ int PGReader::readPG(int thr) xdbcEnv->deserBufferPtr->push(curBid); xdbcEnv->finishedReadThreads.fetch_add(1); - // if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) - // { - // for (int i = 0; i < xdbcEnv->deser_parallelism; i++) - // xdbcEnv->deserBufferPtr->push(-1); - // } + if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + { + xdbcEnv->enable_updation_DS = 0; + xdbcEnv->enable_updation_xServe = 0; + } int deserFinishedCounter = 0; diff --git a/DataSources/PQReader/PQReader.cpp b/DataSources/PQReader/PQReader.cpp index 0ee9ccd..435e16c 100644 --- a/DataSources/PQReader/PQReader.cpp +++ b/DataSources/PQReader/PQReader.cpp @@ -95,6 +95,16 @@ void PQReader::readData() xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); // start deserialize component threads //*** Finish creating threads for deserialize operation + if (xdbcEnv->spawn_source == 1) + { + xdbcEnv->enable_updation_DS = 1; + } + while (xdbcEnv->enable_updation_DS == 1) // Reconfigure threads as long as it is allowed + { + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); + } + // Wait for read to finish and then kill deserialize xdbcEnv->env_manager_DS.joinThreads("read"); xdbcEnv->env_manager_DS.configureThreads("deserialize", 0); @@ -346,13 +356,6 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh // Notify completion xdbcEnv->finishedDeserThreads.fetch_add(1); - // if (xdbcEnv->finishedDeserThreads == xdbcEnv->deser_parallelism) - // { - // for (int i = 0; i < xdbcEnv->compression_parallelism; ++i) - // { - // xdbcEnv->compBufferPtr->push(-1); - // } - // } return 0; } @@ -427,11 +430,11 @@ int PQReader::readPQ(int thr) xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "end"}); xdbcEnv->finishedReadThreads.fetch_add(1); - // if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) - // { - // for (int i = 0; i < xdbcEnv->deser_parallelism; i++) - // xdbcEnv->deserBufferPtr->push(-1); - // } + if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + { + xdbcEnv->enable_updation_DS = 0; + xdbcEnv->enable_updation_xServe = 0; + } return 0; } diff --git a/EnvironmentReconfigure/EnvironmentManager.cpp b/EnvironmentReconfigure/EnvironmentManager.cpp index 3cf9790..9abdacf 100644 --- a/EnvironmentReconfigure/EnvironmentManager.cpp +++ b/EnvironmentReconfigure/EnvironmentManager.cpp @@ -137,7 +137,7 @@ void EnvironmentManager::run() spdlog::error("Unknown exception in thread {}", thread_id); } }); } - spdlog::info("Reconfigure thread for operation {0} by {1}", name, delta_threads); + spdlog::info("Reconfigure threads for operation {0} by +{1}", name, delta_threads); } else if (delta_threads < 0) { @@ -151,7 +151,7 @@ void EnvironmentManager::run() } operation.poisonQueue->push(-1); } - spdlog::info("Reconfigure thread for operation {0} by {1}", name, delta_threads); + spdlog::info("Reconfigure threads for operation {0} by {1}", name, delta_threads); } operation.active_threads = operation.desired_threads; diff --git a/main.cpp b/main.cpp index 1cb4927..7fef77d 100755 --- a/main.cpp +++ b/main.cpp @@ -181,10 +181,10 @@ nlohmann::json additional_msg(RuntimeEnv &env) { nlohmann::json metrics_json = nlohmann::json::object(); // Use a JSON object metrics_json["totalTime_ms"] = env.tf_paras.elapsed_time; - // metrics_json["freeBufferQ_load"] = std::get<0>(env.tf_paras.latest_queueSizes); - // metrics_json["compressedBufferQ_load"] = std::get<1>(env.tf_paras.latest_queueSizes); - // metrics_json["decompressedBufferQ_load"] = std::get<2>(env.tf_paras.latest_queueSizes); - // metrics_json["deserializedBufferQ_load"] = std::get<3>(env.tf_paras.latest_queueSizes); + metrics_json["readBufferQ_load"] = std::get<0>(env.tf_paras.latest_queueSizes); + metrics_json["deserializedBufferQ_load"] = std::get<1>(env.tf_paras.latest_queueSizes); + metrics_json["compressedBufferQ_load"] = std::get<2>(env.tf_paras.latest_queueSizes); + metrics_json["sendBufferQ_load"] = std::get<3>(env.tf_paras.latest_queueSizes); return metrics_json; } @@ -192,23 +192,14 @@ void env_convert(RuntimeEnv &env, const nlohmann::json &env_json) { try { - // env.transfer_id = std::stoll(env_json.at("transferID").get()); - // env.system = env_json.at("system").get(); - // env.compression_algorithm = env_json.at("compressionType").get(); - // env.iformat = std::stoi(env_json.at("intermediateFormat").get()); // env.buffer_size = std::stoi(env_json.at("bufferSize").get()); // env.buffers_in_bufferpool = std::stoi(env_json.at("bufferpoolSize").get()) / env_.buffer_size; - // env.sleep_time = std::chrono::milliseconds(std::stoll(env_json.at("sleepTime").get())); // env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); // env.read_partitions = std::stoi(env_json.at("readPartitions").get()); - // env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); // env.network_parallelism = std::stoi(env_json.at("netParallelism").get()); - // env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); if (env.enable_updation_DS == 1) { - - // env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); } if (env.enable_updation_xServe == 1) @@ -245,7 +236,7 @@ int main(int argc, char *argv[]) std::this_thread::sleep_for(std::chrono::milliseconds(100)); } } - // ***Finished setup websocket interface for controller*** + // ***Finished setting up websocket interface for controller*** auto start = std::chrono::steady_clock::now(); @@ -254,7 +245,7 @@ int main(int argc, char *argv[]) auto end = std::chrono::steady_clock::now(); auto total_time = std::chrono::duration_cast(end - start).count(); - + xdbcEnv.tf_paras.elapsed_time = static_cast(total_time); spdlog::get("XDBC.SERVER")->info("Total elapsed time: {} ms", total_time); auto pts = std::vector(xdbcEnv.pts->size()); diff --git a/xdbcserver.cpp b/xdbcserver.cpp index ffdcd2f..0eaaf39 100755 --- a/xdbcserver.cpp +++ b/xdbcserver.cpp @@ -151,6 +151,7 @@ void XDBCServer::monitorQueues() // Store the measurement as a tuple xdbcEnv->queueSizes.emplace_back(curTimeInterval, readBufferTotalSize, deserBufferTotalSize, compressedBufferTotalSize, sendBufferTotalSize); + xdbcEnv->tf_paras.latest_queueSizes = std::make_tuple(readBufferTotalSize, deserBufferTotalSize, compressedBufferTotalSize, sendBufferTotalSize); std::this_thread::sleep_for(std::chrono::milliseconds(xdbcEnv->profilingInterval)); curTimeInterval += xdbcEnv->profilingInterval / 1000; From d117a2afcb06a79f4dcae98d23c474e27442a545 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 9 Mar 2025 18:04:01 +0100 Subject: [PATCH 08/19] Enable choice of partitions to user --- DataSources/CSVReader/CSVReader.cpp | 9 ++++++--- DataSources/DataSource.h | 1 + main.cpp | 2 +- xdbcserver.cpp | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/DataSources/CSVReader/CSVReader.cpp b/DataSources/CSVReader/CSVReader.cpp index 15f6b31..740ebf4 100644 --- a/DataSources/CSVReader/CSVReader.cpp +++ b/DataSources/CSVReader/CSVReader.cpp @@ -131,7 +131,7 @@ void CSVReader::readData() int maxRowNum = wc(fileName.c_str()); spdlog::get("XDBC.SERVER")->info("CSV line number: {0}", maxRowNum); - int partNum = xdbcEnv->read_parallelism; + int partNum = xdbcEnv->read_partitions; div_t partSizeDiv = div(maxRowNum, partNum); int partSize = partSizeDiv.quot; @@ -227,7 +227,7 @@ void CSVReader::readData() int CSVReader::readCSV(int thr) { - + xdbcEnv->activeReadThreads.fetch_add(1); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "start"}); // auto fileName = "/dev/shm/" + tableName + "_" + thrStrNum + ".csv"; auto fileName = "/dev/shm/" + tableName + ".csv"; @@ -255,6 +255,7 @@ int CSVReader::readCSV(int thr) while (curPart.id != -1) { + spdlog::get("XDBC.SERVER")->info("Fetching partition {0}", curPart.id); // skip to our starting offset while (currentLine < curPart.startOff && std::getline(file, line)) { @@ -299,6 +300,7 @@ int CSVReader::readCSV(int thr) ++currentLine; } currentLine = 0; + file.seekg(0, std::ios::beg); // Reset file pointer to the beginning curPart = xdbcEnv->partPtr->pop(); } @@ -316,7 +318,8 @@ int CSVReader::readCSV(int thr) xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "end"}); xdbcEnv->finishedReadThreads.fetch_add(1); - if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + xdbcEnv->activeReadThreads.fetch_add(-1); + if (xdbcEnv->activeReadThreads == 0) { xdbcEnv->enable_updation_DS = 0; xdbcEnv->enable_updation_xServe = 0; diff --git a/DataSources/DataSource.h b/DataSources/DataSource.h index 630344a..b40c960 100644 --- a/DataSources/DataSource.h +++ b/DataSources/DataSource.h @@ -61,6 +61,7 @@ struct RuntimeEnv int compression_parallelism; bool skip_deserializer; + std::atomic activeReadThreads; std::atomic finishedReadThreads; std::atomic finishedDeserThreads; std::atomic finishedCompThreads; diff --git a/main.cpp b/main.cpp index 7fef77d..196b932 100755 --- a/main.cpp +++ b/main.cpp @@ -195,11 +195,11 @@ void env_convert(RuntimeEnv &env, const nlohmann::json &env_json) // env.buffer_size = std::stoi(env_json.at("bufferSize").get()); // env.buffers_in_bufferpool = std::stoi(env_json.at("bufferpoolSize").get()) / env_.buffer_size; // env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); - // env.read_partitions = std::stoi(env_json.at("readPartitions").get()); // env.network_parallelism = std::stoi(env_json.at("netParallelism").get()); if (env.enable_updation_DS == 1) { + env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); } if (env.enable_updation_xServe == 1) diff --git a/xdbcserver.cpp b/xdbcserver.cpp index 0eaaf39..ed80c25 100755 --- a/xdbcserver.cpp +++ b/xdbcserver.cpp @@ -65,6 +65,7 @@ XDBCServer::XDBCServer(RuntimeEnv &xdbcEnv) // initialize read thread status xdbcEnv.finishedReadThreads.store(0); + xdbcEnv.activeReadThreads.store(0); // initialize free queue xdbcEnv.freeBufferPtr = std::make_shared>(); From 53aa1d40faa3d9e698525bc662b044b81218c44b Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 9 Mar 2025 19:36:13 +0100 Subject: [PATCH 09/19] Enable runtime reconfiguration of read parallelism for csv --- DataSources/CSVReader/CSVReader.cpp | 23 +++++++++++------------ DataSources/DataSource.h | 2 ++ xdbcserver.cpp | 3 ++- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/DataSources/CSVReader/CSVReader.cpp b/DataSources/CSVReader/CSVReader.cpp index 740ebf4..ddb305a 100644 --- a/DataSources/CSVReader/CSVReader.cpp +++ b/DataSources/CSVReader/CSVReader.cpp @@ -139,6 +139,7 @@ void CSVReader::readData() if (partSizeDiv.rem > 0) partSize++; + xdbcEnv->readPart_info.resize(partNum); for (int i = partNum - 1; i >= 0; i--) { Part p{}; @@ -148,16 +149,12 @@ void CSVReader::readData() if (i == partNum - 1) p.endOff = maxRowNum; - - xdbcEnv->partPtr->push(p); + xdbcEnv->readPartPtr->push(i); + xdbcEnv->readPart_info[i] = p; spdlog::get("XDBC.SERVER")->info("Partition {0} [{1},{2}] pushed into queue", p.id, p.startOff, p.endOff); } - // final partition - Part fP{}; - fP.id = -1; - //*** Create threads for read operation xdbcEnv->env_manager_DS.registerOperation("read", [&](int thr) { try { @@ -165,13 +162,13 @@ void CSVReader::readData() spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); return; } - xdbcEnv->partPtr->push(fP); + xdbcEnv->readPartPtr->push(-1); readCSV(thr); } catch (const std::exception& e) { spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); } catch (...) { spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); - } }, xdbcEnv->freeBufferPtr); + } }, xdbcEnv->readPartPtr); xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); // start read component threads //*** Finish creating threads for read operation @@ -201,6 +198,7 @@ void CSVReader::readData() { std::this_thread::sleep_for(std::chrono::milliseconds(1000)); xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); + xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); } // Wait for read to finish and then kill deserialize xdbcEnv->env_manager_DS.joinThreads("read"); @@ -243,7 +241,8 @@ int CSVReader::readCSV(int thr) int curBid = xdbcEnv->freeBufferPtr->pop(); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "pop"}); - Part curPart = xdbcEnv->partPtr->pop(); + int part_id = xdbcEnv->readPartPtr->pop(); + Part curPart; std::string line; int currentLine = 0; @@ -253,8 +252,9 @@ int CSVReader::readCSV(int thr) size_t tuplesRead = 0; size_t tuplesWritten = 0; - while (curPart.id != -1) + while (part_id != -1) { + curPart = xdbcEnv->readPart_info[part_id]; spdlog::get("XDBC.SERVER")->info("Fetching partition {0}", curPart.id); // skip to our starting offset while (currentLine < curPart.startOff && std::getline(file, line)) @@ -301,8 +301,7 @@ int CSVReader::readCSV(int thr) } currentLine = 0; file.seekg(0, std::ios::beg); // Reset file pointer to the beginning - - curPart = xdbcEnv->partPtr->pop(); + part_id = xdbcEnv->readPartPtr->pop(); } Header head{}; diff --git a/DataSources/DataSource.h b/DataSources/DataSource.h index b40c960..c430980 100644 --- a/DataSources/DataSource.h +++ b/DataSources/DataSource.h @@ -72,7 +72,9 @@ struct RuntimeEnv FBQ_ptr compBufferPtr; FBQ_ptr sendBufferPtr; FPQ_ptr partPtr; + FBQ_ptr readPartPtr; + std::vector readPart_info; std::vector> *bpPtr; std::string system; std::vector schema; diff --git a/xdbcserver.cpp b/xdbcserver.cpp index ed80c25..2888805 100755 --- a/xdbcserver.cpp +++ b/xdbcserver.cpp @@ -67,8 +67,9 @@ XDBCServer::XDBCServer(RuntimeEnv &xdbcEnv) xdbcEnv.finishedReadThreads.store(0); xdbcEnv.activeReadThreads.store(0); - // initialize free queue + // initialize free queue and partition queue xdbcEnv.freeBufferPtr = std::make_shared>(); + xdbcEnv.readPartPtr = std::make_shared>(); // initially all buffers are put in the free buffer queue for (int i = 0; i < xdbcEnv.buffers_in_bufferpool; i++) From 3c3d2b0729d3518fb1d3e5733b4c812fcbd32812 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 9 Mar 2025 19:57:00 +0100 Subject: [PATCH 10/19] Enable runtime reconfigure for PQ read --- DataSources/PQReader/PQReader.cpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/DataSources/PQReader/PQReader.cpp b/DataSources/PQReader/PQReader.cpp index 435e16c..a11165d 100644 --- a/DataSources/PQReader/PQReader.cpp +++ b/DataSources/PQReader/PQReader.cpp @@ -41,6 +41,7 @@ void PQReader::readData() if (partSizeDiv.rem > 0) partSize++; + xdbcEnv->readPart_info.resize(partNum); for (int i = partNum - 1; i >= 0; i--) { Part p{}; @@ -51,15 +52,12 @@ void PQReader::readData() if (i == partNum - 1) p.endOff = numFiles; - xdbcEnv->partPtr->push(p); + xdbcEnv->readPartPtr->push(i); + xdbcEnv->readPart_info[i] = p; spdlog::get("XDBC.SERVER")->info("Partition {0} [{1},{2}] pushed into queue", p.id, p.startOff, p.endOff); } - // final partition - Part fP{}; - fP.id = -1; - //*** Create threads for read operation xdbcEnv->env_manager_DS.registerOperation("read", [&](int thr) { try { @@ -67,13 +65,13 @@ void PQReader::readData() spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); return; } - xdbcEnv->partPtr->push(fP); + xdbcEnv->readPartPtr->push(-1); readPQ(thr); } catch (const std::exception& e) { spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); } catch (...) { spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); - } }, xdbcEnv->freeBufferPtr); + } }, xdbcEnv->readPartPtr); xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); // start read component threads //*** Finish creating threads for read operation @@ -102,6 +100,7 @@ void PQReader::readData() while (xdbcEnv->enable_updation_DS == 1) // Reconfigure threads as long as it is allowed { std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); } @@ -362,16 +361,17 @@ int PQReader::deserializePQ(int thr, int &totalThreadWrittenTuples, int &totalTh int PQReader::readPQ(int thr) { - + xdbcEnv->activeReadThreads.fetch_add(1); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "start"}); // Base directory containing the split Parquet files auto baseDir = "/dev/shm/" + tableName + "/"; - + int part_id = xdbcEnv->readPartPtr->pop(); // Fetch the next partition to process - Part curPart = xdbcEnv->partPtr->pop(); - while (curPart.id != -1) + Part curPart; + while (part_id != -1) { + curPart = xdbcEnv->readPart_info[part_id]; // Iterate over the range of partitions for (int partitionId = curPart.startOff; partitionId < curPart.endOff; ++partitionId) { @@ -425,12 +425,13 @@ int PQReader::readPQ(int thr) } // Fetch the next partition - curPart = xdbcEnv->partPtr->pop(); + xdbcEnv->readPartPtr->pop(); } xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "end"}); xdbcEnv->finishedReadThreads.fetch_add(1); - if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + xdbcEnv->activeReadThreads.fetch_add(-1); + if (xdbcEnv->activeReadThreads == 0) { xdbcEnv->enable_updation_DS = 0; xdbcEnv->enable_updation_xServe = 0; From b1d0010dc9155d1ee7bcdc08dd3933cf53296526 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 9 Mar 2025 20:06:53 +0100 Subject: [PATCH 11/19] Enable runtime reconfigure for PG read --- DataSources/PGReader/PGReader.cpp | 28 ++++++++++++++-------------- DataSources/PQReader/PQReader.cpp | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/DataSources/PGReader/PGReader.cpp b/DataSources/PGReader/PGReader.cpp index 939d1df..1851a65 100644 --- a/DataSources/PGReader/PGReader.cpp +++ b/DataSources/PGReader/PGReader.cpp @@ -168,6 +168,7 @@ int PGReader::read_pq_copy() if (partSizeDiv.rem > 0) partSize++; + xdbcEnv->readPart_info.resize(partNum); for (int i = partNum - 1; i >= 0; i--) { Part p{}; @@ -178,15 +179,12 @@ int PGReader::read_pq_copy() if (i == partNum - 1) p.endOff = UINT32_MAX; - xdbcEnv->partPtr->push(p); + xdbcEnv->readPartPtr->push(i); + xdbcEnv->readPart_info[i] = p; spdlog::get("XDBC.SERVER")->info("Partition {0} [{1},{2}] pushed into queue ", p.id, p.startOff, p.endOff); } - // final partition - Part fP{}; - fP.id = -1; - //*** Create threads for read operation xdbcEnv->env_manager_DS.registerOperation("read", [&](int thr) { try { @@ -194,13 +192,13 @@ int PGReader::read_pq_copy() spdlog::get("XDBC.SERVER")->error("No of threads exceed limit"); return; } - xdbcEnv->partPtr->push(fP); + xdbcEnv->readPartPtr->push(-1); readPG(thr); } catch (const std::exception& e) { spdlog::get("XDBC.SERVER")->error("Exception in thread {}: {}", thr, e.what()); } catch (...) { spdlog::get("XDBC.SERVER")->error("Unknown exception in thread {}", thr); - } }, xdbcEnv->freeBufferPtr); + } }, xdbcEnv->readPartPtr); xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); // start read component threads //*** Finish creating threads for read operation @@ -229,6 +227,7 @@ int PGReader::read_pq_copy() while (xdbcEnv->enable_updation_DS == 1) // Reconfigure threads as long as it is allowed { std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); } @@ -259,7 +258,6 @@ int PGReader::read_pq_copy() int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalThreadWrittenBuffers) { - xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "deser", "start"}); int outBid; @@ -421,13 +419,14 @@ int PGReader::deserializePG(int thr, int &totalThreadWrittenTuples, int &totalTh int PGReader::readPG(int thr) { - + xdbcEnv->activeReadThreads.fetch_add(1); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "start"}); int curBid = xdbcEnv->freeBufferPtr->pop(); xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "pop"}); - Part curPart = xdbcEnv->partPtr->pop(); + int part_id = xdbcEnv->readPartPtr->pop(); + Part curPart; std::byte *writePtr = bp[curBid].data() + sizeof(Header); size_t sizeWritten = 0; @@ -443,9 +442,9 @@ int PGReader::readPG(int thr) conninfo = "dbname = db1 user = postgres password = 123456 host = pg1 port = 5432"; connection = PQconnectdb(conninfo); - while (curPart.id != -1) + while (part_id != -1) { - + curPart = xdbcEnv->readPart_info[part_id]; char *receiveBuffer = NULL; int receiveLength = 0; const int asynchronous = 0; @@ -502,7 +501,7 @@ int PGReader::readPG(int thr) tuplesPerBuffer++; } - curPart = xdbcEnv->partPtr->pop(); + part_id = xdbcEnv->readPartPtr->pop(); spdlog::get("XDBC.SERVER")->info("PG thread {0}: Exiting PQgetCopyData loop, tupleNo: {1}", thr, tuplesRead); // we now check the last received length returned by copy data @@ -556,7 +555,8 @@ int PGReader::readPG(int thr) xdbcEnv->deserBufferPtr->push(curBid); xdbcEnv->finishedReadThreads.fetch_add(1); - if (xdbcEnv->finishedReadThreads == xdbcEnv->read_parallelism) + xdbcEnv->activeReadThreads.fetch_add(-1); + if (xdbcEnv->activeReadThreads == 0) { xdbcEnv->enable_updation_DS = 0; xdbcEnv->enable_updation_xServe = 0; diff --git a/DataSources/PQReader/PQReader.cpp b/DataSources/PQReader/PQReader.cpp index a11165d..cd66574 100644 --- a/DataSources/PQReader/PQReader.cpp +++ b/DataSources/PQReader/PQReader.cpp @@ -425,7 +425,7 @@ int PQReader::readPQ(int thr) } // Fetch the next partition - xdbcEnv->readPartPtr->pop(); + part_id = xdbcEnv->readPartPtr->pop(); } xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "end"}); From 8a42246e7022499bd43ec5c8dba627cd9e5a20ee Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Sun, 9 Mar 2025 22:34:31 +0100 Subject: [PATCH 12/19] Reconfigure inside websocket --- DataSources/CHReader/CHReader.cpp | 9 ++++----- DataSources/CSVReader/CSVReader.cpp | 2 -- DataSources/PGReader/PGReader.cpp | 2 -- DataSources/PQReader/PQReader.cpp | 2 -- main.cpp | 3 +++ xdbcserver.cpp | 1 - 6 files changed, 7 insertions(+), 12 deletions(-) diff --git a/DataSources/CHReader/CHReader.cpp b/DataSources/CHReader/CHReader.cpp index 2b183dc..0eef9ac 100644 --- a/DataSources/CHReader/CHReader.cpp +++ b/DataSources/CHReader/CHReader.cpp @@ -88,14 +88,13 @@ void CHReader::readData() xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); // start deserialize component threads //*** Finish creating threads for deserialize operation - if (xdbcEnv->spawn_source == 1) - { - xdbcEnv->enable_updation_DS = 1; - } + // if (xdbcEnv->spawn_source == 1) + // { + // xdbcEnv->enable_updation_DS = 1; + // } // while (xdbcEnv->enable_updation_DS == 1) // Reconfigure threads as long as it is allowed // { // std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - // xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); // } xdbcEnv->env_manager_DS.joinThreads("read"); diff --git a/DataSources/CSVReader/CSVReader.cpp b/DataSources/CSVReader/CSVReader.cpp index ddb305a..996213e 100644 --- a/DataSources/CSVReader/CSVReader.cpp +++ b/DataSources/CSVReader/CSVReader.cpp @@ -197,8 +197,6 @@ void CSVReader::readData() while (xdbcEnv->enable_updation_DS == 1) // Reconfigure threads as long as it is allowed { std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); - xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); } // Wait for read to finish and then kill deserialize xdbcEnv->env_manager_DS.joinThreads("read"); diff --git a/DataSources/PGReader/PGReader.cpp b/DataSources/PGReader/PGReader.cpp index 1851a65..d10dd7e 100644 --- a/DataSources/PGReader/PGReader.cpp +++ b/DataSources/PGReader/PGReader.cpp @@ -227,8 +227,6 @@ int PGReader::read_pq_copy() while (xdbcEnv->enable_updation_DS == 1) // Reconfigure threads as long as it is allowed { std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); - xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); } // Wait for read to finish and then kill deserialize diff --git a/DataSources/PQReader/PQReader.cpp b/DataSources/PQReader/PQReader.cpp index cd66574..e2f273f 100644 --- a/DataSources/PQReader/PQReader.cpp +++ b/DataSources/PQReader/PQReader.cpp @@ -100,8 +100,6 @@ void PQReader::readData() while (xdbcEnv->enable_updation_DS == 1) // Reconfigure threads as long as it is allowed { std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); - xdbcEnv->env_manager_DS.configureThreads("deserialize", xdbcEnv->deser_parallelism); } // Wait for read to finish and then kill deserialize diff --git a/main.cpp b/main.cpp index 196b932..33b4795 100755 --- a/main.cpp +++ b/main.cpp @@ -201,10 +201,13 @@ void env_convert(RuntimeEnv &env, const nlohmann::json &env_json) { env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); + env.env_manager_DS.configureThreads("deserialize", env.deser_parallelism); + env.env_manager_DS.configureThreads("read", env.read_parallelism); } if (env.enable_updation_xServe == 1) { env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); + env.env_manager_xServer.configureThreads("compress", env.compression_parallelism); } } catch (const std::exception &e) diff --git a/xdbcserver.cpp b/xdbcserver.cpp index 2888805..4707a3f 100755 --- a/xdbcserver.cpp +++ b/xdbcserver.cpp @@ -392,7 +392,6 @@ int XDBCServer::serve() while (xdbcEnv->enable_updation_xServe == 1) // Reconfigure threads as long as it is allowed { std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - xdbcEnv->env_manager_xServer.configureThreads("compress", xdbcEnv->compression_parallelism); } // Join all the threads t1.join(); From 65cdff5b11e7df6a924e12d099a934c3ac046032 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Mon, 10 Mar 2025 23:31:35 +0100 Subject: [PATCH 13/19] Fix bug in PG and PQ --- DataSources/PGReader/PGReader.cpp | 8 ++++++-- DataSources/PQReader/PQReader.cpp | 2 +- main.cpp | 11 +++++++---- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/DataSources/PGReader/PGReader.cpp b/DataSources/PGReader/PGReader.cpp index d10dd7e..a0767d9 100644 --- a/DataSources/PGReader/PGReader.cpp +++ b/DataSources/PGReader/PGReader.cpp @@ -160,7 +160,7 @@ int PGReader::read_pq_copy() int maxRowNum = getMaxCtId(tableName); - int partNum = xdbcEnv->read_parallelism; + int partNum = xdbcEnv->read_partitions; div_t partSizeDiv = div(maxRowNum, partNum); int partSize = partSizeDiv.quot; @@ -228,7 +228,9 @@ int PGReader::read_pq_copy() { std::this_thread::sleep_for(std::chrono::milliseconds(1000)); } - + std::this_thread::sleep_for(std::chrono::milliseconds(30000)); + xdbcEnv->read_parallelism = 4; + xdbcEnv->env_manager_DS.configureThreads("read", xdbcEnv->read_parallelism); // start read component threads // Wait for read to finish and then kill deserialize xdbcEnv->env_manager_DS.joinThreads("read"); xdbcEnv->env_manager_DS.configureThreads("deserialize", 0); @@ -424,6 +426,7 @@ int PGReader::readPG(int thr) xdbcEnv->pts->push(ProfilingTimestamps{std::chrono::high_resolution_clock::now(), thr, "read", "pop"}); int part_id = xdbcEnv->readPartPtr->pop(); + spdlog::get("XDBC.SERVER")->info("Read Thread {0} partition {1}", thr, part_id); Part curPart; std::byte *writePtr = bp[curBid].data() + sizeof(Header); @@ -442,6 +445,7 @@ int PGReader::readPG(int thr) while (part_id != -1) { + spdlog::get("XDBC.SERVER")->info("Read Thread {0} partition {1} time ", thr, part_id); curPart = xdbcEnv->readPart_info[part_id]; char *receiveBuffer = NULL; int receiveLength = 0; diff --git a/DataSources/PQReader/PQReader.cpp b/DataSources/PQReader/PQReader.cpp index e2f273f..f87ac70 100644 --- a/DataSources/PQReader/PQReader.cpp +++ b/DataSources/PQReader/PQReader.cpp @@ -33,7 +33,7 @@ void PQReader::readData() spdlog::get("XDBC.SERVER")->info("Parquet files: {0}", numFiles); - int partNum = xdbcEnv->read_parallelism; + int partNum = xdbcEnv->read_partitions; div_t partSizeDiv = div(numFiles, partNum); int partSize = partSizeDiv.quot; diff --git a/main.cpp b/main.cpp index 33b4795..3101456 100755 --- a/main.cpp +++ b/main.cpp @@ -181,10 +181,13 @@ nlohmann::json additional_msg(RuntimeEnv &env) { nlohmann::json metrics_json = nlohmann::json::object(); // Use a JSON object metrics_json["totalTime_ms"] = env.tf_paras.elapsed_time; - metrics_json["readBufferQ_load"] = std::get<0>(env.tf_paras.latest_queueSizes); - metrics_json["deserializedBufferQ_load"] = std::get<1>(env.tf_paras.latest_queueSizes); - metrics_json["compressedBufferQ_load"] = std::get<2>(env.tf_paras.latest_queueSizes); - metrics_json["sendBufferQ_load"] = std::get<3>(env.tf_paras.latest_queueSizes); + if (env.enable_updation_DS == 1) + { + metrics_json["readBufferQ_load"] = std::get<0>(env.tf_paras.latest_queueSizes); + metrics_json["deserializedBufferQ_load"] = std::get<1>(env.tf_paras.latest_queueSizes); + metrics_json["compressedBufferQ_load"] = std::get<2>(env.tf_paras.latest_queueSizes); + metrics_json["sendBufferQ_load"] = std::get<3>(env.tf_paras.latest_queueSizes); + } return metrics_json; } From 84f0c92431589cd49c0399444a45a07de470ce00 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Tue, 18 Mar 2025 21:48:27 +0100 Subject: [PATCH 14/19] Modify websocket to wait for command from controller --- ControllerInterface/WebSocketClient.cpp | 114 +++++++++++++++--------- 1 file changed, 70 insertions(+), 44 deletions(-) diff --git a/ControllerInterface/WebSocketClient.cpp b/ControllerInterface/WebSocketClient.cpp index 8fd7e2e..9b71390 100644 --- a/ControllerInterface/WebSocketClient.cpp +++ b/ControllerInterface/WebSocketClient.cpp @@ -3,11 +3,13 @@ #include WebSocketClient::WebSocketClient(const std::string &host, const std::string &port) - : host_(host), port_(port), resolver_(ioc_), ws_(ioc_), timer_(ioc_), active_(false), stop_thread_(false), - operation_started_(false) {} + : host_(host), port_(port), resolver_(ioc_), ws_(ioc_), timer_(ioc_), active_(false), stop_thread_(false), + operation_started_(false) {} -void WebSocketClient::start() { - try { +void WebSocketClient::start() +{ + try + { // Resolve host and port auto results = resolver_.resolve(host_, port_); // Connect to the first resolved endpoint @@ -26,62 +28,59 @@ void WebSocketClient::start() { auto start_time = std::chrono::steady_clock::now(); const std::chrono::seconds timeout(10); // Set a timeout duration - while (!acknowledged) { + while (!acknowledged) + { // Check for timeout auto elapsed = std::chrono::steady_clock::now() - start_time; - if (elapsed > timeout) { + if (elapsed > timeout) + { spdlog::error("Timeout waiting for server acknowledgment."); throw std::runtime_error("Server acknowledgment timeout"); } // Attempt to read the acknowledgment - try { + try + { ws_.read(buffer); std::string ack_response = beast::buffers_to_string(buffer.data()); spdlog::info("Received acknowledgment: {}", ack_response); // Parse and check acknowledgment json ack_json = json::parse(ack_response); - if (ack_json["operation"] == "acknowledged") { + if (ack_json["operation"] == "acknowledged") + { acknowledged = true; operation_started_ = true; // Set flag indicating acknowledgment received spdlog::info("Server acknowledged the start request."); - } else { + } + else + { spdlog::warn("Server response does not acknowledge start: {}", ack_json.dump()); // throw std::runtime_error("Server rejected start request"); } } - catch (const std::exception &e) { + catch (const std::exception &e) + { spdlog::error("Error while waiting for acknowledgment: {}", e.what()); // Optional: Retry after a short delay std::this_thread::sleep_for(std::chrono::milliseconds(500)); } } } - catch (const std::exception &e) { + catch (const std::exception &e) + { spdlog::error("WebSocket Client Error during start: {}", e.what()); throw; // Rethrow the exception to notify the caller } } -void WebSocketClient::periodic_communication() { - try { - while (!stop_thread_) { - // Convert metrics to JSON and send it - json metrics_json = metrics_convert_(); - json addtnl_info = additional_msg_(); - json combined_payload = metrics_json; - for (auto &[key, value]: addtnl_info.items()) { - combined_payload[key] = value; - } - // json metrics_json = {{"waiting_time", "100ms"}}; - json request_json = { - {"operation", "get_environment"}, - {"payload", combined_payload} // Include metrics in the payload - }; - ws_.write(asio::buffer(request_json.dump())); - - // Read response from server +void WebSocketClient::periodic_communication() +{ + try + { + while (!stop_thread_) + { + // Read command from server beast::flat_buffer buffer; ws_.read(buffer); std::string env_response = beast::buffers_to_string(buffer.data()); @@ -89,28 +88,49 @@ void WebSocketClient::periodic_communication() { // Parse and process the response json env_json = json::parse(env_response); - if (env_json["operation"] == "set_environment") { + if (env_json["operation"] == "set_environment") + { json payload = env_json["payload"]; env_convert_(payload); // Process environment data from payload - } else { + } + else + { spdlog::warn("Unexpected operation received: {}", env_json["operation"]); } + // Convert metrics to JSON and send it + json metrics_json = metrics_convert_(); + json addtnl_info = additional_msg_(); + json combined_payload = metrics_json; + for (auto &[key, value] : addtnl_info.items()) + { + combined_payload[key] = value; + } + // json metrics_json = {{"waiting_time", "100ms"}}; + json request_json = { + {"operation", "get_environment"}, + {"payload", combined_payload} // Include metrics in the payload + }; + ws_.write(asio::buffer(request_json.dump())); // Wait for 1 second before next communication - std::this_thread::sleep_for(std::chrono::seconds(2)); + // std::this_thread::sleep_for(std::chrono::seconds(2)); active_ = true; } } - catch (const std::exception &e) { + catch (const std::exception &e) + { std::cerr << "Error in periodic communication: " << e.what() << std::endl; } } void WebSocketClient::run(std::function metrics_convert, std::function additional_msg, - std::function env_convert) { - try { + std::function env_convert) +{ + try + { // Wait until the operation has started and acknowledgment is received - while (!operation_started_) { + while (!operation_started_) + { std::this_thread::sleep_for(std::chrono::milliseconds(100)); // Wait briefly before checking again } @@ -124,21 +144,25 @@ void WebSocketClient::run(std::function metrics_convert, std::function Date: Mon, 24 Mar 2025 07:31:09 +0100 Subject: [PATCH 15/19] Calculate latest metrics without copying --- Dockerfile | 2 + customQueue.h | 55 +++++++++-------- main.cpp | 11 ++-- metrics_calculator.h | 136 ++++++++++++++++++++++++++++++------------- 4 files changed, 132 insertions(+), 72 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9a9317f..8a2212c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -70,6 +70,8 @@ ADD xdbcserver.h /xdbc-server/ ADD metrics_calculator.h /xdbc-server/ ADD Compression /xdbc-server/Compression ADD DataSources /xdbc-server/DataSources +ADD ControllerInterface /xdbc-server/ControllerInterface +ADD EnvironmentReconfigure /xdbc-server/EnvironmentReconfigure RUN ls /xdbc-server RUN rm -rf /xdbc-server/CMakeCache.txt diff --git a/customQueue.h b/customQueue.h index a81cc32..15d04a4 100644 --- a/customQueue.h +++ b/customQueue.h @@ -2,8 +2,9 @@ #include #include -template -class customQueue { +template +class customQueue +{ private: std::mutex d_mutex; std::condition_variable d_condition; @@ -15,30 +16,36 @@ class customQueue { public: explicit customQueue(size_t max_capacity = 0) : capacity(max_capacity) {} - void push(T const &value) { + void push(T const &value) + { { std::unique_lock lock(this->d_mutex); - this->d_space_available.wait(lock, [=] { return capacity == 0 || d_queue.size() < capacity; }); + this->d_space_available.wait(lock, [=] + { return capacity == 0 || d_queue.size() < capacity; }); d_queue.push_front(value); } this->d_condition.notify_all(); } - T pop() { + T pop() + { std::unique_lock lock(this->d_mutex); - this->d_condition.wait(lock, [=] { return !this->d_queue.empty(); }); + this->d_condition.wait(lock, [=] + { return !this->d_queue.empty(); }); T rc(std::move(this->d_queue.back())); this->d_queue.pop_back(); this->d_space_available.notify_all(); // Notify threads waiting for space return rc; } - [[nodiscard]] size_t size() { + [[nodiscard]] size_t size() + { std::unique_lock lock(this->d_mutex); return d_queue.size(); } - void setCapacity(size_t new_capacity) { + void setCapacity(size_t new_capacity) + { { std::unique_lock lock(this->d_mutex); capacity = new_capacity; @@ -47,23 +54,23 @@ class customQueue { } // Get the current capacity - [[nodiscard]] size_t getCapacity() const { + [[nodiscard]] size_t getCapacity() const + { return capacity; } - std::vector copy_newElements() { - static size_t lastCopiedIndex = 0; // Tracks the last copied position - std::vector new_elements; // To store new elements - auto current_index = d_queue.size(); - { - // std::unique_lock lock(this->d_mutex); // Lock for thread safety - if (lastCopiedIndex < - current_index) { // Check if there are new elements - new_elements.assign(d_queue.rbegin(), d_queue.rbegin() + (d_queue.size() - - lastCopiedIndex)); // Reverse copy the new elements - lastCopiedIndex = current_index; // Update the index for the next call - } - } - return new_elements; // Return new elements in reverse order + auto begin() + { + return d_queue.rbegin(); + } + + auto beginFrom(size_t offset) + { + return d_queue.rbegin() + offset; + } + + auto end() + { + return d_queue.rend(); } -}; \ No newline at end of file +}; diff --git a/main.cpp b/main.cpp index 3101456..d3b477b 100755 --- a/main.cpp +++ b/main.cpp @@ -155,9 +155,8 @@ nlohmann::json metrics_convert(RuntimeEnv &env) if ((env.pts) && (env.enable_updation_DS == 1) && (env.enable_updation_xServe == 1)) { - std::vector env_pts; - env_pts = env.pts->copy_newElements(); - auto component_metrics_ = calculate_metrics(env_pts, env.buffer_size); + auto &env_pts = *(env.pts); + auto component_metrics_ = calculate_metrics(env_pts, env.buffer_size, true); for (const auto &pair : component_metrics_) { @@ -254,11 +253,9 @@ int main(int argc, char *argv[]) xdbcEnv.tf_paras.elapsed_time = static_cast(total_time); spdlog::get("XDBC.SERVER")->info("Total elapsed time: {} ms", total_time); - auto pts = std::vector(xdbcEnv.pts->size()); - while (xdbcEnv.pts->size() != 0) - pts.push_back(xdbcEnv.pts->pop()); + auto &env_pts = *(xdbcEnv.pts); + auto component_metrics = calculate_metrics(env_pts, xdbcEnv.buffer_size); - auto component_metrics = calculate_metrics(pts, xdbcEnv.buffer_size); std::ostringstream totalTimes; std::ostringstream procTimes; std::ostringstream waitingTimes; diff --git a/metrics_calculator.h b/metrics_calculator.h index 9d37854..654dbb4 100644 --- a/metrics_calculator.h +++ b/metrics_calculator.h @@ -10,7 +10,8 @@ #include // Define the Metrics struct -struct Metrics { +struct Metrics +{ double waiting_time_ms; double processing_time_ms; double overall_time_ms; @@ -24,33 +25,53 @@ struct Metrics { }; // Helper function to calculate standard deviation -double calculate_stddev(const std::vector &values, double mean) { +double calculate_stddev(const std::vector &values, double mean) +{ double sum = 0.0; - for (const auto &value: values) { + for (const auto &value : values) + { sum += (value - mean) * (value - mean); } return std::sqrt(sum / values.size()); } -// Function to calculate metrics per component and then aggregate them std::unordered_map -calculate_metrics(const std::vector ×tamps, size_t buffer_size_kb) { +calculate_metrics(customQueue ×tamps, size_t buffer_size_kb, bool is_latest = false) +{ size_t buffer_size_bytes = buffer_size_kb * 1024; // Convert buffer size to bytes std::unordered_map>> events_per_component_thread; + static size_t lastCopiedIndex = 0; // Tracks the last copied position + if (is_latest == true) + { + // Create a map to keep track of the count of timestamps per component and thread + std::map, int> timestamp_counts; - // Group timestamps by component and thread - for (const auto &ts: timestamps) { - events_per_component_thread[ts.component][ts.thread].push_back(ts); + for (auto it = timestamps.beginFrom(lastCopiedIndex); it != timestamps.end(); ++it) + { + const auto &ts = *it; + events_per_component_thread[ts.component][ts.thread].push_back(ts); + } + lastCopiedIndex = timestamps.size(); + } + else + { + for (auto it = timestamps.begin(); it != timestamps.end(); ++it) + { + const auto &ts = *it; + events_per_component_thread[ts.component][ts.thread].push_back(ts); + } } std::unordered_map component_metrics; // Calculate metrics per component - for (const auto &[component, events_per_thread]: events_per_component_thread) { + for (const auto &[component, events_per_thread] : events_per_component_thread) + { std::vector thread_metrics; size_t total_buffers_processed = 0; - for (const auto &[thread_id, events]: events_per_thread) { + for (const auto &[thread_id, events] : events_per_thread) + { Metrics metrics = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; std::chrono::high_resolution_clock::time_point start_time, push_time, pop_time, end_time; @@ -65,29 +86,42 @@ calculate_metrics(const std::vector ×tamps, size_t buf const auto &first_element = events.front(); // Store the first event in the loop const auto &last_element = events.back(); // Store the last event in the loop - for (const auto &event: events) { - if (event.event == "start") { + for (const auto &event : events) + { + if (event.event == "start") + { start_time = event.timestamp; has_start_time = true; - } else if (event.event == "pop") { + } + else if (event.event == "pop") + { pop_time = event.timestamp; - if (has_push_time) { + if (has_push_time) + { waiting_time += pop_time - push_time; // Waiting time is pop_time - previous push_time - } else if (has_start_time) { + } + else if (has_start_time) + { waiting_time += pop_time - start_time; // Initial waiting time is pop_time - start_time } has_pop_time = true; - } else if (event.event == "push") { + } + else if (event.event == "push") + { push_time = event.timestamp; has_push_time = true; - if (has_pop_time) { + if (has_pop_time) + { processing_time += push_time - pop_time; // Processing time is push_time - pop_time thread_buffers_processed++; } - } else if (event.event == "end") { + } + else if (event.event == "end") + { end_time = event.timestamp; has_end_time = true; - if (has_pop_time) { + if (has_push_time) + { processing_time += end_time - push_time; // Finalize the processing time } } @@ -95,27 +129,37 @@ calculate_metrics(const std::vector ×tamps, size_t buf metrics.waiting_time_ms = std::chrono::duration_cast(waiting_time).count(); metrics.processing_time_ms = std::chrono::duration_cast(processing_time).count(); - if (has_end_time && has_start_time) { + if (has_end_time && has_start_time) + { metrics.overall_time_ms = std::chrono::duration_cast( - std::chrono::duration(end_time - start_time)) - .count(); - } else { + std::chrono::duration(end_time - start_time)) + .count(); + } + else + { metrics.overall_time_ms = std::chrono::duration_cast( - std::chrono::duration(last_element.timestamp - first_element.timestamp)) - .count(); + std::chrono::duration(last_element.timestamp - first_element.timestamp)) + .count(); + } + if (metrics.processing_time_ms == 0) + { + metrics.processing_time_ms = 1; } + total_buffers_processed += thread_buffers_processed; // Calculate the total throughput in bytes per second for this thread - if (metrics.overall_time_ms > 0) { + if (metrics.overall_time_ms > 0) + { metrics.total_throughput = - (thread_buffers_processed * buffer_size_bytes) / (metrics.overall_time_ms / 1000.0); + (thread_buffers_processed * buffer_size_bytes) / (metrics.overall_time_ms / 1000.0); } // Calculate the per buffer throughput in bytes per second for this thread - if (metrics.processing_time_ms > 0) { + if (metrics.processing_time_ms > 0) + { metrics.per_buffer_throughput = - (thread_buffers_processed * buffer_size_bytes) / (metrics.processing_time_ms / 1000.0); + (thread_buffers_processed * buffer_size_bytes) / (metrics.processing_time_ms / 1000.0); } // Convert throughput to MB/s @@ -129,33 +173,39 @@ calculate_metrics(const std::vector ×tamps, size_t buf Metrics aggregated_metrics; size_t num_threads = thread_metrics.size(); aggregated_metrics.waiting_time_ms = std::accumulate(thread_metrics.begin(), thread_metrics.end(), 0.0, - [](const auto &sum, const auto &m) { + [](const auto &sum, const auto &m) + { return sum + m.waiting_time_ms; }) / num_threads; aggregated_metrics.processing_time_ms = std::accumulate(thread_metrics.begin(), thread_metrics.end(), 0.0, - [](const auto &sum, const auto &m) { + [](const auto &sum, const auto &m) + { return sum + m.processing_time_ms; }) / num_threads; aggregated_metrics.overall_time_ms = std::accumulate(thread_metrics.begin(), thread_metrics.end(), 0.0, - [](const auto &sum, const auto &m) { + [](const auto &sum, const auto &m) + { return sum + m.overall_time_ms; }) / num_threads; aggregated_metrics.total_throughput = std::accumulate(thread_metrics.begin(), thread_metrics.end(), 0.0, - [](const auto &sum, const auto &m) { + [](const auto &sum, const auto &m) + { return sum + m.total_throughput; }); aggregated_metrics.per_buffer_throughput = std::accumulate(thread_metrics.begin(), thread_metrics.end(), 0.0, - [](const auto &sum, const auto &m) { + [](const auto &sum, const auto &m) + { return sum + m.per_buffer_throughput; }) / num_threads; // Calculate standard deviations std::vector waiting_times, processing_times, overall_times, total_throughputs, per_buffer_throughputs; - for (const auto &m: thread_metrics) { + for (const auto &m : thread_metrics) + { waiting_times.push_back(m.waiting_time_ms); processing_times.push_back(m.processing_time_ms); overall_times.push_back(m.overall_time_ms); @@ -178,7 +228,8 @@ calculate_metrics(const std::vector ×tamps, size_t buf return component_metrics; } -std::tuple printAndReturnAverageLoad(RuntimeEnv &_xdbcenv) { +std::tuple printAndReturnAverageLoad(RuntimeEnv &_xdbcenv) +{ long long totalTimestamps = 0; size_t totalReadBufferIdsSize = 0; size_t totalDeserBufferIdsSize = 0; @@ -187,7 +238,8 @@ std::tuple printAndReturnAverageLoad(RuntimeEnv size_t recordCount = _xdbcenv.queueSizes.size(); std::tuple ret(0, 0, 0, 0); - for (const auto &record: _xdbcenv.queueSizes) { + for (const auto &record : _xdbcenv.queueSizes) + { totalTimestamps += std::get<0>(record); totalReadBufferIdsSize += std::get<1>(record); totalDeserBufferIdsSize += std::get<2>(record); @@ -195,7 +247,8 @@ std::tuple printAndReturnAverageLoad(RuntimeEnv totalNetworkBufferIdsSize += std::get<4>(record); } - if (recordCount > 0) { + if (recordCount > 0) + { double avgReadBufferIdsSize = static_cast(totalReadBufferIdsSize) / recordCount; double avgDeserBufferIdsSize = static_cast(totalDeserBufferIdsSize) / recordCount; double avgCompressedBufferIdsSize = static_cast(totalDeserBufferIdsSize) / recordCount; @@ -205,9 +258,10 @@ std::tuple printAndReturnAverageLoad(RuntimeEnv avgNetworkBufferIdsSize); spdlog::get("XDBC.SERVER")->info("Average Load of Queues: Reader, Deserializer, Compressor, Sender"); - spdlog::get("XDBC.SERVER")->info("{0}\t{1}\t{2}\t{3}", avgReadBufferIdsSize, avgDeserBufferIdsSize, - avgCompressedBufferIdsSize, avgNetworkBufferIdsSize); - } else { + spdlog::get("XDBC.SERVER")->info("{0}\t{1}\t{2}\t{3}", avgReadBufferIdsSize, avgDeserBufferIdsSize, avgCompressedBufferIdsSize, avgNetworkBufferIdsSize); + } + else + { spdlog::get("XDBC.SERVER")->info("No records available to calculate averages."); } From dc74ec491444b6152a8640d207309976cc8a3f3d Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Wed, 2 Apr 2025 07:11:51 +0200 Subject: [PATCH 16/19] COnvert q load to percentage --- xdbcserver.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/xdbcserver.cpp b/xdbcserver.cpp index 4707a3f..c581008 100755 --- a/xdbcserver.cpp +++ b/xdbcserver.cpp @@ -70,7 +70,7 @@ XDBCServer::XDBCServer(RuntimeEnv &xdbcEnv) // initialize free queue and partition queue xdbcEnv.freeBufferPtr = std::make_shared>(); xdbcEnv.readPartPtr = std::make_shared>(); - + xdbcEnv.freeBufferPtr->setCapacity(xdbcEnv.buffers_in_bufferpool); // initially all buffers are put in the free buffer queue for (int i = 0; i < xdbcEnv.buffers_in_bufferpool; i++) xdbcEnv.freeBufferPtr->push(i); @@ -150,10 +150,21 @@ void XDBCServer::monitorQueues() size_t compressedBufferTotalSize = xdbcEnv->compBufferPtr->size(); size_t sendBufferTotalSize = xdbcEnv->sendBufferPtr->size(); + float readBufferLoadFloat = (readBufferTotalSize * 100.0f) / xdbcEnv->freeBufferPtr->getCapacity(); + float deserBufferLoadFloat = (deserBufferTotalSize * 100.0f) / xdbcEnv->deserBufferPtr->getCapacity(); + float compressedBufferLoadFloat = (compressedBufferTotalSize * 100.0f) / xdbcEnv->compBufferPtr->getCapacity(); + float sendBufferLoadFLoat = (sendBufferTotalSize * 100.0f) / xdbcEnv->sendBufferPtr->getCapacity(); + + size_t readBufferLoad = static_cast(readBufferLoadFloat); + size_t deserBufferLoad = static_cast(deserBufferLoadFloat); + size_t compressedBufferLoad = static_cast(compressedBufferLoadFloat); + size_t sendBufferLoad = static_cast(sendBufferLoadFLoat); + // Store the measurement as a tuple xdbcEnv->queueSizes.emplace_back(curTimeInterval, readBufferTotalSize, deserBufferTotalSize, compressedBufferTotalSize, sendBufferTotalSize); xdbcEnv->tf_paras.latest_queueSizes = std::make_tuple(readBufferTotalSize, deserBufferTotalSize, compressedBufferTotalSize, sendBufferTotalSize); + xdbcEnv->tf_paras.latest_queueSizes = std::make_tuple(readBufferTotalSize, deserBufferTotalSize, compressedBufferTotalSize, sendBufferTotalSize); std::this_thread::sleep_for(std::chrono::milliseconds(xdbcEnv->profilingInterval)); curTimeInterval += xdbcEnv->profilingInterval / 1000; From 564df081eb0e395172aa5135c0f2ed66d6c49285 Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Wed, 2 Apr 2025 10:48:10 +0200 Subject: [PATCH 17/19] Revert docker-xdbc.yml --- docker-xdbc.yml | 40 ++++++++++++++++++++-------------------- xdbcserver.cpp | 3 +-- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/docker-xdbc.yml b/docker-xdbc.yml index 67215a6..a42f9d2 100644 --- a/docker-xdbc.yml +++ b/docker-xdbc.yml @@ -1,23 +1,23 @@ -services: - xdbc-server: - image: xdbc-server:latest - container_name: xdbcserver - restart: always - volumes: - - /dev/shm:/dev/shm - ports: - - 1234:1234 - - 1235:1235 - - 1236:1236 - - 1237:1237 - - 1238:1238 - shm_size: '16gb' - cap_add: - - NET_ADMIN - networks: - - xdbc-net - labels: - com.docker-tc.enabled: 1 +# services: +# xdbc-server: +# image: xdbc-server:latest +# container_name: xdbcserver +# restart: always +# volumes: +# - /dev/shm:/dev/shm +# ports: +# - 1234:1234 +# - 1235:1235 +# - 1236:1236 +# - 1237:1237 +# - 1238:1238 +# shm_size: '16gb' +# cap_add: +# - NET_ADMIN +# networks: +# - xdbc-net +# labels: +# com.docker-tc.enabled: 1 # xdbc-client: # image: xdbc-client:latest diff --git a/xdbcserver.cpp b/xdbcserver.cpp index c581008..a39bc4b 100755 --- a/xdbcserver.cpp +++ b/xdbcserver.cpp @@ -163,8 +163,7 @@ void XDBCServer::monitorQueues() // Store the measurement as a tuple xdbcEnv->queueSizes.emplace_back(curTimeInterval, readBufferTotalSize, deserBufferTotalSize, compressedBufferTotalSize, sendBufferTotalSize); - xdbcEnv->tf_paras.latest_queueSizes = std::make_tuple(readBufferTotalSize, deserBufferTotalSize, compressedBufferTotalSize, sendBufferTotalSize); - xdbcEnv->tf_paras.latest_queueSizes = std::make_tuple(readBufferTotalSize, deserBufferTotalSize, compressedBufferTotalSize, sendBufferTotalSize); + xdbcEnv->tf_paras.latest_queueSizes = std::make_tuple(readBufferLoad, deserBufferLoad, compressedBufferLoad, sendBufferLoad); std::this_thread::sleep_for(std::chrono::milliseconds(xdbcEnv->profilingInterval)); curTimeInterval += xdbcEnv->profilingInterval / 1000; From 81e35b1f1bd45ac8974ede402ef0609c52af032a Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Wed, 2 Apr 2025 20:38:12 +0200 Subject: [PATCH 18/19] Remove docker-xdbc.yml --- docker-xdbc.yml | 83 ------------------------------------------------- 1 file changed, 83 deletions(-) delete mode 100644 docker-xdbc.yml diff --git a/docker-xdbc.yml b/docker-xdbc.yml deleted file mode 100644 index a42f9d2..0000000 --- a/docker-xdbc.yml +++ /dev/null @@ -1,83 +0,0 @@ -# services: -# xdbc-server: -# image: xdbc-server:latest -# container_name: xdbcserver -# restart: always -# volumes: -# - /dev/shm:/dev/shm -# ports: -# - 1234:1234 -# - 1235:1235 -# - 1236:1236 -# - 1237:1237 -# - 1238:1238 -# shm_size: '16gb' -# cap_add: -# - NET_ADMIN -# networks: -# - xdbc-net -# labels: -# com.docker-tc.enabled: 1 - - # xdbc-client: - # image: xdbc-client:latest - # container_name: xdbcclient - # restart: always - # volumes: - # - /dev/shm:/dev/shm - # shm_size: '16gb' - # cap_add: - # - NET_ADMIN - # networks: - # - xdbc-net - # labels: - # com.docker-tc.enabled: 1 - - #xdbc-python: - # image: xdbc-python:latest - # container_name: xdbcpython - # restart: always - # volumes: - # - /dev/shm:/dev/shm - # shm_size: '16gb' - # cap_add: - # - NET_ADMIN - # networks: - # - xdbc-net - # labels: - # com.docker-tc.enabled: 1 - - #postgres-1: - # image: postgresdb - # container_name: pg1 - # restart: always - # volumes: - # - test-data:/data - # - pgvolume1:/var/lib/postgresql/ - # ports: - # - 15432:5432 - # shm_size: '2gb' - # networks: - # - xdbc-net - -# clickhouse: -# image: clickhousedb -# container_name: ch -# privileged: true -# ports: -# - 19000:9000 -# - 18123:8123 -# volumes: -# - test-data:/data -# - chdbvol:/var/lib/clickhouse -# shm_size: '2gb' - -volumes: - test-data: - external: true - pgvolume1: - chdbvol: - -networks: - xdbc-net: - external: true \ No newline at end of file From 44810efee059dbef5de716f51db6467a76d4c67d Mon Sep 17 00:00:00 2001 From: midhun_kv Date: Wed, 2 Apr 2025 21:18:23 +0200 Subject: [PATCH 19/19] Resolve conflicts in main --- main.cpp | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/main.cpp b/main.cpp index 8282f3d..d3b477b 100755 --- a/main.cpp +++ b/main.cpp @@ -153,17 +153,10 @@ nlohmann::json metrics_convert(RuntimeEnv &env) nlohmann::json metrics_json = nlohmann::json::object(); // Use a JSON object // auto env_pts = env->pts->copyAll(); -<<<<<<< HEAD if ((env.pts) && (env.enable_updation_DS == 1) && (env.enable_updation_xServe == 1)) { auto &env_pts = *(env.pts); auto component_metrics_ = calculate_metrics(env_pts, env.buffer_size, true); -======= - if ((env.pts) && (env.enable_updation_DS == 1) && (env.enable_updation_xServe == 1)) { - std::vector env_pts; - env_pts = env.pts->copy_newElements(); - auto component_metrics_ = calculate_metrics(env_pts, env.buffer_size); ->>>>>>> main for (const auto &pair : component_metrics_) { @@ -187,12 +180,8 @@ nlohmann::json additional_msg(RuntimeEnv &env) { nlohmann::json metrics_json = nlohmann::json::object(); // Use a JSON object metrics_json["totalTime_ms"] = env.tf_paras.elapsed_time; -<<<<<<< HEAD if (env.enable_updation_DS == 1) { -======= - if ((env.enable_updation_DS == 1) && (env.enable_updation_xServe == 1)) { ->>>>>>> main metrics_json["readBufferQ_load"] = std::get<0>(env.tf_paras.latest_queueSizes); metrics_json["deserializedBufferQ_load"] = std::get<1>(env.tf_paras.latest_queueSizes); metrics_json["compressedBufferQ_load"] = std::get<2>(env.tf_paras.latest_queueSizes); @@ -201,37 +190,24 @@ nlohmann::json additional_msg(RuntimeEnv &env) return metrics_json; } -<<<<<<< HEAD void env_convert(RuntimeEnv &env, const nlohmann::json &env_json) { try { -======= -void env_convert(RuntimeEnv &env, const nlohmann::json &env_json) { - try { ->>>>>>> main // env.buffer_size = std::stoi(env_json.at("bufferSize").get()); // env.buffers_in_bufferpool = std::stoi(env_json.at("bufferpoolSize").get()) / env_.buffer_size; // env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); // env.network_parallelism = std::stoi(env_json.at("netParallelism").get()); -<<<<<<< HEAD if (env.enable_updation_DS == 1) { -======= - if (env.enable_updation_DS == 1) { ->>>>>>> main env.read_parallelism = std::stoi(env_json.at("readParallelism").get()); env.deser_parallelism = std::stoi(env_json.at("deserParallelism").get()); env.env_manager_DS.configureThreads("deserialize", env.deser_parallelism); env.env_manager_DS.configureThreads("read", env.read_parallelism); } -<<<<<<< HEAD if (env.enable_updation_xServe == 1) { -======= - if (env.enable_updation_xServe == 1) { ->>>>>>> main env.compression_parallelism = std::stoi(env_json.at("compParallelism").get()); env.env_manager_xServer.configureThreads("compress", env.compression_parallelism); }