diff --git a/.gitignore b/.gitignore index dfef3803..c54d8736 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ recentMorphStoreProjectConf.log Dockerfile .DS_Store doc/doxygen/latex -doc/doxygen/html \ No newline at end of file +doc/doxygen/html +.vscode/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index c54c35c0..298a25b9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required( VERSION 3.10 ) project( MorphStore ) -set( CMAKE_CXX_STANDARD 14 ) +set( CMAKE_CXX_STANDARD 17 ) macro(morph_flag) add_definitions(${ARGN}) @@ -28,9 +28,11 @@ set( LOG_FILE "recentMorphStoreProjectConf.log" ) IF(CMAKE_BUILD_TYPE MATCHES Debug) morph_flag(-g) + morph_flag(-DDEBUG) message(STATUS "MorphStore is configured in DEBUG mode.") ELSEIF(CMAKE_BUILD_TYPE MATCHES Release) morph_flag(-O2) + morph_flag(-DNDEBUG) message(STATUS "MorphStore is configured in RELEASE mode.") ELSEIF(CMAKE_BUILD_TYPE MATCHES HighPerf) morph_flag(-O3) @@ -93,6 +95,10 @@ ENDIF(CODROID) # remove build type to allow for custom flag handling set(CMAKE_BUILD_TYPE "") +# add resource directory for ldbc graph +# (see https://github.com/ldbc/ldbc_snb_datagen for further instructions) +morph_flag(-DLDBC_DIR="$ENV{HOME}/ldbc_snb_datagen/social_network/") + # general compiler settings, meant for all subdirectories and tests morph_flag(-Werror) morph_flag(-pedantic) diff --git a/doc/doxygen/pages/tutorials/quick_start.md b/doc/doxygen/pages/tutorials/quick_start.md index 958c1415..a4809f83 100644 --- a/doc/doxygen/pages/tutorials/quick_start.md +++ b/doc/doxygen/pages/tutorials/quick_start.md @@ -23,7 +23,7 @@ Ensure that you have the following tools installed before trying to build: - g++ >= version 8.2 - cmake >= version 3.10 -Older versions may not build all test cases. Note that C++14 is necessary. +Older versions may not build all test cases. Note that C++17 is necessary. To facilitate building and testing MorphStore, there is a script build.sh in the root folder. @@ -42,9 +42,29 @@ build/src/examples/example_query ~~~ This builds some example queries in debug mode and runs them. The source code of these queries can be found in the folder src/examples. -They are runnig in scalar mode. Thus, every system providing C++14 support should be able to build and run them regardless of any (not) +They are running in scalar mode. Thus, every system providing C++17 support should be able to build and run them regardless of any (not) available vector extensions. + +The Graph Module +====================== + +The graph module mainly contains the two different graph storage formats `Compressed Sparse Row (CSR)` and `Adjacency-List`, which differ in their representation of the graph topology. +The underlying graph model is a multi-graph, with properties for vertices and edges as well as types for both. +The model is very similar to the Property-Graph model, except that vertices can only have one type (instead of multiple labels). + +The columns describing the graph topology can be compressed using formats from MorphStore. +In addition, there are simple implementations of the graph algorithms `breadth-first search (BFS)` and `PageRank`. + + +To run all the tests and micro-benchmarks, an LDBC graph has to be generated. +Instructions on how to generate the graph can be found at `https://github.com/ldbc/ldbc_snb_datagen`. +By default, the LDBC graph is expected to be at `"$HOME/ldbc_snb_datagen/social_network/"`. +This can be changed in `/Morphstore/Engine/CMakeLists.txt`.
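+
+As an illustration, the following is a minimal sketch of how the graph algorithms provided by this patch (`BFS::compute()` and `PageRank::compute()`) could be invoked once a graph object has been loaded. The template parameter `GraphT` and the function `run_graph_algorithms` are placeholders rather than names from the repository, and the include paths assume the repository's include/ directory is on the compiler's include path.
+
+~~~
+#include <core/operators/graph/top_down_bfs.h>
+#include <core/operators/graph/page_rank.h>
+
+#include <cstdint>
+#include <iostream>
+#include <memory>
+
+// `GraphT` stands for one of the two storage formats (CSR or Adjacency-List).
+template <class GraphT>
+void run_graph_algorithms(std::shared_ptr<GraphT> graph) {
+    // Top-down BFS starting at vertex 0; returns the number of explored vertices.
+    uint64_t explored = morphstore::BFS::compute(graph, 0);
+    std::cout << "BFS explored " << explored << " vertices" << std::endl;
+
+    // PageRank with its default parameters (20 iterations, damping factor 0.85, tolerance 0.0001).
+    auto result = morphstore::PageRank::compute(graph);
+    std::cout << result.describe() << std::endl;
+}
+~~~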
+ + + + Test Vector Extensions ====================== diff --git a/include/core/morphing/decompress_column_block.h b/include/core/morphing/decompress_column_block.h new file mode 100644 index 00000000..5b244181 --- /dev/null +++ b/include/core/morphing/decompress_column_block.h @@ -0,0 +1,95 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file decompress_column_block.h + * @brief Decompressing single blocks of a column based on column_with_blockoffsets. + */ + +#ifndef MORPHSTORE_CORE_MORPHING_DECOMPRESS_COLUMN_BLOCK_H +#define MORPHSTORE_CORE_MORPHING_DECOMPRESS_COLUMN_BLOCK_H + +#include +#include +#include + +#include + +namespace morphstore { + + /** + * @brief Decompressing a range column blocks (inclusive range) + * + * @param inCol The column with block-offsets + * @param start index of blocks to be decompressed + * @param end index of blocks to be decompressed + * @return Specified blocks uncompressed in a new column + */ + template + const column *decompress_column_blocks(column_with_blockoffsets *inCol, uint64_t start, + uint64_t end) { + static_assert(compr_f::m_BlockSize != 1, "Decompressing column blocks of size 1 is not allowed"); + + auto block_size = compr_f::m_BlockSize; + auto block_count = inCol->get_block_offsets()->size(); + auto inCol_value_count = inCol->get_column()->get_count_values(); + + // validating range + assert(start <= end); + assert(start < block_count); + assert(end < block_count); + + bool last_block_uncompressed = !inCol->last_block_compressed(); + bool last_block_included = end == (block_count - 1); + + // pessimistic value_count (assuming all blocks are complete) + auto value_count = (end - start + 1) * block_size; + + if (last_block_included && last_block_uncompressed) { + // correcting value_count estimation + value_count -= block_size - inCol_value_count % block_size; + } + + // TODO: should actually be base_t? 
+ auto alloc_size = value_count * sizeof(uint64_t); + + auto decompr_col_blocks = new column(alloc_size); + decompr_col_blocks->set_meta_data(value_count, alloc_size); + uint8_t *out8 = decompr_col_blocks->get_data(); + + for (uint64_t block = start; block <= end; block++) { + const uint8_t *block_offset = inCol->get_block_offset(block); + + if (block == end && last_block_included && last_block_uncompressed) { + // handle uncompressed part + morph_batch(block_offset, out8, inCol_value_count % block_size); + } else { + morph_batch(block_offset, out8, block_size); + } + } + + return decompr_col_blocks; + } + + template + const column *decompress_column_block(column_with_blockoffsets *inCol, uint64_t block_index) { + return decompress_column_blocks(inCol, block_index, block_index); + } + +} // namespace morphstore + +#endif // MORPHSTORE_CORE_MORPHING_DECOMPRESS_COLUMN_BLOCK_H diff --git a/include/core/morphing/graph/morph_graph_col.h b/include/core/morphing/graph/morph_graph_col.h new file mode 100644 index 00000000..989c93f2 --- /dev/null +++ b/include/core/morphing/graph/morph_graph_col.h @@ -0,0 +1,135 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file morph_graph_col.h + * @brief helper for morphing graph columns (template-free columns). 
Basically need to cast to template column as it + * cannot be derieved + * @todo Remove this helper and make graph formats accept templates (can use normal morph() then) + */ + +#ifndef MORPHSTORE_GRAPH_MORPH_GRAPH_COL_H +#define MORPHSTORE_GRAPH_MORPH_GRAPH_COL_H + +#include +#include +#include + +#include + +namespace morphstore { + using column_uncompr = column; + using column_dyn_vbp = column; + using column_delta = column; + using column_for = column; + + // casting the column to the actual column type before morphing (as compiler could not derive it) + // delete_old_col -> delete input column after morphing (if the result is not the input column) + const column_base *morph_graph_col(const column_base *column, const GraphCompressionFormat src_f, + const GraphCompressionFormat trg_f, bool delete_in_col = false) { + if (src_f == trg_f) { + return column; + } + + const column_base *result = column; + + switch (src_f) { + case GraphCompressionFormat::UNCOMPRESSED: { + const column_uncompr *old_col = dynamic_cast(column); + switch (trg_f) { + case GraphCompressionFormat::DELTA: + result = morph(old_col); + break; + case GraphCompressionFormat::FOR: + result = morph(old_col); + break; + case GraphCompressionFormat::DYNAMIC_VBP: + result = morph(old_col); + break; + case GraphCompressionFormat::UNCOMPRESSED: + // handled by src_f == trg_f + break; + } + break; + } + case GraphCompressionFormat::DELTA: { + if (trg_f == GraphCompressionFormat::UNCOMPRESSED) { + const column_delta *old_col = dynamic_cast(column); + result = morph(old_col); + } else { + // as direct morphing is not yet supported .. go via decompressing first + auto uncompr_col = morph_graph_col(column, src_f, GraphCompressionFormat::UNCOMPRESSED, false); + result = morph_graph_col(uncompr_col, GraphCompressionFormat::UNCOMPRESSED, trg_f, true); + } + break; + } + case GraphCompressionFormat::FOR: { + if (trg_f == GraphCompressionFormat::UNCOMPRESSED) { + const column_for *old_col = dynamic_cast(column); + result = morph(old_col); + } else { + // as direct morphing is not yet supported .. go via decompressing first + auto uncompr_col = morph_graph_col(column, src_f, GraphCompressionFormat::UNCOMPRESSED, false); + result = morph_graph_col(uncompr_col, GraphCompressionFormat::UNCOMPRESSED, trg_f, true); + } + break; + } + case GraphCompressionFormat::DYNAMIC_VBP: { + if (trg_f == GraphCompressionFormat::UNCOMPRESSED) { + const column_dyn_vbp *old_col = dynamic_cast(column); + result = morph(old_col); + } else { + // as direct morphing is not yet supported .. 
go via decompressing first + auto uncompr_col = morph_graph_col(column, src_f, GraphCompressionFormat::UNCOMPRESSED, false); + // delete_in_col = true as temporary uncompr_col should always be deleted + result = morph_graph_col(uncompr_col, GraphCompressionFormat::UNCOMPRESSED, trg_f, true); + } + break; + } + } + + // free input column if possible + if (result != column && delete_in_col) { + delete column; + } + + if (result == nullptr) { + throw std::runtime_error("Did not handle src: " + graph_compr_f_to_string(src_f) + + " trg: " + graph_compr_f_to_string(trg_f)); + } + + return result; + } + + const column_uncompr *decompress_graph_col(const column_base *column, const GraphCompressionFormat src_f) { + return static_cast( + morph_graph_col(column, src_f, GraphCompressionFormat::UNCOMPRESSED, false)); + } + + double compression_ratio(const column_base *col, GraphCompressionFormat col_format) { + auto uncompr_col = decompress_graph_col(col, col_format); + auto ratio = uncompr_col->get_size_used_byte() / (double)col->get_size_used_byte(); + + if (col != uncompr_col) { + delete uncompr_col; + } + + return ratio; + } +} // namespace morphstore + +#endif // MORPHSTORE_GRAPH_MORPH_GRAPH_COL_H \ No newline at end of file diff --git a/include/core/morphing/graph/morph_saving_offsets_graph_col.h b/include/core/morphing/graph/morph_saving_offsets_graph_col.h new file mode 100644 index 00000000..68c51167 --- /dev/null +++ b/include/core/morphing/graph/morph_saving_offsets_graph_col.h @@ -0,0 +1,175 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file morph_saving_offsets_graph_col.h + * @brief helper for `morph_saving_offsets()` graph column (template-free column). 
Basically need to cast to template + * column as it cannot be derieved + * @todo Remove this helper and make graph formats accept templates (can use normal `morph_saving_offsets()` then) + */ + +#ifndef MORPHSTORE_GRAPH_MORPH_SAVING_OFFSETS_GRAPH_COL_H +#define MORPHSTORE_GRAPH_MORPH_SAVING_OFFSETS_GRAPH_COL_H + +#include +#include +#include + +#include +#include + +#include + +namespace morphstore { + using column_uncompr = column; + using column_with_offsets_uncompr = column_with_blockoffsets; + using column__with_offsets_dyn_vbp = column_with_blockoffsets; + using column_with_offsets_delta = column_with_blockoffsets; + using column_with_offsets_for = column_with_blockoffsets; + + // casting the column to the actual column type before morphing (as compiler could not derive it) + // delete_old_col -> delete input column after morphing (if the result is not the input column) + column_with_blockoffsets_base *morph_saving_offsets_graph_col(column_with_blockoffsets_base *col, + const GraphCompressionFormat src_f, + const GraphCompressionFormat trg_f, + bool delete_in_col = false) { + if (src_f == trg_f) { + return col; + } + + auto result = col; + + switch (src_f) { + case GraphCompressionFormat::UNCOMPRESSED: { + auto old_col = dynamic_cast(col); + switch (trg_f) { + case GraphCompressionFormat::DELTA: + result = morph_saving_offsets(old_col); + break; + case GraphCompressionFormat::FOR: + result = morph_saving_offsets(old_col); + break; + case GraphCompressionFormat::DYNAMIC_VBP: + result = morph_saving_offsets(old_col); + break; + case GraphCompressionFormat::UNCOMPRESSED: + // handled by src_f == trg_f + break; + } + break; + } + case GraphCompressionFormat::DELTA: { + if (trg_f == GraphCompressionFormat::UNCOMPRESSED) { + auto old_col = dynamic_cast(col); + result = morph_saving_offsets(old_col); + } else { + // as direct morphing is not yet supported .. go via decompressing first + auto uncompr_col = morph_saving_offsets_graph_col(col, src_f, GraphCompressionFormat::UNCOMPRESSED, false); + result = + morph_saving_offsets_graph_col(uncompr_col, GraphCompressionFormat::UNCOMPRESSED, trg_f, true); + } + break; + } + case GraphCompressionFormat::FOR: { + if (trg_f == GraphCompressionFormat::UNCOMPRESSED) { + auto old_col = dynamic_cast(col); + result = morph_saving_offsets(old_col); + } else { + // as direct morphing is not yet supported .. go via decompressing first + auto uncompr_col = morph_saving_offsets_graph_col(col, src_f, GraphCompressionFormat::UNCOMPRESSED, false); + result = + morph_saving_offsets_graph_col(uncompr_col, GraphCompressionFormat::UNCOMPRESSED, trg_f, true); + } + break; + } + case GraphCompressionFormat::DYNAMIC_VBP: { + if (trg_f == GraphCompressionFormat::UNCOMPRESSED) { + auto old_col = dynamic_cast(col); + result = morph_saving_offsets(old_col); + } else { + // as direct morphing is not yet supported .. 
go via decompressing first + auto uncompr_col = morph_saving_offsets_graph_col(col, src_f, GraphCompressionFormat::UNCOMPRESSED, false); + // delete_in_col = true as temporary uncompr_col should always be deleted + result = + morph_saving_offsets_graph_col(uncompr_col, GraphCompressionFormat::UNCOMPRESSED, trg_f, true); + } + break; + } + } + + // free input column if possible + if (result != col && delete_in_col) { + delete col; + } + + if (result == nullptr) { + throw std::runtime_error("Did not handle src: " + graph_compr_f_to_string(src_f) + + " trg: " + graph_compr_f_to_string(trg_f)); + } + + return result; + } + + const column_uncompr *decompress_column_blocks(column_with_blockoffsets_base *col, + const GraphCompressionFormat src_f, uint64_t start, uint64_t end) { + switch (src_f) { + case GraphCompressionFormat::DELTA: { + auto casted_col = dynamic_cast(col); + return decompress_column_blocks(casted_col, start, end); + } + case GraphCompressionFormat::FOR: { + auto casted_col = dynamic_cast(col); + return decompress_column_blocks(casted_col, start, end); + } + case GraphCompressionFormat::DYNAMIC_VBP: { + auto casted_col = dynamic_cast(col); + return decompress_column_blocks(casted_col, start, end); + } + case GraphCompressionFormat::UNCOMPRESSED: { + throw std::runtime_error("Decompress a single block of size 1 is meaningless .. access directly"); + } + default: + throw std::runtime_error("Unexpected compression format" + graph_compr_f_to_string(src_f)); + } + } + + const column_uncompr *decompress_column_block(column_with_blockoffsets_base *col, + const GraphCompressionFormat src_f, uint64_t block) { + return decompress_column_blocks(col, src_f, block, block); + } + + column_with_offsets_uncompr *decompress_graph_col(column_with_blockoffsets_base *col, + const GraphCompressionFormat src_f) { + return static_cast( + morph_saving_offsets_graph_col(col, src_f, GraphCompressionFormat::UNCOMPRESSED, false)); + } + + // TODO: also consider size of blockoffset vector? + double compression_ratio(column_with_blockoffsets_base *col_with_offsets, GraphCompressionFormat col_format) { + auto uncompr_col = decompress_graph_col(col_with_offsets, col_format)->get_column(); + auto col = col_with_offsets->get_column(); + auto ratio = uncompr_col->get_size_used_byte() / (double)col->get_size_used_byte(); + + if (col != uncompr_col) { + delete uncompr_col; + } + + return ratio; + } +} // namespace morphstore + +#endif // MORPHSTORE_GRAPH_MORPH_SAVING_OFFSETS_GRAPH_COL_H \ No newline at end of file diff --git a/include/core/morphing/morph_saving_offsets.h b/include/core/morphing/morph_saving_offsets.h new file mode 100644 index 00000000..e89495f6 --- /dev/null +++ b/include/core/morphing/morph_saving_offsets.h @@ -0,0 +1,231 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file morph_saving_offset.h + * @brief based on morph.h, just calling morph_batch_t for every block and saving its offset (if blocksize > 1) + */ + +#ifndef MORPHSTORE_CORE_MORPHING_MORPH_SAVING_OFFSETS_H +#define MORPHSTORE_CORE_MORPHING_MORPH_SAVING_OFFSETS_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace morphstore { + + // **************************************************************************** + // Column-level + // **************************************************************************** + + // ---------------------------------------------------------------------------- + // General interface + // ---------------------------------------------------------------------------- + + /** + * @brief A struct wrapping the actual morph_saving_offsets-operator. + * + * This is necessary to enable partial template specialization, which is + * required, since some compressed formats have their own template parameters. + */ + template struct morph_saving_offsets_t { + /** + * @brief Morph_with_offsets-operator. Changes the (compressed) format of the given + * column from the source format `t_src_f` to the destination format + * `t_dst_f` without logically changing the data. + * + * This function is deleted by default, to guarantee that using this struct + * with a format combination it is not specialized for causes a compiler + * error, not a linker error. + * + * @param inCol The data represented in the source format + previous block_offsets. + * @return The same data represented in the destination format. + */ + static column_with_blockoffsets *apply(column_with_blockoffsets *inCol) = delete; + }; + + /** + * A convenience function wrapping the morph-saving-offset-operator. + * ! Only works if the block-size = 1 (as otherwise invalid blockoffsets) + * + * Changes the (compressed) format of the given column from the source format + * `t_src_f` to the destination format `t_dst_f` without logically changing the + * data. + * + * @param inCol The data represented in the source format. + * @return The same data represented in the destination format. + */ + template + column_with_blockoffsets *morph_saving_offsets(const column *inCol) { + return morph_saving_offsets(new column_with_blockoffsets(inCol)); + } + + /** + * A convenience function wrapping the morph-operator. + * + * Changes the (compressed) format of the given column from the source format + * `t_src_f` to the destination format `t_dst_f` without logically changing the + * data. + * + * @param inCol The data represented in the source format. + * @return The same data represented in the destination format. + */ + template + column_with_blockoffsets *morph_saving_offsets(column_with_blockoffsets *inCol) { + return morph_saving_offsets_t::apply(inCol); + } + + // ---------------------------------------------------------------------------- + // Partial specialization for morphing from a format to itself + // ---------------------------------------------------------------------------- + + /** + * @brief A template specialization of the morph-operator handling the case + * when the source and the destination format are the same. + * + * It merely returns the given column without doing any work. 
+ */ + template struct morph_saving_offsets_t { + static column_with_blockoffsets *apply(column_with_blockoffsets *inCol) { return inCol; }; + }; + + /** + * @brief A template specialization of the morph-operator handling the case + * when the source and the destination format are both uncompressed. + * + * We need to make this case explicit, since otherwise, the choice of the + * right partial template specialization is ambiguous for the compiler. + */ + template struct morph_saving_offsets_t { + static column_with_blockoffsets *apply(column_with_blockoffsets *inCol) { + return inCol; + }; + }; + + // ---------------------------------------------------------------------------- + // Partial specialization for all compressing morph operators + // ---------------------------------------------------------------------------- + + template + struct morph_saving_offsets_t { + using src_f = uncompr_f; + + // saving the offsets for every value would have an unacceptable overhead + static_assert(t_dst_f::m_BlockSize != 1, + "Blocksize of 1 is only expected for uncompr_f .. block-wise morph is useless in this case"); + + static column_with_blockoffsets *apply(column_with_blockoffsets *inCol_with_offsets) { + + const size_t t_BlockSize = t_dst_f::m_BlockSize; + + auto inCol = inCol_with_offsets->get_column(); + + std::vector *block_offsets = new std::vector(); + + const size_t countLog = inCol->get_count_values(); + const size_t outCountLogCompr = round_down_to_multiple(countLog, t_BlockSize); + const size_t outSizeRestByte = uncompr_f::get_size_max_byte(countLog - outCountLogCompr); + + const uint8_t *in8 = inCol->get_data(); + + auto outCol = new column(get_size_max_byte_any_len(countLog)); + uint8_t *out8 = outCol->get_data(); + const uint8_t *const initOut8 = out8; + + const size_t countBlocks = countLog / t_BlockSize; + block_offsets->reserve(countBlocks); + + // morphing each block and save the offset + for (size_t blockIdx = 0; blockIdx < countBlocks; blockIdx++) { + // saving the start address of the block + block_offsets->push_back(out8); + + // only t_BlockSizeLog as only on block at a time should be morphed + morph_batch(in8, out8, t_BlockSize); + } + + const size_t sizeComprByte = out8 - initOut8; + + // needed for last block (if incomplete data stays uncompressed) + if (outSizeRestByte) { + out8 = column::create_data_uncompr_start(out8); + block_offsets->push_back(out8); + memcpy(out8, in8, outSizeRestByte); + } + + outCol->set_meta_data(countLog, out8 - initOut8 + outSizeRestByte, sizeComprByte); + + return new column_with_blockoffsets(outCol, block_offsets); + } + }; + + // ---------------------------------------------------------------------------- + // Partial specialization for all decompressing morph operators + // ---------------------------------------------------------------------------- + + // as uncompressed has a blocksize of 1 --> no need to save blockoffsets + template + struct morph_saving_offsets_t { + using dst_f = uncompr_f; + + static column_with_blockoffsets *apply(column_with_blockoffsets *inCol_with_offset) { + // TODO: morph_batch each block independently (see above) + auto inCol = inCol_with_offset->get_column(); + auto block_offsets = inCol_with_offset->get_block_offsets(); + + const size_t countLog = inCol->get_count_values(); + + const size_t outSizeByte = dst_f::get_size_max_byte(countLog); + auto outCol = new column(outSizeByte); + uint8_t *out8 = outCol->get_data(); + + // !! 
need to decompress each block seperatly + // example problem: + // delta: morphing multi blocks at once -> block start value = diff to previous block + // morphing one block at a time -> block start value = first actual value of the block + // example: col 0..2047 + // --> morph(): start-values: 0 ; 1 + // --> morph_saving_offsets(): start-values: 0 ; 1024 + for (uint64_t i = 0; i < block_offsets->size(); i++) { + auto offset = block_offsets->at(i); + + // uncompressed last block + if ((i == block_offsets->size() - 1) && !inCol_with_offset->last_block_compressed()) { + memcpy(out8, offset, uncompr_f::get_size_max_byte(countLog % t_src_f::m_BlockSize)); + } else { + morph_batch(offset, out8, t_src_f::m_BlockSize); + } + } + + outCol->set_meta_data(countLog, outSizeByte); + + return new column_with_blockoffsets(outCol); + } + }; + +} // namespace morphstore + +#endif // MORPHSTORE_CORE_MORPHING_MORPH_SAVING_OFFSETS_H diff --git a/include/core/operators/graph/degree_measurement.h b/include/core/operators/graph/degree_measurement.h new file mode 100644 index 00000000..886a235d --- /dev/null +++ b/include/core/operators/graph/degree_measurement.h @@ -0,0 +1,87 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file degree_measurement.h + * @brief computing a degree distribution of a given graph + * @todo multi-threaded impl? 
; add tests + */ + +#ifndef MORPHSTORE_DEGREE_MEASUREMENT +#define MORPHSTORE_DEGREE_MEASUREMENT + +#include + +#include + +namespace morphstore { + + class DegreeMeasurement { + + public: + // function to return a list of pair < vertex id, degree > DESC: + static std::vector> get_list_of_degree_DESC(std::shared_ptr &graph) { + std::vector> vertexDegreeList; + auto vertex_count = graph->getVertexCount(); + vertexDegreeList.reserve(vertex_count); + + // fill the vector with every vertex key and his degree + for (uint64_t i = 0; i < vertex_count; ++i) { +#if DEBUG + if (i % 10000 == 0) { + std::cout << "Degree-List - Current Progress" << i << "/" << vertex_count << std::endl; + } +#endif + vertexDegreeList.push_back({i, graph->get_out_degree(i)}); + } + + // sort the vector on degree DESC + std::sort(vertexDegreeList.begin(), vertexDegreeList.end(), + [](const std::pair &left, const std::pair &right) { + return left.second > right.second; + }); + + return vertexDegreeList; + } + + // function to measure graph characteristics (degree and count) and write the result to a given file: + static void measure_degree_count(std::shared_ptr graph, std::string filePath) { + std::vector> verticesDegree = get_list_of_degree_DESC(graph); + // unordered map for mapping degree to count: + std::unordered_map results; + + for (uint64_t i = 0; i < verticesDegree.size(); ++i) { + // increment count in results for a given degree: + results[verticesDegree[i].second]++; + } + + // write to file: + std::ofstream fs; + std::stringstream ss; + // open file for writing and delete existing stuff: + fs.open(filePath, std::fstream::out | std::ofstream::trunc); + + for (auto const &m : results) { + ss << m.first << "," << m.second << "\n"; + } + fs << ss.str(); + fs.close(); + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_DEGREE_MEASUREMENT diff --git a/include/core/operators/graph/page_rank.h b/include/core/operators/graph/page_rank.h new file mode 100644 index 00000000..8512897b --- /dev/null +++ b/include/core/operators/graph/page_rank.h @@ -0,0 +1,114 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file page_rank.h + * @brief naive page-rank implementation (based on https://en.wikipedia.org/wiki/PageRank) + * @todo multi-threaded impl? 
; add tests; weighted implementation + */ + +#ifndef MORPHSTORE_PAGE_RANK +#define MORPHSTORE_PAGE_RANK + +#include + +// for equal with tolerance +#include +// for std::abs +#include + +namespace morphstore { + + struct PageRankResult { + // input parameters + uint64_t max_iterations; + float damping_factor, tolerance; + + uint64_t ran_iterations = 0; + // terminated as scores converged? + bool converged; + // i-th entry for vertex with id i + std::vector scores; + + // leaving out the scores + std::string describe() { + std::string converged_str = converged ? "True" : "False"; + return "Input-Parameters: { damping-factor: " + std::to_string(damping_factor) + + ", max-iterations: " + std::to_string(max_iterations) + + ", tolerance: " + std::to_string(tolerance) + "} \n\t\t\t" + + "Computed: { converged: " + converged_str + ", ran_iterations: " + std::to_string(ran_iterations) + + "}"; + } + }; + + class PageRank { + + public: + // assuming a consecutive vertex id-space + static PageRankResult compute(std::shared_ptr graph, const uint64_t max_iterations = 20, + const float damping_factor = 0.85, const float tolerance = 0.0001) { + // init score vector with 1/vertex_count; + const uint64_t vertex_count = graph->getVertexCount(); + std::vector scores(vertex_count, 1.0 / vertex_count); + + uint64_t iteration; + bool converged = false; + + for (iteration = 0; iteration < max_iterations; iteration++) { + // init scores of current iteration + std::vector new_scores(vertex_count, (1.0 - damping_factor) / vertex_count); + + // loop over all vertices + for (uint64_t i = 0; i < vertex_count; ++i) { + const auto neighbors = graph->get_neighbors_ids(i); + + // damping_factor * (prev-it-PR(i) / degr(i)) + const auto value_to_propagate = damping_factor * (scores[i] / neighbors.size()); + + // propagate score to its neighbours + for (auto neighbor_id : neighbors) { + new_scores[neighbor_id] += value_to_propagate; + } + } + + if (std::equal(scores.begin(), scores.end(), new_scores.begin(), new_scores.end(), + [tolerance](float score, float other_score) { + return std::abs(score - other_score) < tolerance; + })) { + converged = true; + break; + } + + scores = new_scores; + } + + // build result; + PageRankResult result; + result.damping_factor = damping_factor; + result.max_iterations = max_iterations; + result.tolerance = tolerance; + + result.converged = converged; + result.ran_iterations = iteration; + result.scores = scores; + + return result; + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_PAGE_RANK diff --git a/include/core/operators/graph/top_down_bfs.h b/include/core/operators/graph/top_down_bfs.h new file mode 100644 index 00000000..3da7129d --- /dev/null +++ b/include/core/operators/graph/top_down_bfs.h @@ -0,0 +1,128 @@ +/********************************************************************************************** + * Copyright (C) 2019-2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file top_down_bfs.h + * @brief top down BFS implementation to traverse graph + * @todo implement vectorized BFS (AVX2, AVX-512) ; return list of visited nodes + visiting depth maybe + */ + +#ifndef MORPHSTORE_TOP_DOWN_BFS +#define MORPHSTORE_TOP_DOWN_BFS + +#include +#include + +#include + +namespace morphstore { + class BFS { + + public: + // actual BFS algorithm: takes the start-node id and returns the number of explored vertices + static uint64_t compute(std::shared_ptr graph, uint64_t startVertex) { + std::vector frontier; + std::vector next; + std::vector visited(graph->getVertexCount(), false); + uint64_t exploredVertices = 0; + + frontier.push_back(startVertex); + visited[startVertex] = true; + + while (!frontier.empty()) { + // Loop through current layer of vertices in the frontier + for (uint64_t i = 0; i < frontier.size(); ++i) { + uint64_t currentVertex = frontier[i]; + // get list of a vertex's adjacency + std::vector neighbors = graph->get_neighbors_ids(currentVertex); + + // Loop through all of neighbors of current vertex + for (uint64_t j = 0; j < neighbors.size(); ++j) { + // check if neighbor has been visited, if not -> put into frontier and mark as visit = true + if (!visited[neighbors[j]]) { + next.push_back(neighbors[j]); + visited[neighbors[j]] = true; + ++exploredVertices; + } + } + } + // swap frontier with next + frontier.swap(next); + // clear next: swap with an empty container is faster + std::vector().swap(next); + } + return exploredVertices; + } + + // ------------------------------------------ Measurement stuff ------------------------------------------ + + // function that measures the number of explored vertices and time in ms: + // results are written into a file; cycle determines the ith vertex from list + static void do_measurements(std::shared_ptr graph, uint64_t cycle, std::string pathToFile) { + // list of measurement candidates: the parameter means the ith vertex in total + std::vector candidates = get_list_of_every_ith_vertex(graph, cycle); + + // Intermediate data structure: (explored vertices, time in ms) + std::vector> results; + results.reserve(candidates.size()); + + for (uint64_t i = 0; i < candidates.size(); ++i) { + // start measuring bfs time: + auto startBFSTime = std::chrono::high_resolution_clock::now(); + + uint64_t exploredVertices = compute(graph, candidates[i]); + + auto finishBFSTime = std::chrono::high_resolution_clock::now(); // For measuring the execution time + auto elapsedBFSTime = + std::chrono::duration_cast(finishBFSTime - startBFSTime).count(); + + // write to intermediate array: + results.push_back({exploredVertices, elapsedBFSTime}); + } + + // WRITE INTERMEDIATES TO FILE: + std::ofstream fs; + std::stringstream ss; + std::string filename = pathToFile; + // open file for writing and delete existing stuff: + fs.open(filename, std::fstream::out | std::ofstream::trunc); + + ss << "explored vertices | time in ms \n"; + + for (uint64_t j = 0; j < results.size(); j++) { + ss << results[j].first << "," << results[j].second << "\n"; + } + fs << ss.str(); + + fs.close(); + } + + // function which returns a list of every ith vertex which is sorted by degree DESC + // TODO: could be seen as a generell helper function -> move into seperate header + static std::vector 
get_list_of_every_ith_vertex(std::shared_ptr graph, uint64_t cycle) { + std::vector measurementCandidates; + std::vector> totalListOfVertices = + DegreeMeasurement::get_list_of_degree_DESC(graph); + for (uint64_t i = 0; i < totalListOfVertices.size(); i = i + cycle) { + measurementCandidates.push_back(totalListOfVertices[i].first); + } + return measurementCandidates; + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_TOP_DOWN_BFS diff --git a/include/core/storage/column.h b/include/core/storage/column.h index e590595f..45a7ecc4 100644 --- a/include/core/storage/column.h +++ b/include/core/storage/column.h @@ -42,8 +42,31 @@ enum class storage_persistence_type { queryScope }; +// template-free base class +// use-case: graph formats can change their column format at run-time via `compress(Format f)` +class column_base { + public: + virtual ~column_base() {} + // todo: find a way to specify `inline` + virtual voidptr_t get_data( void ) const = 0; + virtual size_t get_count_values( void ) const = 0; + virtual void set_count_values( size_t p_CountValues ) = 0; + virtual size_t get_size_used_byte( void ) const = 0; + virtual void set_size_used_byte( size_t p_SizeUsedByte ) = 0; + virtual size_t get_size_compr_byte( void ) const = 0; + virtual void set_size_compr_byte( size_t p_SizeComprByte ) = 0; + virtual void set_meta_data( size_t p_CountValues, size_t p_SizeUsedByte, size_t p_SizeComprByte ) = 0; + virtual void set_meta_data( size_t p_CountValues, size_t p_SizeUsedByte) = 0; + + virtual const voidptr_t get_data_uncompr_start() const = 0; + virtual size_t get_count_values_uncompr() const = 0; + virtual size_t get_count_values_compr() const = 0; + // this is a template-method and cannot be defined here? + //virtual bool prepare_for_random_access() const = 0; +}; + template< class F > -class column { +class column : public column_base { static_assert( std::is_base_of< format, F >::value, "column: template parameter F must be a subclass of format" @@ -285,7 +308,5 @@ class column { ); } }; - - } #endif //MORPHSTORE_CORE_STORAGE_COLUMN_H diff --git a/include/core/storage/column_gen.h b/include/core/storage/column_gen.h index f7e51bec..5dd2c154 100644 --- a/include/core/storage/column_gen.h +++ b/include/core/storage/column_gen.h @@ -47,12 +47,13 @@ namespace morphstore { * elements. * * @param vec The vector to initialize the column with. + * @param sudo Overrule limit of 20 * @return An uncompressed column containing a copy of the data in the given * vector. 
*/ -const column * make_column(const std::vector & vec) { +const column * make_column(const std::vector & vec, bool sudo = false) { const size_t count = vec.size(); - if(count > 20) + if(count > 20 && !sudo) throw std::runtime_error( "make_column() is an inefficient convenience function and " "should only be used for very small columns" @@ -64,8 +65,8 @@ const column * make_column(const std::vector & vec) { return resCol; } -const column * make_column(uint64_t const * const vec, size_t count) { - if(count > 400) +const column * make_column(uint64_t const * const vec, size_t count, bool sudo = false) { + if(count > 400 && !sudo) throw std::runtime_error( "make_column() is an inefficient convenience function and " "should only be used for very small columns" diff --git a/include/core/storage/column_with_blockoffsets.h b/include/core/storage/column_with_blockoffsets.h new file mode 100644 index 00000000..35aa20dc --- /dev/null +++ b/include/core/storage/column_with_blockoffsets.h @@ -0,0 +1,84 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file column_with_blockoffsets.h + * @brief Wrapper around column + its block-offsets + */ + +#ifndef MORPHSTORE_CORE_STORAGE_COLUMN_WITH_BLOCKOFFSETS_H +#define MORPHSTORE_CORE_STORAGE_COLUMN_WITH_BLOCKOFFSETS_H + +#include + +namespace morphstore { + +// interface (needed as current Graph formats don't use templates) +class column_with_blockoffsets_base { + public: + virtual ~column_with_blockoffsets_base() {} + + virtual const std::vector *get_block_offsets() = 0; + virtual const uint8_t *get_block_offset(size_t block_number) = 0; + virtual const column_base *get_column() = 0; + virtual size_t get_block_size() = 0; + + size_t get_size_used_byte() { + return get_column()->get_size_used_byte() + (get_block_offsets()->size() * sizeof(uint8_t *)); + } + + bool last_block_compressed() { + return get_column()->get_count_values_uncompr() == 0; + } +}; + +// used to allow only partial decompression of column blocks (for random access) +// blockoffsets should only be saved, if blocksize > 1 +template class column_with_blockoffsets : public column_with_blockoffsets_base { + static_assert(std::is_base_of::value, "column: template parameter F must be a subclass of format"); + + private: + const column *col; + // TODO: use std::optional + const std::vector *block_offsets; + + public: + column_with_blockoffsets(const column *c) + : column_with_blockoffsets(c, new std::vector()) { + static_assert(F::m_BlockSize == 1, "need block offsets if block-size > 1"); + } + + column_with_blockoffsets(const column *c, std::vector *offsets) { + col = c; + block_offsets = offsets; + } + + ~column_with_blockoffsets() { + // ? deleting the column might be not always wanted + delete col; + delete block_offsets; + } + + const std::vector *get_block_offsets() { return block_offsets; } + const uint8_t *get_block_offset(size_t block_number) { return block_offsets->at(block_number); } + + const column *get_column() { return col; } + + inline size_t get_block_size() { return F::m_BlockSize; } +}; +} // namespace morphstore +#endif //MORPHSTORE_CORE_STORAGE_COLUMN_WITH_BLOCKOFFSETS_H diff --git a/include/core/storage/graph/edge/edge.h b/include/core/storage/graph/edge/edge.h new file mode 100644 index 00000000..b6d4e60a --- /dev/null +++ b/include/core/storage/graph/edge/edge.h @@ -0,0 +1,178 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file edge.h + * @brief Edge class which represents an edge object between two vertices + * @todo + */ + +#ifndef MORPHSTORE_EDGE_H +#define MORPHSTORE_EDGE_H + +#include + +#include +#include +#include +#include +#include +#include + +namespace morphstore { + + // for loading a graph + class Edge { + + protected: + // Edge characteristics + uint64_t sourceId, targetId; + unsigned short int type; + + public: + Edge() {} + + virtual ~Edge() = default; + + Edge(uint64_t sourceId, uint64_t targetId, unsigned short int type) { + this->sourceId = sourceId; + this->targetId = targetId; + this->type = type; + } + + // --------------- Getter and Setter --------------- + + uint64_t getSourceId() const { return sourceId; } + + uint64_t getTargetId() const { return targetId; } + + unsigned short getType() const { return type; } + + // function for sorting algorithms in the ldbc-importer: + // compare target-ids and return if it's "lower" (we need the sorting for the CSR) + bool operator<(const Edge &e) const { return getTargetId() < e.getTargetId(); } + + // get size of edge object in bytes: + static size_t size_in_bytes() { + size_t size = 0; + size += sizeof(uint64_t) * 2; // source- and target-id + size += sizeof(unsigned short int); // type + return size; + } + + virtual std::string to_string() const { + return "(" + std::to_string(this->sourceId) + "->" + std::to_string(this->targetId) + ")"; + } + }; + + // for internal usage (inside the edges-container) + class EdgeWithId : public Edge { + private: + uint64_t id; + + // delete flag + // TODO: put as a std::bitset in vectorarray_container (as hashmap-container does not need the valid flag) + bool valid = false; + + public: + // default constr. 
needed for EdgeWithProperties constructor + EdgeWithId() {} + + EdgeWithId(uint64_t id, uint64_t sourceId, uint64_t targetId, unsigned short int type) + : Edge(sourceId, targetId, type) { + this->id = id; + this->valid = true; + } + + EdgeWithId(uint64_t id, Edge edge) : Edge(edge.getSourceId(), edge.getTargetId(), edge.getType()) { + this->id = id; + this->valid = true; + } + + uint64_t getId() const { return id; } + + bool isValid() const { return valid; } + + // this is needed for edges_container when doing edges[id] = edge + EdgeWithId &operator=(const EdgeWithId &edge) { + // self-assignment guard + if (this == &edge) + return *this; + + // do the copy + this->sourceId = edge.getSourceId(); + this->targetId = edge.getTargetId(); + this->type = edge.getType(); + this->id = edge.getId(); + this->valid = edge.isValid(); + + // return the existing object so we can chain this operator + return *this; + } + + // edge size + id and valid flag + static size_t size_in_bytes() { return Edge::size_in_bytes() + sizeof(uint64_t) + sizeof(bool); } + + std::string to_string() const override { + return "(id:" + std::to_string(this->id) + " ," + "valid: " + std::to_string(this->valid) + + Edge::to_string() + ")"; + } + }; + + // for loading + class EdgeWithProperties { + private: + std::unordered_map properties; + // not using inheritance as vector elements could not get cast to EdgeWithProperties + Edge edge; + + public: + EdgeWithProperties(uint64_t sourceId, uint64_t targetId, unsigned short int type, + const std::unordered_map properties) { + this->edge = Edge(sourceId, targetId, type); + this->properties = properties; + } + + EdgeWithProperties(uint64_t sourceId, uint64_t targetId, unsigned short int type) { + this->edge = Edge(sourceId, targetId, type); + } + + Edge getEdge() const { return edge; } + + std::unordered_map getProperties() { return properties; } + + bool operator<(const EdgeWithProperties &e) const { return edge.getTargetId() < e.getEdge().getTargetId(); } + }; + + // for returning an edge to the user + class EdgeWithIdAndProperties { + private: + std::unordered_map properties; + EdgeWithId edge; + + public: + EdgeWithIdAndProperties(EdgeWithId edge, const std::unordered_map properties) { + this->edge = edge; + this->properties = properties; + } + EdgeWithId getEdge() { return edge; } + + std::unordered_map getProperties() { return properties; } + }; +} // namespace morphstore + +#endif // MORPHSTORE_EDGE_H diff --git a/include/core/storage/graph/edge/edges_container.h b/include/core/storage/graph/edge/edges_container.h new file mode 100644 index 00000000..62f4897b --- /dev/null +++ b/include/core/storage/graph/edge/edges_container.h @@ -0,0 +1,167 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file edges_container.h + * @brief abstract class for storing edges + * @todo an EntityContainer abstraction (reduce duplicated code to vertices_container.h) + */ + +#ifndef MORPHSTORE_EDGES_CONTAINER_H +#define MORPHSTORE_EDGES_CONTAINER_H + +#include +#include + +#include +#include +#include +#include + +namespace morphstore { + enum class EdgesContainerType { HashMapContainer, VectorArrayContainer }; + + class EdgesContainer { + protected: + uint64_t expected_edge_count = 0; + // ! this should be an atomic one, if multi-threaded insertion is of interest + uint64_t current_max_edge_id = 0; + + std::map edge_type_dictionary; + + // TODO: try other property storage formats than per edge .. (triple-store or per property) + std::unordered_map> edge_properties; + + std::string get_edge_type(unsigned short int type) const { + if (edge_type_dictionary.find(type) != edge_type_dictionary.end()) { + return edge_type_dictionary.at(type); + } else { + // could also throw an error here + return "No Matching of type-number in the database! For type " + std::to_string(type); + } + } + + // for assigning ids + uint64_t get_next_edge_id() { return current_max_edge_id++; } + + public: + virtual std::string container_description() const = 0; + virtual void insert_edge(EdgeWithId e) = 0; + virtual EdgeWithId get_edge(uint64_t id) = 0; + virtual bool exists_edge(const uint64_t id) const = 0; + virtual uint64_t edge_count() const = 0; + + virtual void allocate(uint64_t expected_edges) { + edge_properties.reserve(expected_edges); + expected_edge_count += expected_edges; + } + + uint64_t add_edge(Edge edge) { + auto id = get_next_edge_id(); + insert_edge(EdgeWithId(id, edge)); + return id; + } + + uint64_t add_edge(EdgeWithProperties edge) { + auto id = add_edge(edge.getEdge()); + + if (auto properties = edge.getProperties(); !properties.empty()) { + edge_properties[id] = properties; + } + + return id; + } + + bool has_properties(uint64_t id) { return edge_properties.find(id) != edge_properties.end(); } + + void add_property_to_edge(uint64_t id, const std::pair property) { + assert(exists_edge(id)); + edge_properties[id].insert(property); + }; + + void set_edge_properties(uint64_t id, const std::unordered_map properties) { + assert(exists_edge(id)); + + if (has_properties(id)) { + std::cout << "Overwritting existing properties for :"; + print_edge_by_id(id); + std::cout << std::endl; + } + + edge_properties[id] = properties; + }; + + void set_edge_type_dictionary(const std::map &types) { + assert(types.size() != 0); + this->edge_type_dictionary = types; + } + + const EdgeWithIdAndProperties get_edge_with_properties(uint64_t id) { + assert(exists_edge(id)); + return EdgeWithIdAndProperties(get_edge(id), edge_properties[id]); + } + + uint64_t edges_with_properties_count() { return edge_properties.size(); } + + virtual std::pair get_size() const { + size_t data_size = 0; + size_t index_size = 0; + + // lookup type dicts + index_size += 2 * sizeof(std::map); + for (auto &type_mapping : edge_type_dictionary) { + index_size += sizeof(unsigned short int); + index_size += sizeof(char) * (type_mapping.second.length()); + } + + // edge-properties: + index_size += sizeof(std::unordered_map>); + for (const auto &property_mapping : edge_properties) { + index_size += sizeof(uint64_t) + 
sizeof(std::unordered_map); + for (const auto &property : property_mapping.second) { + data_size += sizeof(char) * property.first.length() + sizeof(property.second); + } + } + + return {index_size, data_size}; + } + + void print_type_dict() { + std::cout << "EdgeType-Dict: " << std::endl; + for (auto const &entry : edge_type_dictionary) { + std::cout << entry.first << " -> " << entry.second << std::endl; + } + } + + void print_edge_by_id(const uint64_t id) { + std::cout << "-------------- Edge ID: " << id << " --------------" << std::endl; + auto e = get_edge_with_properties(id); + std::cout << e.getEdge().to_string() << std::endl; + std::cout << "Type: " << this->get_edge_type(e.getEdge().getType()) << std::endl; + std::cout << "Properties: "; + for (const auto entry : e.getProperties()) { + auto value = entry.second; + std::cout << "{" << entry.first << ": "; + std::visit(PropertyValueVisitor{}, value); + std::cout << "}"; + } + std::cout << std::endl; + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_EDGES_CONTAINER_H \ No newline at end of file diff --git a/include/core/storage/graph/edge/edges_hashmap_container.h b/include/core/storage/graph/edge/edges_hashmap_container.h new file mode 100644 index 00000000..dfbaace7 --- /dev/null +++ b/include/core/storage/graph/edge/edges_hashmap_container.h @@ -0,0 +1,79 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file edges_hashmap_container.h + * @brief storing edges using a hashmap + * @todo an EntityHashMapContainer abstraction (reduce duplicated code to VertexHashMapContainer) + */ + +#ifndef MORPHSTORE_EDGES_HASHMAP_CONTAINER_H +#define MORPHSTORE_EDGES_HASHMAP_CONTAINER_H + +#include "edge.h" +#include "edges_container.h" + +#include +#include + +namespace morphstore { + + class EdgesHashMapContainer : public EdgesContainer { + protected: + // mapping edge id -> edge + // currently saving the id twice + std::unordered_map edges; + + public: + std::string container_description() const override { return "unordered_map"; } + + void allocate(const uint64_t expected_edges) override { + EdgesContainer::allocate(expected_edges); + this->edges.reserve(expected_edges); + } + + // TODO: unpack EdgeWithId to just Edge (avoid saving edge-id twice) + void insert_edge(const EdgeWithId e) override { edges[e.getId()] = e; } + + bool exists_edge(const uint64_t id) const override { + if (edges.find(id) == edges.end()) { + return false; + } + return true; + } + + EdgeWithId get_edge(uint64_t id) override { return edges[id]; } + + uint64_t edge_count() const { return edges.size(); } + + // memory estimation + // returns a pair of index-size, data-size + std::pair get_size() const override { + auto [index_size, data_size] = EdgesContainer::get_size(); + + // container for indexes: + index_size += sizeof(std::unordered_map); + // index size of edge: size of id and sizeof pointer + index_size += edges.size() * sizeof(uint64_t); + data_size += edges.size() * EdgeWithId::size_in_bytes(); + + return {index_size, data_size}; + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_EDGES_HASHMAP_CONTAINER_H \ No newline at end of file diff --git a/include/core/storage/graph/edge/edges_vectorarray_container.h b/include/core/storage/graph/edge/edges_vectorarray_container.h new file mode 100644 index 00000000..00ee32f1 --- /dev/null +++ b/include/core/storage/graph/edge/edges_vectorarray_container.h @@ -0,0 +1,136 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file edges_vectorarray_container.h + * @brief storing edges using a vector of arrays; assuming a consecutive id space + * @todo + */ + +#ifndef MORPHSTORE_EDGES_VECTORARRAY_CONTAINER_H +#define MORPHSTORE_EDGES_VECTORARRAY_CONTAINER_H + +#include "edge.h" +#include "edges_container.h" + +#include +#include +#include +#include + +namespace morphstore { + // very different to VerticesVectorArrayContainer as edge ids are not given at insertion time! + // (not anymore, but not considered in current implementation) + // and using std::array as aligned_alloc did not set invalid flag to false (could be solveable) + class EdgesVectorArrayContainer : public EdgesContainer { + protected: + static const inline uint64_t edge_array_size = 4096; + static const inline uint64_t edges_per_array = edge_array_size / sizeof(EdgeWithId); + + using edge_array = std::array; + std::vector edges; + + uint64_t number_of_edges = 0; + + edge_array allocate_edge_array() { + edge_array array; + edges.push_back(array); + // std::cout << " Added a page" << std::endl; + // std::cout.flush(); + + return array; + } + + inline uint64_t get_edge_array_number(uint64_t edge_id) const { return edge_id / edges_per_array; } + + inline uint64_t get_pos_in_array(uint64_t edge_id) const { return edge_id % edges_per_array; } + + public: + std::string container_description() const override { + return "vector>"; + } + + void allocate(const uint64_t expected_edges) override { + EdgesContainer::allocate(expected_edges); + // rounding up .. only whole arrays can be allocated + auto array_count = std::ceil(expected_edges / (float)edges_per_array); + this->edges.reserve(array_count); + + for (int i = 0; i < array_count; i++) { + allocate_edge_array(); + } + } + + void insert_edge(EdgeWithId e) { + // not assuming sequentiell insertion (could be changed to just insert at a given position) + // and only assert that the given position matches + auto array_number = get_edge_array_number(e.getId()); + auto array_pos = get_pos_in_array(e.getId()); + + // second time to assert that expected edge count is not exceeded ? 
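+            // Worked example of the id -> slot mapping used below (illustrative only; the concrete
+            // numbers assume sizeof(EdgeWithId) == 32, i.e. edges_per_array == 4096 / 32 == 128):
+            //   edge id 300  ->  array_number == 300 / 128 == 2,  array_pos == 300 % 128 == 44,
+            //   so the edge would be written to edges.at(2)[44].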
+ if (array_number >= edges.size()) { + throw std::runtime_error("Exceeded edge id limit: Edge id " + std::to_string(e.getId()) + " > " + + std::to_string(edges_per_array * edges.size() - 1)); + } + + /* if (edges.at(array_number)[array_pos].isValid()) { + throw std::runtime_error("Delete existing edge before overwriting it: edge-id " + e.to_string()); + } */ + + edges.at(array_number)[array_pos] = e; + number_of_edges++; + } + + bool exists_edge(const uint64_t id) const override { + uint64_t array_number = get_edge_array_number(id); + uint64_t pos_in_array = get_pos_in_array(id); + + if (array_number >= edges.size()) + return false; + + return edges.at(array_number)[pos_in_array].isValid(); + } + + EdgeWithId get_edge(uint64_t id) override { + uint64_t array_number = get_edge_array_number(id); + uint64_t pos_in_array = get_pos_in_array(id); + + assert(array_number < edges.size()); + + return edges.at(array_number)[pos_in_array]; + } + + uint64_t edge_count() const override { return number_of_edges; } + + // memory estimation + // returns a pair of index-size, data-size + std::pair get_size() const override { + auto [index_size, data_size] = EdgesContainer::get_size(); + + // vector count, current_array_offset + index_size += 2 * sizeof(uint64_t); + + index_size += sizeof(std::vector); + // allocated memory for edges + data_size += edges.size() * sizeof(edge_array); + + return {index_size, data_size}; + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_EDGES_VECTORARRAY_CONTAINER_H \ No newline at end of file diff --git a/include/core/storage/graph/formats/adjacencylist.h b/include/core/storage/graph/formats/adjacencylist.h new file mode 100644 index 00000000..a2ddbbf2 --- /dev/null +++ b/include/core/storage/graph/formats/adjacencylist.h @@ -0,0 +1,328 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file adjacencylist.h + * @brief Derived adj. list storage format class. 
Base: graph.h + * @todo try replacing unordered_map with a fixed sized array; read more into std::variant not allowing references (seems to work though) +*/ + +#ifndef MORPHSTORE_ADJACENCYLIST_H +#define MORPHSTORE_ADJACENCYLIST_H + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace morphstore { + + class AdjacencyList : public Graph { + + private: + // const column as after finalized only read_only + using adjacency_column = column_base *; + using adjacency_vector = std::vector *; + // an adjacency-list can be either a column or a vector + using adjacency_list_variant = std::variant; + + // visitor for accessing std::variant + struct Adjacency_List_Size_Visitor { + size_t operator()(const adjacency_column c) const { return c->get_size_used_byte(); } + size_t operator()(const adjacency_vector v) const { + return sizeof(std::vector) + (v->size() * sizeof(uint64_t)); + } + }; + + // visitor for accessing std::variant + struct Adjacency_List_OutDegree_Visitor { + uint64_t operator()(const adjacency_column c) const { + // assuming compressed col has the same value count (would not work for RLE?) + return c->get_count_values(); + } + uint64_t operator()(const adjacency_vector v) const { return v->size(); } + }; + + GraphCompressionFormat current_compression = GraphCompressionFormat::UNCOMPRESSED; + + // maps the a list of outgoing edges (ids) to a vertex-id (representing the graph topology) + // TODO: try using a vector instead an unordered_map (? faster access, but needs more memory for empty adj.-lists ?) + std::unordered_map *adjacencylistPerVertex = + new std::unordered_map(); + + // as formats allocate to much memory for small columns + // adj-lists with a lower degree, are stored as vectors (others are columns) + uint64_t min_compr_degree = 1024; + + // convert big-enough adj-vector to a (read-only) adj-column (based on the min_compr_degree) + void finalize() { + int vectors_transformed = 0; + for (auto [id, adj_list] : *adjacencylistPerVertex) { + if (std::holds_alternative(adj_list)) { + auto adj_vector = std::get(adj_list); + // this allows adding new edges to smaller adj_lists (even after morphing) + if (adj_vector->size() >= min_compr_degree) { + adjacency_column adj_col = + const_cast(make_column(adj_vector->data(), adj_vector->size(), true)); + + if (current_compression != GraphCompressionFormat::UNCOMPRESSED) { + adj_col = const_cast(morph_graph_col( + adj_col, GraphCompressionFormat::UNCOMPRESSED, current_compression, true)); + } + + (*adjacencylistPerVertex)[id] = adj_col; + + // as vector is not needed anymore and allocated using new + delete adj_vector; + vectors_transformed++; + } + } + // TODO: higher-min compr degree -> transform columns back to vector using: + // new std::vector() + // adjacency_vector adj_vec(src, src + n); + // (*adjacencylistPerVertex)[id] = adj_vec; + // delete old column + } +#if DEBUG + std::cout << "Transformed " << vectors_transformed << " vectors into columns" << std::endl; +#endif + } + + protected: + // function that adds multiple edges (list of neighbors) at once to vertex + void add_to_vertex_edges_mapping(uint64_t sourceId, const std::vector edge_ids) override { + // avoid inserting an empty adjacencyVector (waste of memory) + if (edge_ids.size() == 0) { + return; + } + + std::vector *adjacencyVector; + if (auto entry = adjacencylistPerVertex->find(sourceId); entry != adjacencylistPerVertex->end()) { + if (std::holds_alternative(entry->second)) { + throw std::runtime_error("Not implemented to add 
edges, if adj. list is a (compressed) column"); + } + adjacencyVector = std::get(entry->second); + } else { + adjacencyVector = new std::vector(); + adjacencylistPerVertex->insert({sourceId, adjacencyVector}); + } + + adjacencyVector->reserve(edge_ids.size()); + adjacencyVector->insert(adjacencyVector->end(), edge_ids.begin(), edge_ids.end()); + } + + public: + ~AdjacencyList() { + // as vectors and columns are allocated using new -> need to delete them manually + for (auto [id, adj_list] : *this->adjacencylistPerVertex) { + if (std::holds_alternative(adj_list)) { + delete std::get(adj_list); + } else { + delete std::get(adj_list); + } + } + + delete this->adjacencylistPerVertex; + } + + AdjacencyList(EdgesContainerType edges_container_type) + : Graph(VerticesContainerType::VectorArrayContainer, edges_container_type) {} + + AdjacencyList(VerticesContainerType vertices_container_type = VerticesContainerType::VectorArrayContainer) + : Graph(vertices_container_type) {} + + std::string get_storage_format() const override { return "Adjacency_List"; } + + std::string get_compression_format() const override { return graph_compr_f_to_string(current_compression); } + + // function: to set graph allocations + void allocate_graph_structure(uint64_t numberVertices, uint64_t numberEdges) override { + Graph::allocate_graph_structure(numberVertices, numberEdges); + adjacencylistPerVertex->reserve(numberVertices); + } + + // currently new_min_compr_degree must be smaller or equal than the current min_compr_degree + void set_min_compr_degree(uint64_t new_min_compr_degree) { + if (new_min_compr_degree > min_compr_degree) { + // allowing this would need re-transforming finalized columns to vectors + // when this is allowed, the min_compr_degree should be enough as an function parameter for finalize + throw std::runtime_error("Only supporting an decreasing minimum compression degree (new: " + + std::to_string(new_min_compr_degree) + + ", current: " + std::to_string(min_compr_degree) + ")"); + } + this->min_compr_degree = new_min_compr_degree; + // applying the new min_compr_degree + finalize(); + } + + // adding a single edge to vertex: + // graph-format specific, as CSR is currently limited to bulk inserts + uint64_t add_edge(uint64_t sourceId, uint64_t targetId, unsigned short int type) override { + Edge e = Edge(sourceId, targetId, type); + return add_edges(sourceId, {e})[0]; + } + + uint64_t get_min_compr_degree() { return min_compr_degree; } + + // get number of outgoing edges for the vertex with the given id + uint64_t get_out_degree(uint64_t id) override { + auto entry = adjacencylistPerVertex->find(id); + if (entry == adjacencylistPerVertex->end()) { + return 0; + } else { + return std::visit(Adjacency_List_OutDegree_Visitor{}, entry->second); + } + } + + std::vector get_outgoing_edge_ids(uint64_t id) override { + // basically column -> vector (as convinient to use in other methods) + // maybe better idea would be to return a uint64_t* instead (together with a size value) + std::vector edge_ids; + if (auto entry = adjacencylistPerVertex->find(id); entry != adjacencylistPerVertex->end()) { + auto adj_list = entry->second; + if (std::holds_alternative(adj_list)) { + auto uncompr_col = decompress_graph_col(std::get(adj_list), current_compression); + const size_t column_size = uncompr_col->get_count_values(); + // TODO: init vector via range-constructor / mem-cpy + // const uint8_t * end_addr = start_addr + sizeof(uint64_t) * out_degree; + const uint64_t *start_addr = uncompr_col->get_data(); + + 
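+                    // (the range-insert below copies all column_size values of the decompressed block
+                    //  into edge_ids; since edge_ids is still empty at this point, this already behaves
+                    //  like the range-construction mentioned in the TODO above)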
edge_ids.insert(edge_ids.end(), start_addr, start_addr + column_size); + + if (current_compression != GraphCompressionFormat::UNCOMPRESSED) { + delete uncompr_col; + } + } else { + edge_ids = *std::get(adj_list); + } + } + return edge_ids; + } + + // morphes the adj-lists to the given target_format + void morph(GraphCompressionFormat target_format) override { morph(target_format, true); } + + // ! vector<->column conversion overhead if min_degree is different + // ! morphing to UNCOMPRESSED results in all adj-lists being columns (instead of vectors) + void morph(GraphCompressionFormat target_format, bool blocksize_based_min_degree) { + if (blocksize_based_min_degree) { + // as if blocksize > size of adjlist -> stays uncompressed but still allocates a whole block + set_min_compr_degree(graph_compr_f_block_size(target_format)); + } else { + // transform big enough vectors into columns (based on the min_compr_degree) + this->finalize(); + } + +#if DEBUG + std::cout << "Compressing graph format specific data structures using: " + << graph_compr_f_to_string(target_format) << std::endl; + auto entry_count = adjacencylistPerVertex->size(); + int progress = 0; +#endif + for (auto const [id, adj_list] : *adjacencylistPerVertex) { +#if DEBUG + if (progress % 10000 == 0) { + std::cout << "Compression Progress: " << progress << "/" << entry_count << std::endl; + } + progress++; +#endif + // adj. lists >= min_compr_degree are columns + if (std::holds_alternative(adj_list)) { + auto old_adj_col = std::get(adj_list); + // const_cast needed as map-value is not constant + (*adjacencylistPerVertex)[id] = const_cast( + morph_graph_col(old_adj_col, current_compression, target_format, true)); + } + } + + this->current_compression = target_format; + } + + double compr_ratio() const { + double total_compr_ratio = 0; + for (auto const [id, adj_list] : *adjacencylistPerVertex) { + auto out_degree = std::visit(Adjacency_List_OutDegree_Visitor{}, adj_list); + double compr_ratio; + if (std::holds_alternative(adj_list)) { + auto adj_col = std::get(adj_list); + compr_ratio = compression_ratio(adj_col, current_compression); + } else { + compr_ratio = 1; + } + auto weighted_ratio = compr_ratio * ((double)out_degree / getEdgeCount()); + total_compr_ratio += weighted_ratio; + } + + return total_compr_ratio; + } + + // ratio of adjacency columns (rest would be vectors) + // depends on the min_compr_degree + double column_ratio() const { + // neither coloumns or vectors + if (getEdgeCount() == 0) { + return -1; + } + + uint64_t column_count = 0; + for (auto const [id, adj_list] : *adjacencylistPerVertex) { + if (std::holds_alternative(adj_list)) { + column_count++; + } + } + + return (double)column_count / adjacencylistPerVertex->size(); + } + + // for measuring the size in bytes: + std::pair get_size_of_graph() const override { + // graph-format agnostic memory usage (like storage for entities) + auto [index_size, data_size] = Graph::get_size_of_graph(); + + // min_compr_degree + index_size += sizeof(uint64_t); + + // adjacencyListPerVertex + index_size += sizeof(std::unordered_map); + // overhead for each map-entry + index_size += adjacencylistPerVertex->size() * (sizeof(uint64_t) + sizeof(adjacency_list_variant)); + + for (const auto [id, adj_list] : *adjacencylistPerVertex) { + data_size += std::visit(Adjacency_List_Size_Visitor{}, adj_list); + } + + return {index_size, data_size}; + } + + void statistics() override { + Graph::statistics(); + std::cout << "Number of adjacency lists:" << adjacencylistPerVertex->size() << 
std::endl; + std::cout << "Min. degree for compression: " << min_compr_degree << std::endl; + std::cout << "Column/Vector ratio: " << column_ratio() << std::endl; + std::cout << "Compression ratio: " << compr_ratio() << std::endl; + std::cout << "--------------------------------------------" << std::endl; + std::cout << std::endl << std::endl; + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_ADJACENCYLIST_H diff --git a/include/core/storage/graph/formats/csr.h b/include/core/storage/graph/formats/csr.h new file mode 100644 index 00000000..0feb05c3 --- /dev/null +++ b/include/core/storage/graph/formats/csr.h @@ -0,0 +1,411 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file csr.h + * @brief Derived CSR storage format class. Base: graph.h + * @todo Edge_value_array should only store edge-ids (not whole objects) + */ + +#ifndef MORPHSTORE_CSR_H +#define MORPHSTORE_CSR_H + +#include +#include + +#include + +#include +#include + +namespace morphstore { + + // simple cache of size 1 (to avoid decompressing the same block multiple times .. f.i. for getting the degree of a + // vertex) + // ! poor mans approach and should be changed, if multi-threaded executions are done + // (cache per thread maybe .. inside a `for_all` iterator maybe) + class ColumnBlockCache { + private: + uint64_t block_number; + const column *decompressed_block; + + public: + ColumnBlockCache(uint64_t block_number, const column *decompressed_block) { + this->block_number = block_number; + this->decompressed_block = decompressed_block; + } + + ~ColumnBlockCache() { + // always valid, as columns in uncompressed format should not use the cache + delete decompressed_block; + } + + uint64_t get_block_number() const { return block_number; } + + const column * get() const { + return decompressed_block; + } + }; + + class CSR : public Graph { + + private: + /* graph topology: + * offset column: index is vertex-id; column entry contains offset in edgeId array + * edgeId column: contains edge ids + */ + column_with_blockoffsets_base *offset_column; + column_with_blockoffsets_base *edgeId_column; + + // current compression formats (to be removed when class accepts template parameters for the formats) + GraphCompressionFormat offsets_compression = GraphCompressionFormat::UNCOMPRESSED; + GraphCompressionFormat edgeIds_compression = GraphCompressionFormat::UNCOMPRESSED; + + // for faster sequentiell access (not respected in memory usage yet) .. 
ideally encapsulated in an iterator + // as already for getting edge-ids the same block is decompressed 3x otherwise + std::unique_ptr offset_block_cache = nullptr; + // assuming most degrees are << block-size + std::unique_ptr edgeIds_block_cache = nullptr; + + bool is_uncompressed() { + return offsets_compression == GraphCompressionFormat::UNCOMPRESSED && + edgeIds_compression == GraphCompressionFormat::UNCOMPRESSED; + } + + protected: + // this function fills the graph-topology-arrays sequentially in the order of vertex-ids ASC + void add_to_vertex_edges_mapping(uint64_t sourceID, const std::vector edge_ids) override { + // TODO: throw error if not in order of vertex-ids ASC inserted (currently will only produce rubbish data) + // TODO: handle if sourceIDs are skipped + // TODO: !!! handle if last vertex has no edges (wrong offset currently) + // potential solution: add last_seen_vertex_id as class field .. check based on that .. assert order and + // insert offsets for skipped vertices + + // avoid writting more than reserved (as fixed sized columns) + assert(expectedEdgeCount >= getEdgeCount()); + + // currently only read-only if compressed + // for write it would be necessary to decompress the last block; write and compress again + if (!is_uncompressed()) { + throw std::runtime_error("Edge insertion only allowed in uncompressed format. Current format: " + + get_compression_format()); + } + + uint64_t *offset_data = offset_column->get_column()->get_data(); + uint64_t offset = offset_data[sourceID]; + uint64_t nextOffset = offset + edge_ids.size(); + + uint64_t *edgeId_data = edgeId_column->get_column()->get_data(); + // TODO: get copy to work (should be faster than loop) + // std::copy(edge_ids.begin(), edge_ids.end(), edgeId_data); + for (auto edge_id : edge_ids) { + edgeId_data[offset] = edge_id; + offset++; + } + + // to avoid buffer overflow: + if (sourceID < getExpectedVertexCount() - 1) { + offset_data[sourceID + 1] = nextOffset; + } + } + + uint64_t get_offset(uint64_t id) { + auto block_size = offset_column->get_block_size(); + + if (offsets_compression == GraphCompressionFormat::UNCOMPRESSED) { + uint64_t *col_data = offset_column->get_column()->get_data(); + return col_data[id]; + } else { + auto block_number = id / block_size; + auto block_pos = id % block_size; + + assert(block_number < offset_column->get_block_offsets()->size()); + + // TODO refactor this cache logic into a method + const column_uncompr* uncompr_block; + if (offset_block_cache && offset_block_cache->get_block_number() == block_number) { + //std::cout << "cache hit" << std::endl; + uncompr_block = offset_block_cache->get(); + } + else { + //std::cout << "cache miss" << std::endl; + uncompr_block = decompress_column_block(offset_column, offsets_compression, block_number); + + // update cache + offset_block_cache = std::make_unique(block_number, uncompr_block); + } + + uint64_t *block_data = uncompr_block->get_data(); + auto offset = block_data[block_pos]; + return offset; + } + } + + // DEBUG function to look into a column: + void print_column(const column_base *col, int start, int end) const { + // validate interval (fix otherwise) + int col_size = col->get_count_values(); + if (start < 0 || col_size < start) { + start = 0; + } + if (col_size <= end) { + end = col->get_count_values() - 1; + } + + std::cout << "Printing column from " << start << " to " << end << std::endl; + const uint64_t *data = col->get_data(); + + for (auto pos = start; pos <= end; pos++) { + std::cout << "Index: " << pos << " Value:" 
<< data[pos] << std::endl; + } + } + + public: + ~CSR() { + delete offset_column; + delete edgeId_column; + } + + CSR(EdgesContainerType edges_container_type) + : Graph(VerticesContainerType::VectorArrayContainer, edges_container_type) {} + + CSR(VerticesContainerType vertices_container_type = VerticesContainerType::VectorArrayContainer) + : Graph(vertices_container_type) {} + + std::string get_storage_format() const override { return "CSR"; } + + std::string get_compression_format() const override { + return "offsets: " + graph_compr_f_to_string(offsets_compression) + + ", edgeIds: " + graph_compr_f_to_string(edgeIds_compression); + } + + // this function gets the number of vertices/edges and allocates memory for the graph-topology columns + // TODO: test that no data exists before (as this gets overwritten) + void allocate_graph_structure(uint64_t numberVertices, uint64_t numberEdges) override { + Graph::allocate_graph_structure(numberVertices, numberEdges); + + const size_t offset_size = numberVertices * sizeof(uint64_t); + auto offset_col = new column(offset_size); + offset_col->set_meta_data(numberVertices, offset_size); + // wrapping offset_column + offset_column = new column_with_blockoffsets(offset_col); + + const size_t edge_ids_size = numberEdges * sizeof(uint64_t); + auto edgeId_col = new column(edge_ids_size); + edgeId_col->set_meta_data(numberEdges, edge_ids_size); + // wrapping edgeId_column + edgeId_column = new column_with_blockoffsets(edgeId_col); + + // init node array: + uint64_t *offset_data = offset_col->get_data(); + offset_data[0] = 0; + } + + // TODO: add a single edge in graph arrays -> needs a memory reallocating strategy + uint64_t add_edge(uint64_t sourceId, uint64_t targetId, unsigned short int type) override { + throw std::runtime_error("Singe edge addition not yet implemented for CSR" + sourceId + targetId + type); + } + + // get number of edges of vertex with id + uint64_t get_out_degree(uint64_t id) override { + uint64_t offset = get_offset(id); + uint64_t nextOffset; + + // special case: last vertex id has no next offset + if (id == getVertexCount() - 1) { + nextOffset = getEdgeCount(); + } else { + nextOffset = get_offset(id + 1); + } + + // if this fails, than alloc_graph has probably the wrong values + assert(offset <= nextOffset); + + // compute out_degree + return nextOffset - offset; + } + + std::vector get_outgoing_edge_ids(uint64_t id) override { + assert(vertices->exists_vertex(id)); + + std::vector result; + + uint64_t start = get_offset(id); + uint64_t degree = get_out_degree(id); + + // TODO: use cache + result.reserve(degree); + + // end is not included in the result + auto end = start + degree; + + assert(start <= end); + assert(getEdgeCount() >= end); + + if (degree == 0) { + return result; + } + + auto block_size = edgeId_column->get_block_size(); + + if (edgeIds_compression == GraphCompressionFormat::UNCOMPRESSED) { + uint64_t *col_data = edgeId_column->get_column()->get_data(); + result.insert(result.end(), col_data + start, col_data + end); + } else { + // getting one block at a time as most of the time only one block is needed + // also allows to use block cache (would need to inject cache into decompress_block otherwise) + auto start_block = start / block_size; + auto start_block_pos = start % block_size; + auto end_block = end / block_size; + auto end_block_pos = end % block_size; + + assert(start_block < edgeId_column->get_block_offsets()->size()); + assert(end_block < edgeId_column->get_block_offsets()->size()); + + // case that end 
is the first value of another block (should not decompress that block than) + if (end_block_pos == 0) { + end_block--; + // setting it one step further than actually possible to read from (vector.insert excludes the end) + end_block_pos = block_size; + } + + // most of the case only one block accessed -> might be worth to seperate from for loop (start_block == end_block) + for (auto block_number = start_block; block_number <= end_block; block_number++) { + const column_uncompr *uncompr_block; + // to avoid wrongly deleting a cached block (which could be used by the next access) + // by creating new unique ptr or direct delete (alternatively use a shared_pointer .. might be a + // very good idea) + bool cache_hit = false; + // only looking at cache for first block (as we assume sequential read) + if (block_number == start_block && edgeIds_block_cache && + edgeIds_block_cache->get_block_number() == block_number) { + // std::cout << "edgeId_col cache hit" << std::endl; + uncompr_block = edgeIds_block_cache->get(); + cache_hit = true; + } else { + // std::cout << "edgeId_col cache miss" << std::endl; + uncompr_block = decompress_column_block(edgeId_column, edgeIds_compression, block_number); + } + + uint64_t *block_data = uncompr_block->get_data(); + + // all edge ids in the same block (implicitly end_block == block_number == start_block) + if (start_block == end_block) { + result.insert(result.end(), block_data + start_block_pos, block_data + end_block_pos); + // update cache + if (!cache_hit) { + edgeIds_block_cache = std::make_unique(block_number, uncompr_block); + } + } else if (block_number == end_block) { + // only insert until end_pos + result.insert(result.end(), block_data, block_data + end_block_pos); + // update cache + if (!cache_hit) { + edgeIds_block_cache = std::make_unique(block_number, uncompr_block); + } + } else if (block_number == start_block) { + // don't insert values before start + auto block_end = block_data + block_size; + result.insert(result.end(), block_data + start_block_pos, block_end); + + // deleting temporary column if not cached + if (!cache_hit) { + delete uncompr_block; + } + } else { + // insert whole block (should be very rare) + auto block_end = block_data + block_size; + result.insert(result.end(), block_data, block_end); + + // deleting temporary column (does not matter if cached as following block will overwrite the + // cache) + delete uncompr_block; + } + } + } + + assert(result.size() == degree); + + return result; + } + + void morph(GraphCompressionFormat target_format) override { + morph(target_format, target_format); + } + + // allowing different compressions for offset column and edgeId column + void morph(GraphCompressionFormat target_offset_format, GraphCompressionFormat target_edgeId_format) { +#if DEBUG + std::cout << "Morphing graph format specific data structures from " + << graph_compr_f_to_string(get_compression_format()) << " to " + << "offsets: " graph_compr_f_to_string(target_offset_format) + << " edgeIds: " << graph_compr_f_to_string(target_edgeId_format) << std::endl; +#endif + + offset_column = morph_saving_offsets_graph_col(offset_column, offsets_compression, target_offset_format, true); + edgeId_column = morph_saving_offsets_graph_col(edgeId_column, edgeIds_compression, target_edgeId_format, true); + + // invalidating caches (as block-size may differ) + if (offset_block_cache) { + offset_block_cache.reset(); + } + + if (edgeIds_block_cache) { + edgeIds_block_cache.reset(); + } + + this->offsets_compression = target_offset_format; 
+ this->edgeIds_compression = target_edgeId_format; + } + + // get size of storage format: + std::pair get_size_of_graph() const override { + + auto [index_size, data_size] = Graph::get_size_of_graph(); + + // column_meta_data, prepared_for_random_access, .. not included in get_size_used_byte; + index_size += 2 * sizeof(column); + index_size += edgeId_column->get_size_used_byte(); + index_size += offset_column->get_size_used_byte(); + + return {index_size, data_size}; + } + + double offset_column_compr_ratio() { return compression_ratio(offset_column, offsets_compression); } + + double edgeId_column_compr_ratio() { return compression_ratio(edgeId_column, edgeIds_compression); } + + std::string get_column_info(column_with_blockoffsets_base *col_with_offsets, GraphCompressionFormat format) { + auto col = col_with_offsets->get_column(); + + return " values: " + std::to_string(col->get_count_values()) + + " size in bytes: " + std::to_string(col->get_size_used_byte()) + + " compression ratio: " + std::to_string(compression_ratio(col_with_offsets, format)) + + " number of blocks (if blocksize > 1): " + + std::to_string(col_with_offsets->get_block_offsets()->size()); + } + + void statistics() override { + Graph::statistics(); + std::cout << "offset column: " << get_column_info(offset_column, offsets_compression) << std::endl; + std::cout << "edgeId column: " << get_column_info(edgeId_column, edgeIds_compression) << std::endl; + std::cout << "--------------------------------------------" << std::endl; + std::cout << std::endl << std::endl; + } + }; +} // namespace morphstore +#endif // MORPHSTORE_CSR_H diff --git a/include/core/storage/graph/graph.h b/include/core/storage/graph/graph.h new file mode 100644 index 00000000..edfa830f --- /dev/null +++ b/include/core/storage/graph/graph.h @@ -0,0 +1,278 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file graph.h + * @brief base graph class for any storage format --> CSR,ADJ (allowing multi-graphs) + * @todo + */ + +#ifndef MORPHSTORE_GRAPH_H +#define MORPHSTORE_GRAPH_H + +#include "edge/edge.h" +#include "edge/edges_hashmap_container.h" +#include "edge/edges_vectorarray_container.h" +#include "property_type.h" +#include "vertex/vertex.h" +#include "vertex/vertices_hashmap_container.h" +#include "vertex/vertices_vectorarray_container.h" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace morphstore { + + class Graph { + + protected: + // TODO: actually just needed for CSR format (could be moved) + uint64_t expectedVertexCount; + uint64_t expectedEdgeCount; + + std::unique_ptr vertices; + std::unique_ptr edges; + + // graph format specific (CSR and Adj only differ in their graph topology representation) + virtual void add_to_vertex_edges_mapping(uint64_t sourceID, const std::vector edge_ids) = 0; + + public: + Graph(EdgesContainerType edges_container_type) + : Graph(VerticesContainerType::VectorArrayContainer, edges_container_type) {} + + Graph(VerticesContainerType vertices_container_type = VerticesContainerType::VectorArrayContainer, + EdgesContainerType edges_container_type = EdgesContainerType::VectorArrayContainer) { + // could be encapsulated in a VerticesContainer builder + switch (vertices_container_type) { + case VerticesContainerType::VectorArrayContainer: + vertices = std::make_unique(); + break; + case VerticesContainerType::HashMapContainer: + vertices = std::make_unique(); + break; + } + + // could be encapsulated in a EdgesContainer builder + switch (edges_container_type) { + case EdgesContainerType::VectorArrayContainer: + edges = std::make_unique(); + break; + case EdgesContainerType::HashMapContainer: + edges = std::make_unique(); + break; + } + } + + // human-readable form of the container (f.i. for benchmark) + std::string vertices_container_description() { return vertices->container_description(); } + + // human-readable form of the container (f.i. 
for benchmark) + std::string edges_container_description() { return edges->container_description(); } + + // -------------------- Setters & Getters -------------------- + + // each vertex has a type represented by a number (in Neo4j terms this would be a node label) + // this provides the semantics behind that number + void set_vertex_type_dictionary(const std::map &types) { + assert(types.size() != 0); + this->vertices->set_vertex_type_dictionary(types); + } + + // each edge has a type represented by a number (in Neo4j terms this would be a relationship type) + // this provides the semantics behind that number + void setEdgeTypeDictionary(const std::map &types) { + assert(types.size() != 0); + this->edges->set_edge_type_dictionary(types); + } + + // expected count provided by allocate_graph_structure + uint64_t getExpectedVertexCount() const { return expectedVertexCount; } + + // count of actually stored vertices + uint64_t getVertexCount() const { return vertices->vertex_count(); } + + // expected count provided by allocate_graph_structure + uint64_t getExpectedEdgeCount() const { return expectedEdgeCount; } + + // count of actually stored edges + uint64_t getEdgeCount() const { return edges->edge_count(); } + + uint64_t add_vertex(const unsigned short int type = 0, + const std::unordered_map props = {}) { + return vertices->add_vertex(type, props); + }; + + VertexWithProperties get_vertex(uint64_t id) { return vertices->get_vertex_with_properties(id); } + + EdgeWithIdAndProperties get_edge(uint64_t id) { return edges->get_edge_with_properties(id); } + + void add_property_to_vertex(uint64_t id, const std::pair property) { + vertices->add_property_to_vertex(id, property); + }; + + // only setting whole edge_properties, as adding an edge property was not needed yet + void set_edge_properties(uint64_t id, const std::unordered_map properties) { + edges->set_edge_properties(id, properties); + }; + + // human-readable form of the graph storage format + virtual std::string get_storage_format() const = 0; + virtual std::string get_compression_format() const = 0; + virtual uint64_t add_edge(uint64_t from, uint64_t to, unsigned short int type) = 0; + // changing the compression format + virtual void morph(GraphCompressionFormat target_format) = 0; + // outgoing, as they are only indexed in the outgoing direction + virtual std::vector get_outgoing_edge_ids(uint64_t id) = 0; + // get the out_degree of a vertex (size of the adjacency list) + virtual uint64_t get_out_degree(uint64_t id) = 0; + + // convenience method to returning the target vertex-ids of the outgoing edges + std::vector get_neighbors_ids(uint64_t id) { + std::vector targetVertexIds; + // guess this could be easily parallelized (using std::foreach f.i.) 
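+            // sketch of such a variant (not used; assumes <algorithm> is included and that reads from
+            // the edges container need no synchronization):
+            //   auto edge_ids = get_outgoing_edge_ids(id);
+            //   targetVertexIds.resize(edge_ids.size());
+            //   std::transform(edge_ids.begin(), edge_ids.end(), targetVertexIds.begin(),
+            //                  [this](uint64_t edge_id) { return edges->get_edge(edge_id).getTargetId(); });
+            //   (where C++17 parallel algorithms are available, std::execution::par could be passed
+            //    as the first argument)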
+ for (auto edge_id : get_outgoing_edge_ids(id)) { + assert(edges->exists_edge(edge_id)); + targetVertexIds.push_back(edges->get_edge(edge_id).getTargetId()); + } + + return targetVertexIds; + }; + + // returning a vector of edge-ids (order based on input edges) + std::vector add_edges(uint64_t sourceId, const std::vector edges_to_add) { + std::vector edge_ids; + + // assertion, which are shared by all graph formats + if (!vertices->exists_vertex(sourceId)) { + throw std::runtime_error("Source-id not found " + std::to_string(sourceId)); + } + + // (multi)-graph specific and storage-format agnostic + // changes, if other formats store target ids instead of edge ids (because non multi graphs do not need edge ids) + for (auto edge : edges_to_add) { + if (!vertices->exists_vertex(edge.getTargetId())) { + throw std::runtime_error("Target not found :" + edge.to_string()); + } + edge_ids.push_back(edges->add_edge(edge)); + } + + add_to_vertex_edges_mapping(sourceId, edge_ids); + + return edge_ids; + }; + + // looks very similar to above but for ! EdgeWithProperties ! + // extra method, as runtime polymorphism seemed ugly in C++ here (but very likely there is a better way for this) + std::vector add_edges(uint64_t sourceId, const std::vector edges_to_add) { + std::vector edge_ids; + + if (!vertices->exists_vertex(sourceId)) { + throw std::runtime_error("Source-id not found " + std::to_string(sourceId)); + } + + for (auto edge_with_props : edges_to_add) { + if (auto edge = edge_with_props.getEdge(); !vertices->exists_vertex(edge.getTargetId())) { + throw std::runtime_error("Target not found :" + edge.to_string()); + } + // this calls a different methods on the edges-container + edge_ids.push_back(edges->add_edge(edge_with_props)); + } + + add_to_vertex_edges_mapping(sourceId, edge_ids); + + return edge_ids; + }; + + // memory estimation + // returns a pair of index-size, data-size + virtual std::pair get_size_of_graph() const { + // including vertices + its properties + its type dict + auto [index_size, data_size] = vertices->get_size(); + + // including edges + its properties + its type dict + auto edges_size = edges->get_size(); + index_size += edges_size.first; + data_size += edges_size.second; + + return {index_size, data_size}; + }; + + + // mainly needed to allocate CSR columns + // also containers can reserve expected size + virtual void allocate_graph_structure(uint64_t expected_vertices, uint64_t expected_edges) { + this->expectedVertexCount = expected_vertices; + this->expectedEdgeCount = expected_edges; + + vertices->allocate(expected_vertices); + edges->allocate(expected_edges); + }; + + // -------------------- debugging functions -------------------- + + void print_neighbors_of_vertex(uint64_t id) { + std::cout << std::endl << "Neighbours for Vertex with id " << id << std::endl; + auto edge_ids = get_outgoing_edge_ids(id); + + if (edge_ids.size() == 0) { + std::cout << " No outgoing edges for vertex with id: " << id << std::endl; + } else { + for (const auto edge_id : edge_ids) { + print_edge_by_id(edge_id); + } + } + } + + // basic statistics to be extended by graph formats + virtual void statistics() { + std::cout << "---------------- Statistics ----------------" << std::endl; + std::cout << "Number of vertices: " << getVertexCount() << std::endl; + std::cout << "Number of vertices with properties:" << vertices->vertices_with_properties_count() + << std::endl; + std::cout << "Number of edges: " << getEdgeCount() << std::endl; + std::cout << "Number of edges with properties:" << 
edges->edges_with_properties_count() << std::endl; + std::cout << "Compression Format:" << get_compression_format() << std::endl; + } + + void print_vertex_by_id(uint64_t id) { + vertices->print_vertex_by_id(id); + std::cout << "\n"; + std::cout << "#Edges: " << this->get_out_degree(id); + std::cout << "\n"; + std::cout << "-----------------------------------------------" << std::endl; + } + + void print_edge_by_id(uint64_t id) { edges->print_edge_by_id(id); } + + void print_type_dicts() { + vertices->print_type_dict(); + edges->print_type_dict(); + } + }; + +} // namespace morphstore + +#endif // MORPHSTORE_GRAPH_H diff --git a/include/core/storage/graph/graph_compr_format.h b/include/core/storage/graph/graph_compr_format.h new file mode 100644 index 00000000..22c6a381 --- /dev/null +++ b/include/core/storage/graph/graph_compr_format.h @@ -0,0 +1,93 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file graph_compr_format.h + * @brief helper for specifying compression of graph format specific columns + * @todo remove need for extra graph-compression format + */ + +#ifndef MORPHSTORE_GRAPH_COMPR_FORMAT_H +#define MORPHSTORE_GRAPH_COMPR_FORMAT_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace morphstore { + // TODO: allow also other vector extensions (regard build flag) + using ve = vectorlib::scalar>; + + using default_vbp = DEFAULT_DYNAMIC_VBP_F(ve); + using default_delta = DEFAULT_DELTA_DYNAMIC_VBP_F(ve); + using default_for = DEFAULT_FOR_DYNAMIC_VBP_F(ve); + + enum class GraphCompressionFormat { DELTA, FOR, UNCOMPRESSED, DYNAMIC_VBP }; + + std::string graph_compr_f_to_string(GraphCompressionFormat format) { + std::string desc; + + switch (format) { + case GraphCompressionFormat::DELTA: + desc = "Delta (Default)"; + break; + case GraphCompressionFormat::UNCOMPRESSED: + desc = "Uncompressed"; + break; + case GraphCompressionFormat::FOR: + desc = "Frame of Reference (Default)"; + break; + case GraphCompressionFormat::DYNAMIC_VBP: + desc = "Dynamic vertical bitpacking (Default)"; + break; + } + + return desc; + } + + // gets m_BlockSize using the corresponding format + // as GraphCompressionFormat is just a simple enum + size_t inline graph_compr_f_block_size(GraphCompressionFormat format) { + size_t block_size = 1; + + switch (format) { + case GraphCompressionFormat::DELTA: + block_size = default_delta::m_BlockSize; + break; + case GraphCompressionFormat::UNCOMPRESSED: + block_size = uncompr_f::m_BlockSize; + break; + case GraphCompressionFormat::FOR: + block_size = default_for::m_BlockSize; + break; + case 
GraphCompressionFormat::DYNAMIC_VBP: + block_size = default_vbp::m_BlockSize; + break; + } + + return block_size; + } +} // namespace morphstore + +#endif // MORPHSTORE_GRAPH_COMPR_FORMAT_H \ No newline at end of file diff --git a/include/core/storage/graph/importer/ldbc_import.h b/include/core/storage/graph/importer/ldbc_import.h new file mode 100644 index 00000000..0e922fb8 --- /dev/null +++ b/include/core/storage/graph/importer/ldbc_import.h @@ -0,0 +1,699 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file ldbc_import.h + * @brief this class reads the ldbc files and generates the graph in CSR or AdjList + * @todo support for array properties (for simplicity only last one take currently) + */ + +#ifndef MORPHSTORE_LDBC_IMPORT_H +#define MORPHSTORE_LDBC_IMPORT_H + +#include "ldbc_schema.h" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// hash function used to hash a pair of any kind using XOR (for verticesMap) +struct hash_pair { + template size_t operator()(const std::pair &p) const { + auto hash1 = std::hash{}(p.first); + auto hash2 = std::hash{}(p.second); + return hash1 ^ hash2; + } +}; + +namespace morphstore { + + class LDBCImport { + + private: + std::filesystem::path base_directory; + std::vector verticesPaths; + std::vector edgesPaths; + std::map vertexTypeLookup; + std::map edgeTypeLookup; + // data structure for lookup local ids with vertexType to global system id: (vertexType, ldbc_id) -> global id + std::unordered_map, uint64_t, hash_pair> globalIdLookupMap; + + // unordered_map for lookup system-id and its in the graph (for further processing, e.g. 
filling the edge_array + // in the right order) + std::unordered_map> vertexEdgesLookup; + + public: + // directory including a static/ and dynamic/ directory like in /ldbc_snb_datagen/social_network/ + LDBCImport(const std::string &dir) { + base_directory = dir; + insert_file_names(); + } + + std::string getDirectory() const { return base_directory; } + + // get the vertex or edge type based on the fileName + std::string getEntityType(std::filesystem::path filePath) { + // last [a-zA-Z] to remove ending _ + std::regex typeRegExp("[a-zA-Z_]+[a-zA-Z]"); + std::smatch match; + + std::string fileName = filePath.filename().string(); + + if (std::regex_search(fileName, match, typeRegExp)) { + // std::cout << "EntityType: " << match[0] << std::endl; + // std::cout.flush(); + return match[0]; + } else { + throw std::invalid_argument("No EntityType in: " + fileName); + } + } + + // function which iterates through the base_directory to receive file names (entire path) + void insert_file_names() { + + std::filesystem::path dynamic_data_dir(base_directory / "dynamic"); + std::filesystem::path static_data_dir(base_directory / "static"); + std::vector dirs{dynamic_data_dir, static_data_dir}; + + for (const auto dir : dirs) { + for (const auto &entry : std::filesystem::directory_iterator(dir)) { + // ignore files starting with a '.' (+ 1 as '/' is the first character otherwise) + if (entry.path().string()[dir.u8string().length() + 1] == '.') { + continue; + } else { + // insert file path to vertices or edges vector + differentiate(entry.path().string()); + } + } + } + + if (verticesPaths.empty()) { + print_file_names(); + throw std::invalid_argument("No vertex files found"); + } + } + + // this function differentiates, whether the file is a vertex or edge and puts it into the specific vector + void differentiate(std::filesystem::path path) { + // if the string contains a '_' -> it's a edge file; otherwise a vertex file + // if string contains word_word it is an edge files (vertex files only contain one word) + + // a vertex file name contains exactly one word and after that only numbers are allowed f.i. _0_0 + // .*\\/ for path marks the directory path + std::regex vertexFileRegExp("^(.*\\/)([a-zA-Z]+\\_)([0-9_]*).csv$"); + + if (std::regex_match(path.u8string(), vertexFileRegExp)) { + verticesPaths.push_back(path); + } else { + edgesPaths.push_back(path); + } + } + + // this function reads the vertices-files and creates vertices in a graph + // + creates the vertexTypeLookup (number to string) for the graph + void generate_vertices(Graph &graph) { + std::cout << "(1/2) Generating LDBC-Vertices ..."; + std::cout.flush(); + + // iterate through vector of vertex-addresses + for (const auto &file : verticesPaths) { + // data structure for attributes of entity, e.g. taglass -> id, name, url + std::vector> attributes; + + std::string vertexType = getEntityType(file); + int vertexTypeNumber = get_vertex_type_number(vertexType).value(); + + char *buffer; + + uint64_t fileSize = 0; + + std::string address = file; + + std::ifstream vertexFile( + address, + std::ios::binary | std::ios::ate); // 'ate' means: open and seek to end immediately after opening + + if (!vertexFile) { + std::cerr << "Error, opening file. "; + exit(EXIT_FAILURE); + } + + // calculate file size + if (vertexFile.is_open()) { + // tellg() returns: The current position of the get pointer in the stream on success, pos_type(-1) + // on failure. 
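+                    // (the size obtained here is used below to malloc a buffer holding the whole CSV
+                    //  file; rows are then split on '\n' and fields on the '|' delimiter)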
+ fileSize = static_cast(vertexFile.tellg()); + vertexFile.clear(); + // Seeks to the very beginning of the file, clearing any fail bits first (such as the end-of-file + // bit) + vertexFile.seekg(0, std::ios::beg); + } + + // allocate memory + buffer = (char *)malloc(fileSize * sizeof(char)); + vertexFile.read(buffer, fileSize); // read data as one big block + size_t start = 0; + std::string delimiter = "|"; + + // read buffer and do the magic ... + for (size_t i = 0; i < fileSize; ++i) { + if (buffer[i] == '\n') { + // get a row into string form buffer with start- and end-point + std::string row(&buffer[start], &buffer[i]); + + // remove unnecessary '\n' at the beginning of a string + if (row.find('\n') != std::string::npos) { + row.erase(0, 1); + } + + size_t last = 0; + size_t next = 0; + + // first line of *.csv contains the attributes -> write to attributes vector + if (start == 0) { + std::string property_key; + Ldbc_Data_Type data_type; + // extract attribute from delimiter, e.g. id|name|url to id,name,url and push back to + // attributes vector + while ((next = row.find(delimiter, last)) != std::string::npos) { + property_key = row.substr(last, next - last); + data_type = get_data_type(vertexType, property_key); + if (data_type == Ldbc_Data_Type::ERROR) { + std::cout + << "Unexpected property: in " << file.string() + << ":" << vertexType << ":" << property_key << " could not be found in schema"; + data_type = Ldbc_Data_Type::STRING; + } + attributes.push_back(std::make_pair(property_key, data_type)); + last = next + 1; + } + // last attribute + property_key = row.substr(last); + data_type = get_data_type(vertexType, property_key); + if (data_type == Ldbc_Data_Type::ERROR) { + std::cout + << "Unexpected property: in " << file.string() + << ":" << vertexType << ":" << property_key << " could not be found in schema"; + data_type = Ldbc_Data_Type::STRING; + } + attributes.push_back(std::make_pair(property_key, data_type)); + } else { + // actual data: + std::unordered_map properties; + size_t attrIndex = 0; + std::string ldbcID = row.substr(0, row.find(delimiter)); + while ((next = row.find(delimiter, last)) != std::string::npos) { + auto key_to_datatype = attributes[attrIndex]; + property_type property_value = + convert_property_value(row.substr(last, next - last), key_to_datatype.second); + properties.insert(std::make_pair(key_to_datatype.first, property_value)); + last = next + 1; + ++attrIndex; + } + // last attribute + auto key_to_datatype = attributes[attrIndex]; + property_type propertyValue = + convert_property_value(row.substr(last), key_to_datatype.second); + properties.insert(std::make_pair(key_to_datatype.first, propertyValue)); + + //----------------------------------------------------- + // create vertex and insert into graph with properties + uint64_t systemID = graph.add_vertex(vertexTypeNumber, properties); + + // map vertexType and ldbc id to system generated id + globalIdLookupMap.insert({{vertexType, ldbcID}, systemID}); + //----------------------------------------------------- + properties.clear(); // free memory + } + + start = i; // set new starting point for buffer (otherwise it's concatenated) + } + } + + free(buffer); // free memory + vertexFile.close(); + + ++vertexTypeNumber; + attributes.clear(); + } + } + + // function which returns the vertex_type_number (has no value if non existing) + std::optional get_vertex_type_number(const std::string &vertexType) { + // iterate through entities-map to look up for paramater + for (auto const &entry : 
vertexTypeLookup) { + if (entry.second == vertexType) { + return entry.first; + } + } + + return {}; + } + + // function which returns true, if the edge type already exist + bool exist_edge_type_name(const std::string &edge_type) { + // iterate through edges-map to look up for paramater + for (auto const &entry : edgeTypeLookup) { + if (entry.second == edge_type) { + return true; + } + } + + return false; + } + + // for debugging + void print_file_names() { + std::cout << "File-directory: " << getDirectory() << std::endl; + std::cout << "Vertices-Files: " << std::endl; + for (const auto &v : verticesPaths) { + std::cout << "\t" << v << std::endl; + } + std::cout << "Edge-Files: " << std::endl; + for (const auto &rel : edgesPaths) { + std::cout << "\t" << rel << std::endl; + } + } + + // function which clears all intermediates after import + void clear_intermediates() { + std::cout << "CleanUp"; + globalIdLookupMap.clear(); + edgeTypeLookup.clear(); + vertexTypeLookup.clear(); + edgesPaths.clear(); + verticesPaths.clear(); + vertexEdgesLookup.clear(); + } + + // function which returns the total number of edges (IMPORTANT: vertex generation has to be done first, because + // of the vertexType lookup creation) + uint64_t get_total_number_edges() { + + uint64_t result = 0; + + if (!edgesPaths.empty()) { + + // iterate through vector of edge-type file paths + for (const auto &file : edgesPaths) { + std::string edge_type = getEntityType(file); + + // TOdo: use regExp ([a-zA-Z]+)_([a-zA-Z]+)_([a-zA-Z]+) + std::string sourceVertexType = edge_type.substr(0, edge_type.find('_')); + edge_type.erase(0, edge_type.find('_') + 1); + + std::string edgeType = edge_type.substr(0, edge_type.find('_')); + edge_type.erase(0, edge_type.find('_') + 1); + + std::string targetVertexType = edge_type; + + char *buffer; + + uint64_t fileSize = 0; + + std::ifstream edgeFile( + file, std::ios::binary | + std::ios::ate); // 'ate' means: open and seek to end immediately after opening + + if (!edgeFile) { + std::cerr << "Error, opening file. "; + exit(EXIT_FAILURE); + } + + // calculate file size + if (edgeFile.is_open()) { + fileSize = static_cast( + edgeFile.tellg()); // tellg() returns: The current position of the get pointer in the + // stream on success, pos_type(-1) on failure. + edgeFile.clear(); + edgeFile.seekg(0, std::ios::beg); // Seeks to the very beginning of the file, clearing any fail + // bits first (such as the end-of-file bit) + } + + // allocate memory + buffer = (char *)malloc(fileSize * sizeof(char)); + edgeFile.read(buffer, fileSize); // read data as one big block + bool firstLine = true; + + // check from file name whether it's a edge file or multi value attribute file + if (get_vertex_type_number(targetVertexType).has_value()) { + + for (size_t i = 0; i < fileSize; ++i) { + if (buffer[i] == '\n') { + // skip first line (attributes infos....) + if (firstLine) { + firstLine = false; + } else { + ++result; + } + } + } + } + + free(buffer); // free memory + edgeFile.close(); + } + } + return result; + } + + // get number of vertices from files and fill vertexTypeDictionary + uint64_t get_total_number_vertices() { + + uint64_t result = 0; + + // iterate through vector of vertex-addresses + for (const auto &file : verticesPaths) { + char *buffer; + uint64_t fileSize = 0; + std::ifstream vertexFile( + file, + std::ios::binary | std::ios::ate); // 'ate' means: open and seek to end immediately after opening + + if (!vertexFile) { + std::cerr << "Error, opening file. 
"; + exit(EXIT_FAILURE); + } + + // calculate file size + if (vertexFile.is_open()) { + fileSize = static_cast( + vertexFile.tellg()); // tellg() returns: The current position of the get pointer in the stream + // on success, pos_type(-1) on failure. + vertexFile.clear(); + // Seeks to the very beginning of the file, clearing any fail bits first (such as the end-of-file + // bit) + vertexFile.seekg(0, std::ios::beg); + } + + // allocate memory + buffer = (char *)malloc(fileSize * sizeof(char)); + vertexFile.read(buffer, fileSize); // read data as one big block + size_t start = 0; + std::string delimiter = "|"; + + // read buffer and do the magic ... + for (size_t i = 0; i < fileSize; ++i) { + if (buffer[i] == '\n') { + // get a row into string form buffer with start- and end-point + std::string row(&buffer[start], &buffer[i]); + + // remove unnecessary '\n' at the beginning of a string + if (row.find('\n') != std::string::npos) { + row.erase(0, 1); + } + + // first line of *.csv contains the attributes -> write to attributes vector + if (start != 0) { + ++result; + } + + // set new starting point for buffer (otherwise it's concatenated) + start = i; + } + } + + free(buffer); // free memory + vertexFile.close(); + } + return result; + } + + // this function reads the edge-files and fills the intermediate: vertexEdgeLookup + // + creates the edgeLookup (number to string) for the graph + void fill_vertexEdgesLookup(Graph &graph) { + + if (!edgesPaths.empty()) { + std::cout << "(2/2) Generating LDBC-Edges ..."; + std::cout.flush(); + + // this variable is used for the edgeLookup-keys, starting by 0 + unsigned short int edgeTypeNumber = 0; + + // iterate through vector of vertex-addresses + for (const auto &file : edgesPaths) { + // get the edge-infos from file name: e.g. ([...path...] / [person_likes_comment].csv) --> + // person_likes_comment + // TODO: use regExp + std::string edge_type = getEntityType(file); + std::string sourceVertexType = edge_type.substr(0, edge_type.find('_')); + edge_type.erase(0, edge_type.find('_') + 1); + + std::string edgeType = edge_type.substr(0, edge_type.find('_')); + edge_type.erase(0, edge_type.find('_') + 1); + + std::string targetVertexType = edge_type; + + char *buffer; + + uint64_t fileSize = 0; + + std::ifstream edgeFile( + file, std::ios::binary | + std::ios::ate); // 'ate' means: open and seek to end immediately after opening + + if (!edgeFile) { + std::cerr << "Error, opening file. "; + exit(EXIT_FAILURE); + } + + // calculate file size + if (edgeFile.is_open()) { + fileSize = static_cast( + edgeFile.tellg()); // tellg() returns: The current position of the get pointer in the + // stream on success, pos_type(-1) on failure. 
+ edgeFile.clear(); + edgeFile.seekg(0, std::ios::beg); // Seeks to the very beginning of the file, clearing any fail + // bits first (such as the end-of-file bit) + } + + // allocate memory + buffer = (char *)malloc(fileSize * sizeof(char)); + edgeFile.read(buffer, fileSize); // read data as one big block + + size_t start = 0; + std::string delimiter = "|"; + + // check from file name whether it's an edge file or multi value attribute file + if (!get_vertex_type_number(targetVertexType).has_value()) { + // Multi-value-attributes: just take the last recently one + std::string propertyKey; + Ldbc_Data_Type data_type = Ldbc_Data_Type::STRING; + std::unordered_map multiValueAttr; + uint64_t systemID; + property_type value; + + for (size_t i = 0; i < fileSize; ++i) { + if (buffer[i] == '\n') { + // get a row into string form buffer with start- and end-point + std::string row(&buffer[start], &buffer[i]); + + // remove unnecessary '\n' at the beginning of a string + if (row.find('\n') != std::string::npos) { + row.erase(0, 1); + } + + // first line: get the attribute a.k.a key for the property, e.g. Person.id|email -> + // get 'email' + if (start == 0) { + propertyKey = row.substr(row.find(delimiter) + 1); + data_type = get_data_type(sourceVertexType, propertyKey); + if (data_type == Ldbc_Data_Type::ERROR) { + std::cout + << "Unexpected property: in " << file.string() + << ":" << edgeType << ":" << propertyKey << " could not be found in schema"; + data_type = Ldbc_Data_Type::STRING; + } + } else { + // (1) write data to vector: if key is already present, over write value + // (simplicity: we take the newest one) + systemID = + globalIdLookupMap[{sourceVertexType, row.substr(0, row.find(delimiter))}]; + value = convert_property_value(row.substr(row.find(delimiter) + 1), data_type); + multiValueAttr[systemID] = std::move(value); + } + + start = i; // set new starting point for buffer (otherwise it's concatenated) + } + } + // iterate through multiValue map and assign property to vertex + for (const auto &pair : multiValueAttr) { + // const std::pair keyValuePair = {propertyKey, pair.second}; + graph.add_property_to_vertex(pair.first, {propertyKey, pair.second}); + } + + } + // handling of edge-files ... + else { + // check if the name already exists + if (!exist_edge_type_name(edgeType)) { + ++edgeTypeNumber; + edgeTypeLookup.insert(std::make_pair(edgeTypeNumber, edgeType)); + } + + bool hasProperties = false; + std::string propertyKey; + uint64_t sourceVertexId, targetVertexId; + + // read buffer and do the magic ... + for (size_t i = 0; i < fileSize; ++i) { + if (buffer[i] == '\n') { + // get a row into string form buffer with start- and end-point + std::string row(&buffer[start], &buffer[i]); + + // remove unnecessary '\n' at the beginning of a string + if (row.find('\n') != std::string::npos) { + row.erase(0, 1); + } + + size_t last = 0; + size_t next = 0; + size_t count = 0; + + // first line of *.csv: Differentiate whether it's + // (1) edge without properties: e.g. Person.id|Person.id -> #delimiter = 1 + // (2) edge with properties: e.g. 
Person.id|Person.id|fromDate -> #delimiter = 2 + if (start == 0) { + // if there are 2 delimiter ('|') -> edge file with properties + while ((next = row.find(delimiter, last)) != std::string::npos) { + last = next + 1; + ++count; + } + if (count == 2) { + hasProperties = true; + propertyKey = row.substr(last); + } + } else { + // lines of data: (from_local-ldbc-id), (to_local-ldbc-id) and property + // get the system-(global) id's from local ids + sourceVertexId = + globalIdLookupMap.at({sourceVertexType, row.substr(0, row.find(delimiter))}); + // remove from id from string + row.erase(0, row.find(delimiter) + delimiter.length()); + std::string value; + if (!hasProperties) { + // WITHOUT properties: just from the first delimiter on + targetVertexId = globalIdLookupMap.at({targetVertexType, row}); + + // insert edge into vertexRealtionsLookup: + vertexEdgesLookup[sourceVertexId].push_back( + EdgeWithProperties(sourceVertexId, targetVertexId, edgeTypeNumber)); + } else { + // with properties means: toID is until the next delimiter, and then the value + // for the property + targetVertexId = globalIdLookupMap.at( + {targetVertexType, row.substr(0, row.find(delimiter))}); + row.erase(0, row.find(delimiter) + delimiter.length()); + value = row; + + // insert edge into vertexEdgesLookup with its edge-property: + // assuming all properties of an edge are defined in the same file + auto edge = EdgeWithProperties(sourceVertexId, targetVertexId, edgeTypeNumber, + {{propertyKey, value}}); + vertexEdgesLookup[sourceVertexId].push_back(edge); + } + } + start = i; // set new starting point for buffer (otherwise it's concatenated) + } + } + } + free(buffer); // free memory + edgeFile.close(); + } + // graph gets full edge-type-list here: + graph.setEdgeTypeDictionary(edgeTypeLookup); + } + } + + // TODO: is this function really needed? + // function for sorting the vertexEdgesLookup ASC (needed in CSR) + // sorting for every vertex its vector list with target-ids ASC + void sort_VertexEdgesLookup() { + // sorting the first element of the pair (target-id) + for (auto &rel : vertexEdgesLookup) { + std::sort(rel.second.begin(), rel.second.end()); + } + } + + // this function writes the actual data from the intermediate vertexEdgesLookup into the graph + void generate_edges(Graph &graph) { + std::cout << " Writing edges into graph " << std::endl; + // firstly, sorting the intermediates with their target IDs ASC + sort_VertexEdgesLookup(); + + uint64_t graphSize = graph.getVertexCount(); + + for (uint64_t vertexID = 0; vertexID < graphSize; ++vertexID) { + auto edges = vertexEdgesLookup[vertexID]; + // add edge data: + graph.add_edges(vertexID, edges); + } + } + + void generate_vertex_type_lookup() { + uint64_t vertex_type_number = 0; + for (std::string vertex_file : verticesPaths) { + vertexTypeLookup.insert(std::make_pair(vertex_type_number, getEntityType(vertex_file))); + vertex_type_number++; + } + } + + // MAIN IMPORT FUNCTION: see steps in comments + void import(Graph &graph) { + std::cout << "Importing LDBC-files into graph ... 
"; + std::cout.flush(); + + // (1) get number vertices and number edges: + uint64_t numberVertices = get_total_number_vertices(); + + // populate vertex_type_lookup for differentiating between edge and property files + generate_vertex_type_lookup(); + graph.set_vertex_type_dictionary(vertexTypeLookup); + + uint64_t numberEdges = get_total_number_edges(); + + // (2) allocate graph memory + graph.allocate_graph_structure(numberVertices, numberEdges); + + // (3) generate vertices + generate_vertices(graph); + + // (4) read edges and write to intermediate results + fill_vertexEdgesLookup(graph); + + // (5) read intermediates and write edges + generate_edges(graph); + + // (6) clear intermediates + clear_intermediates(); + + std::cout << "--> done" << std::endl; + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_LDBC_IMPORT_H diff --git a/include/core/storage/graph/importer/ldbc_schema.h b/include/core/storage/graph/importer/ldbc_schema.h new file mode 100644 index 00000000..7470b954 --- /dev/null +++ b/include/core/storage/graph/importer/ldbc_schema.h @@ -0,0 +1,139 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file ldbc_schema.h + * @brief Schema of the LDBC graph based on + * https://raw.githubusercontent.com/ldbc/ldbc_snb_docs/dev/figures/schema-comfortable.png + * @todo search for an existing Graph-Schema language (graph schemas should be stored in the resource folder) + */ + +#ifndef MORPHSTORE_LDBC_SCHEMA_H +#define MORPHSTORE_LDBC_SCHEMA_H + +#include + +#include +#include +#include + +namespace morphstore { + + enum class Ldbc_Data_Type { LONG_STRING, STRING, TEXT, INT_32, ID, DATE_TIME, DATE, ERROR }; + + // static not included -> f.i. 
hasTag edge seen as property tag.id + static const std::map> ldbc_schema{{ + // vertices + {"person", + {{"creationDate", Ldbc_Data_Type::DATE_TIME}, + {"firstName", Ldbc_Data_Type::STRING}, + {"lastName", Ldbc_Data_Type::STRING}, + {"gender", Ldbc_Data_Type::STRING}, + {"birthday", Ldbc_Data_Type::DATE}, + // !TODO actually an array of emails + {"email", Ldbc_Data_Type::LONG_STRING}, + // !TODO actually an array of languages + + // (and not currently filled as csv header contains "language") + //{"speaks", Ldbc_Data_Type::STRING}, + // TODO actually values for "speaks" array + {"language", Ldbc_Data_Type::STRING}, + {"browserUsed", Ldbc_Data_Type::STRING}, + {"locationIP", Ldbc_Data_Type::STRING}}}, + {"forum", + {{"creationDate", Ldbc_Data_Type::DATE_TIME}, + {"title", Ldbc_Data_Type::LONG_STRING}, + {"type", Ldbc_Data_Type::STRING}}}, + {"post", + {{"creationDate", Ldbc_Data_Type::DATE_TIME}, + {"browserUsed", Ldbc_Data_Type::STRING}, + {"locationIP", Ldbc_Data_Type::STRING}, + {"length", Ldbc_Data_Type::INT_32}, + // TODO: extra nullable type for the following 3: like TEXT? + {"content", Ldbc_Data_Type::TEXT}, + {"language", Ldbc_Data_Type::STRING}, + {"imageFile", Ldbc_Data_Type::STRING}}}, + {"comment", + {{"creationDate", Ldbc_Data_Type::DATE_TIME}, + {"browserUsed", Ldbc_Data_Type::STRING}, + {"locationIP", Ldbc_Data_Type::STRING}, + {"content", Ldbc_Data_Type::TEXT}, + {"length", Ldbc_Data_Type::INT_32}}}, + {"tagclass", {{"name", Ldbc_Data_Type::LONG_STRING}, {"url", Ldbc_Data_Type::LONG_STRING}}}, + {"tag", {{"name", Ldbc_Data_Type::LONG_STRING}, {"url", Ldbc_Data_Type::LONG_STRING}}}, + {"place", + {{"name", Ldbc_Data_Type::LONG_STRING}, + {"url", Ldbc_Data_Type::LONG_STRING}, + {"type", Ldbc_Data_Type::STRING}}}, + {"organisation", + {{"name", Ldbc_Data_Type::LONG_STRING}, + {"type", Ldbc_Data_Type::STRING}, + {"url", Ldbc_Data_Type::LONG_STRING}}}, + // edges + {"likes", {{"creationDate", Ldbc_Data_Type::DATE_TIME}}}, + {"hasMember", {{"joinDate", Ldbc_Data_Type::DATE_TIME}}}, + {"hasModerator", {}}, + {"hasCreator", {}}, + {"hasTag", {}}, + {"containerOf", {}}, + {"replyOf", {}}, + {"isSubclassOf", {}}, + {"isPartOf", {}}, + {"isLocatedIn", {}}, + {"studyAt", {{"classYear", Ldbc_Data_Type::INT_32}}}, + {"workAt", {{"workFrom", Ldbc_Data_Type::INT_32}}}, + {"knows", {{"creationDate", Ldbc_Data_Type::DATE_TIME}}}, + }}; + + Ldbc_Data_Type get_data_type(std::string entity_type, std::string property_key) { + auto perEntity = ldbc_schema.find(entity_type); + if (perEntity != ldbc_schema.end()) { + auto propertiesMap = perEntity->second; + auto propertyEntry = propertiesMap.find(property_key); + if (propertyEntry != propertiesMap.end()) { + return propertyEntry->second; + } + } + + // ldbc id is saved as an extra property as morphstore::graph generates new ones + // static part of social network not included thus saved as property (!!wrongly!!) 
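// Resolution order, illustrated (a sketch; the results follow the ldbc_schema map above):
//   get_data_type("person", "firstName") -> Ldbc_Data_Type::STRING   (found in the schema)
//   get_data_type("person", "id")        -> Ldbc_Data_Type::ID       (fallback below)
//   get_data_type("person", "nickname")  -> Ldbc_Data_Type::ERROR    (unknown key; the import
//                                            code above warns and falls back to STRING)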
+ if (property_key == "id") + return Ldbc_Data_Type::ID; + + // std::cout << "Could not find a data type for " << entity_type << " " << property_key; + return Ldbc_Data_Type::ERROR; + } + + property_type convert_property_value(std::string value, Ldbc_Data_Type type) { + property_type converted_value; + + switch (type) { + case Ldbc_Data_Type::INT_32: + converted_value = std::stoi(value); + break; + case Ldbc_Data_Type::ID: + converted_value = std::stoull(value); + break; + default: + converted_value = value; + }; + + return converted_value; + } +} // namespace morphstore + +#endif // MORPHSTORE_PROPERTY_TYPE_H \ No newline at end of file diff --git a/include/core/storage/graph/property_type.h b/include/core/storage/graph/property_type.h new file mode 100644 index 00000000..3da8c7bf --- /dev/null +++ b/include/core/storage/graph/property_type.h @@ -0,0 +1,42 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file property_type.h + * @brief variant of supported data types for a property (vertex or edge property) + * @todo Move into dedicated sub-folder (when different property mappings exists) + */ + +#ifndef MORPHSTORE_PROPERTY_TYPE_H +#define MORPHSTORE_PROPERTY_TYPE_H + +#include +#include + +namespace morphstore { + // only to used if properties are stored per node or triple store + // TODO: handle date and datetime properties and maybe text + using property_type = std::variant; + + struct PropertyValueVisitor { + void operator()(const std::string &s) const { std::cout << "(string) " << s; } + void operator()(uint64_t i) const { std::cout << "(uint_64t) " << i; } + }; + +} // namespace morphstore + +#endif // MORPHSTORE_PROPERTY_TYPE_H \ No newline at end of file diff --git a/include/core/storage/graph/vertex/vertex.h b/include/core/storage/graph/vertex/vertex.h new file mode 100644 index 00000000..da4dfe43 --- /dev/null +++ b/include/core/storage/graph/vertex/vertex.h @@ -0,0 +1,111 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
* + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file vertex.h + * @brief abstract vertex class for storage formats + * @todo + */ + +#ifndef MORPHSTORE_VERTEX_H +#define MORPHSTORE_VERTEX_H + +#include + +#include +#include +#include +#include + +namespace morphstore { + + class Vertex { + + protected: + // vertex: id, + uint64_t id; + // optional: type, properties + unsigned short int type; + + // delete flag + bool valid = false; + + public: + // default constr. needed for VertexWithProperties(Vertex vertex, const std::unordered_map properties) otherwise compiler won't accept + Vertex(){}; + + Vertex(uint64_t id, unsigned short int type = 0) { + this->id = id; + this->type = type; + this->valid = true; + } + + uint64_t getID() const { return id; } + + unsigned short getType() const { return type; } + + bool isValid() const { return valid; } + + // this is needed when using VerticesVectorArrayContainer when doing vertex_array[offset] = vertex + Vertex &operator=(const Vertex &vertex) { + // self-assignment guard + if (this == &vertex) + return *this; + + // do the copy + this->id = vertex.id; + this->type = vertex.type; + this->valid = vertex.valid; + + // return the existing object so we can chain this operator + return *this; + } + + // get size of vertex in bytes: + static size_t get_data_size_of_vertex() { + size_t size = 0; + size += sizeof(uint64_t); // id + size += sizeof(unsigned short int); // entity + size += sizeof(bool); // valid flag + + return size; + } + }; + + // convinience class for returning whole vertices + class VertexWithProperties { + private: + Vertex vertex; + std::unordered_map properties; + + public: + VertexWithProperties(Vertex vertex, const std::unordered_map properties) { + this->vertex = vertex; + this->properties = properties; + } + + uint64_t getID() { return vertex.getID(); } + + unsigned short getType() const { return vertex.getType(); } + + std::unordered_map getProperties() { return properties; } + }; + +} // namespace morphstore + +#endif // MORPHSTORE_VERTEX_H diff --git a/include/core/storage/graph/vertex/vertices_container.h b/include/core/storage/graph/vertex/vertices_container.h new file mode 100644 index 00000000..6ac9280e --- /dev/null +++ b/include/core/storage/graph/vertex/vertices_container.h @@ -0,0 +1,143 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file vertices_container.h + * @brief abstract class for storing vertices + * @todo + */ + +#ifndef MORPHSTORE_VERTICES_CONTAINER_H +#define MORPHSTORE_VERTICES_CONTAINER_H + +#include +#include + +#include +#include +#include +#include + +namespace morphstore { + enum class VerticesContainerType { HashMapContainer, VectorArrayContainer }; + class VerticesContainer { + protected: + uint64_t currentMaxVertexId = 0; + uint64_t expected_vertex_count = 0; + std::map vertex_type_dictionary; + + // TODO: try other property storage formats than per node .. (triple-store or per property) + std::unordered_map> vertex_properties; + + std::string get_vertex_type(unsigned short int type) const { + if (vertex_type_dictionary.find(type) != vertex_type_dictionary.end()) { + return vertex_type_dictionary.at(type); + } else { + return "No Matching of type-number in the database! For type " + std::to_string(type); + } + } + + uint64_t getNextVertexId() { return currentMaxVertexId++; } + + public: + virtual std::string container_description() const = 0; + virtual void insert_vertex(Vertex v) = 0; + virtual bool exists_vertex(const uint64_t id) const = 0; + virtual Vertex get_vertex(uint64_t id) = 0; + virtual uint64_t vertex_count() const = 0; + + virtual void allocate(uint64_t expected_vertices) { + vertex_properties.reserve(expected_vertices); + expected_vertex_count += expected_vertices; + } + + uint64_t add_vertex(const unsigned short int type, + const std::unordered_map properties = {}) { + assert(currentMaxVertexId < expected_vertex_count); + Vertex v = Vertex(getNextVertexId(), type); + insert_vertex(v); + if (!properties.empty()) { + vertex_properties.insert(std::make_pair(v.getID(), properties)); + } + + return v.getID(); + } + + void add_property_to_vertex(uint64_t id, const std::pair property) { + assert(exists_vertex(id)); + vertex_properties[id].insert(property); + }; + + void set_vertex_type_dictionary(const std::map &types) { + assert(types.size() != 0); + this->vertex_type_dictionary = types; + } + + const VertexWithProperties get_vertex_with_properties(uint64_t id) { + assert(exists_vertex(id)); + return VertexWithProperties(get_vertex(id), vertex_properties[id]); + } + + uint64_t vertices_with_properties_count() { return vertex_properties.size(); } + + virtual std::pair get_size() const { + size_t data_size = 0; + size_t index_size = 0; + + // lookup type dicts + index_size += 2 * sizeof(std::map); + for (auto &type_mapping : vertex_type_dictionary) { + index_size += sizeof(unsigned short int); + index_size += sizeof(char) * (type_mapping.second.length()); + } + + // vertex-properties: + index_size += sizeof(std::unordered_map>); + for (const auto &property_mapping : vertex_properties) { + index_size += sizeof(uint64_t) + sizeof(std::unordered_map); + for (const auto &property : property_mapping.second) { + data_size += sizeof(char) * property.first.length() + sizeof(property.second); + } + } + + return {index_size, data_size}; + } + + void print_type_dict() { + std::cout << "VertexType-Dict: " << std::endl; + for (auto const &entry : vertex_type_dictionary) { + std::cout << entry.first << " -> " << entry.second << std::endl; + } + } + + void print_vertex_by_id(const uint64_t id) { + std::cout << "-------------- Vertex ID: " << id << " --------------" << std::endl; + VertexWithProperties v = get_vertex_with_properties(id); + std::cout << "Vertex-ID: \t" << v.getID() << std::endl; + 
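// Property values are variant-based (see property_type.h); std::visit below dispatches to
// PropertyValueVisitor depending on the alternative currently held. Minimal sketch, assuming
// the (elided) variant alternatives are std::string and uint64_t, as the visitor suggests:
//   property_type p = uint64_t{42};
//   std::visit(PropertyValueVisitor{}, p);   // prints "(uint_64t) 42"
//   p = std::string("Alice");
//   std::visit(PropertyValueVisitor{}, p);   // prints "(string) Alice"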
std::cout << "Type: \t" << get_vertex_type(v.getType()) << std::endl; + std::cout << "Properties: "; + for (const auto entry : v.getProperties()) { + auto value = entry.second; + std::cout << "{" << entry.first << ": "; + std::visit(PropertyValueVisitor{}, value); + std::cout << "}"; + } + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_VERTICES_CONTAINER_H \ No newline at end of file diff --git a/include/core/storage/graph/vertex/vertices_hashmap_container.h b/include/core/storage/graph/vertex/vertices_hashmap_container.h new file mode 100644 index 00000000..c3fc6696 --- /dev/null +++ b/include/core/storage/graph/vertex/vertices_hashmap_container.h @@ -0,0 +1,74 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file vertices_hashmap_container.h + * @brief storing vertices using a hashmap + * @todo + */ + +#ifndef MORPHSTORE_VERTICES_HASHMAP_CONTAINER_H +#define MORPHSTORE_VERTICES_HASHMAP_CONTAINER_H + +#include "vertex.h" +#include "vertices_container.h" + +#include +#include + +namespace morphstore { + + class VerticesHashMapContainer : public VerticesContainer { + protected: + std::unordered_map vertices; + + public: + std::string container_description() const override { return "unordered_map"; } + + void allocate(const uint64_t expected_vertices) override { + VerticesContainer::allocate(expected_vertices); + this->vertices.reserve(expected_vertices); + } + + void insert_vertex(const Vertex v) override { vertices[v.getID()] = v; } + + Vertex get_vertex(uint64_t id) override { return vertices[id]; } + + bool exists_vertex(const uint64_t id) const override { + if (vertices.find(id) == vertices.end()) { + return false; + } + return true; + } + + uint64_t vertex_count() const { return vertices.size(); } + + std::pair get_size() const override { + auto [index_size, data_size] = VerticesContainer::get_size(); + + // container for indexes: + index_size += sizeof(std::unordered_map); + // index size of vertex: size of id and sizeof pointer + index_size += vertices.size() * sizeof(uint64_t); + data_size += vertices.size() * Vertex::get_data_size_of_vertex(); + + return {index_size, data_size}; + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_VERTICES_HASHMAP_CONTAINER_H \ No newline at end of file diff --git a/include/core/storage/graph/vertex/vertices_vectorarray_container.h b/include/core/storage/graph/vertex/vertices_vectorarray_container.h new file mode 100644 index 00000000..0a89042c --- /dev/null +++ b/include/core/storage/graph/vertex/vertices_vectorarray_container.h @@ -0,0 +1,131 @@ 
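// Usage sketch for the vertex containers defined above (illustrative; the template arguments
// of the property map were elided in this diff and are assumed to be <std::string, property_type>):
//   VerticesHashMapContainer vertices;
//   vertices.set_vertex_type_dictionary({{0, "person"}});
//   vertices.allocate(2);                                   // reserve for the expected vertex count
//   uint64_t alice = vertices.add_vertex(0, {{"firstName", std::string("Alice")}});
//   uint64_t bob   = vertices.add_vertex(0);
//   vertices.add_property_to_vertex(bob, {"firstName", std::string("Bob")});
//   vertices.print_vertex_by_id(alice);                     // values printed via PropertyValueVisitor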
+/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file vertices_vectorarray_container.h + * @brief storing vertices using a vector of arrays + * @todo + */ + +#ifndef MORPHSTORE_VERTICES_VECTORARRAY_CONTAINER_H +#define MORPHSTORE_VERTICES_VECTORARRAY_CONTAINER_H + +#include "vertex.h" +#include "vertices_container.h" + +#include +#include +#include + +namespace morphstore { + + class VerticesVectorArrayContainer : public VerticesContainer { + protected: + std::vector vertices; + + static const inline uint64_t vertex_array_size = 4096; + static const inline uint64_t vertices_per_array = vertex_array_size / sizeof(Vertex); + + uint64_t number_of_vertices = 0; + Vertex *current_array; + uint64_t current_array_offset = 0; + + Vertex *allocate_vertex_array() { + auto array_pointer = (Vertex *)std::aligned_alloc(sizeof(Vertex), vertices_per_array * sizeof(Vertex)); + + vertices.push_back(array_pointer); + // std::cout << " Added a page" << std::endl; + // std::cout.flush(); + + return array_pointer; + } + + inline uint64_t get_vertex_vector_number(uint64_t vertex_id) const { return vertex_id / vertices_per_array; } + + inline uint64_t get_pos_in_array(uint64_t vertex_id) const { return vertex_id % vertices_per_array; } + + public: + // TODO: make array_size based on constructor + // VerticesVectorArrayContainer(array_size) + + ~VerticesVectorArrayContainer() { + // TODO: find memory leak (destructor seems not to be called) + std::cout << "freeing vertex pages"; + for (auto array_pointer : this->vertices) { + free(array_pointer); + } + } + + std::string container_description() const override { return "vector"; } + + void allocate(const uint64_t expected_vertices) override { + VerticesContainer::allocate(expected_vertices); + this->vertices.reserve(std::ceil(expected_vertices / (double)vertices_per_array)); + + if (current_array == nullptr) + current_array = allocate_vertex_array(); + } + + void insert_vertex(Vertex v) { + // equals current array is full + if (current_array_offset == vertices_per_array) { + current_array = allocate_vertex_array(); + current_array_offset = 0; + } + // TODO: add check that there is no valid vertex stored there + // need to solve problem that aligned_alloc randomaly inits Vertices (ignores default values) + current_array[current_array_offset] = v; + current_array_offset++; + number_of_vertices++; + } + + Vertex get_vertex(uint64_t id) override { + uint64_t array_number = get_vertex_vector_number(id); + uint64_t pos_in_array = get_pos_in_array(id); + + // assert (pos_in_array < vertices_per_array); + assert(array_number < vertices.size()); + + 
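// Worked example of the mapping above (sizes are illustrative: with vertex_array_size = 4096
// and, e.g., sizeof(Vertex) = 16 after padding, vertices_per_array = 256):
//   id = 1000  ->  array_number = 1000 / 256 = 3,  pos_in_array = 1000 % 256 = 232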
return vertices.at(array_number)[pos_in_array]; + } + + bool exists_vertex(const uint64_t id) const override { + // assumes no deletion! else retrieve vertrex at position and check isValid() + return number_of_vertices > id; + } + + uint64_t vertex_count() const override { return number_of_vertices; } + + std::pair get_size() const override { + auto [index_size, data_size] = VerticesContainer::get_size(); + + // vector count, current_array_offset + index_size += 2 * sizeof(uint64_t); + // current_array + index_size += sizeof(Vertex *); + + index_size += sizeof(std::vector); + index_size += vertices.size() * sizeof(Vertex *); + // allocated memory for vertices + data_size += vertices.size() * Vertex::get_data_size_of_vertex() * vertices_per_array; + + return {index_size, data_size}; + } + }; +} // namespace morphstore + +#endif // MORPHSTORE_VERTICES_VECTORARRAY_CONTAINER_H \ No newline at end of file diff --git a/include/core/utils/equality_check.h b/include/core/utils/equality_check.h index 8e2a7ab3..80457c41 100644 --- a/include/core/utils/equality_check.h +++ b/include/core/utils/equality_check.h @@ -31,6 +31,8 @@ #include #include +#include + namespace morphstore { struct equality_check { @@ -77,6 +79,9 @@ namespace morphstore { }; std::ostream & operator<<( std::ostream & os, const equality_check & ec ) { + const char *data_ok_str = + (ec.m_CountValuesEqual && ec.m_SizeUsedByteEqual) ? equality_check::ok_str(ec.m_DataEqual) : "undefined"; + os << "countValues: " << equality_check::ok_str( ec.m_CountValuesEqual ) << " (expected " << ec.m_CountValuesExp @@ -86,10 +91,30 @@ namespace morphstore { << " (expected " << ec.m_SizeUsedByteExp << ", found " << ec.m_SizeUsedByteFnd << ')' << std::endl - << "data: " << equality_check::ok_str( ec.m_DataEqual ) - << " (this check is only valid, if countValues and sizeUsedByte are ok)" + << "data: " << data_ok_str << std::endl; return os; } + + template void assert_columns_equal(const column *expected_col, const column *actual_col) { + equality_check ec(expected_col, actual_col); + std::cout << ec; + if (!ec.good()) { + uint64_t *expected = expected_col->get_data(); + uint64_t *actual = actual_col->get_data(); + + assert(ec.m_CountValuesEqual); + assert(ec.m_SizeUsedByteEqual); + + // printing only different entries + for (uint64_t i = 0; i < expected_col->get_count_values(); i++) { + if (!(expected[i] == actual[i])) { + std::cout << "pos: " << i << " expected: " << expected[i] << " actual: " << actual[i] << std::endl; + } + } + // print_columns(print_buffer_base::decimal, actual_col, expected_col, "actual", "expected"); + assert(false); + } + } } #endif //MORPHSTORE_CORE_UTILS_EQUALITY_CHECK_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b92fa98b..cfbc2244 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,6 +2,8 @@ add_subdirectory( calibration ) add_subdirectory( examples ) add_subdirectory( microbenchmarks ) +add_subdirectory( microbenchmarks/graph ) + # There might be automatically generated subdirectories for the Star Schema # Benchmark (SSB), possibly with different scale factors. The following lines # add all of them. 
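The `assert_columns_equal` helper added to `equality_check.h` above is intended for tests that compare an operator's output against a reference column; it prints the `equality_check` report, lists every differing position on mismatch, and then fails. A hedged usage sketch (the column format `uncompr_f`, the include path, and the two factory functions are assumptions for illustration, not part of this patch):

~~~
#include <core/utils/equality_check.h>

void check_operator_output() {
    // Hypothetical helpers standing in for whatever an individual test builds:
    const morphstore::column<morphstore::uncompr_f> *expected = make_expected_column();
    const morphstore::column<morphstore::uncompr_f> *actual = run_operator_under_test();

    // Prints the equality report; on mismatch it lists each differing position and asserts.
    morphstore::assert_columns_equal(expected, actual);
}
~~~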
diff --git a/src/microbenchmarks/graph/CMakeLists.txt b/src/microbenchmarks/graph/CMakeLists.txt new file mode 100644 index 00000000..f0ba91b7 --- /dev/null +++ b/src/microbenchmarks/graph/CMakeLists.txt @@ -0,0 +1,26 @@ +if ( BUILD_ALL OR BUILD_MICROBMS ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/src/microbenchmarks/graph/vertex_storage_benchmark_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/src/microbenchmarks/graph/edge_storage_benchmark_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/src/microbenchmarks/graph/compress_csr_benchmark_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/src/microbenchmarks/graph/compress_adjList_benchmark_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/src/microbenchmarks/graph/bfs_benchmark_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/src/microbenchmarks/graph/page_rank_benchmark_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/src/microbenchmarks/graph/bfs_csr_partial_compression_benchmark_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/src/microbenchmarks/graph/page_rank_csr_partial_compression_benchmark_app ) + + add_executable( vertex_storage_benchmark_app vertex_storage_benchmark.cpp) + add_executable( edge_storage_benchmark_app edge_storage_benchmark.cpp) + add_executable( compress_csr_benchmark_app csr_graph_compression_benchmark.cpp) + add_executable( compress_adjList_benchmark_app adjList_graph_compression_benchmark.cpp) + add_executable( bfs_benchmark_app bfs_benchmark.cpp) + add_executable( page_rank_benchmark_app page_rank_benchmark.cpp) + add_executable( bfs_csr_partial_compression_benchmark_app bfs_csr_partial_compression_benchmark.cpp) + add_executable( page_rank_csr_partial_compression_benchmark_app page_rank_csr_partial_compression_benchmark.cpp) + + target_link_libraries(compress_csr_benchmark_app PRIVATE "-ldl" stdc++fs) + target_link_libraries(compress_adjList_benchmark_app PRIVATE "-ldl" stdc++fs) + target_link_libraries(bfs_benchmark_app PRIVATE "-ldl" stdc++fs) + target_link_libraries(page_rank_benchmark_app PRIVATE "-ldl" stdc++fs) + target_link_libraries(bfs_csr_partial_compression_benchmark_app PRIVATE "-ldl" stdc++fs) + target_link_libraries(page_rank_csr_partial_compression_benchmark_app PRIVATE "-ldl" stdc++fs) +endif() \ No newline at end of file diff --git a/src/microbenchmarks/graph/adjList_graph_compression_benchmark.cpp b/src/microbenchmarks/graph/adjList_graph_compression_benchmark.cpp new file mode 100644 index 00000000..14719e81 --- /dev/null +++ b/src/microbenchmarks/graph/adjList_graph_compression_benchmark.cpp @@ -0,0 +1,129 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file graph_compression_benchmark.cpp + * @brief A benchmark of the csr-graph compression (using the ldbc graph) + * @todo allow different compression formats for the two csr columns; add full_iterate + */ + +#include "benchmark_helper.h" +#include +#include +#include + +using namespace morphstore; + +struct CompressionBenchmarkEntry { + GraphCompressionFormat compr_format; + uint64_t min_compr_degree; + int64_t compression_time; + double compression_ratio; + double column_ratio; + int64_t random_access_time; + int64_t full_iterate; + + std::string to_string() { + return graph_compr_f_to_string(compr_format) + "|" + std::to_string(min_compr_degree) + "|" + + std::to_string(compression_time) + "|" + std::to_string(compression_ratio) + "|" + + std::to_string(column_ratio) + "|" + std::to_string(random_access_time) + "|" + + std::to_string(full_iterate); + } +}; + +int main(void) { +#ifdef LDBC_DIR + // could be also build parameters? + const int number_of_executions = 5; + const int number_of_random_access = 1000; + + std::vector compr_formats = {GraphCompressionFormat::DELTA, GraphCompressionFormat::FOR, + GraphCompressionFormat::DYNAMIC_VBP, + GraphCompressionFormat::UNCOMPRESSED}; + + std::vector min_compr_degrees = {1024, 500, 100, 64, 1}; + + // Load ldbc graph + std::unique_ptr graph = std::make_unique(); + std::unique_ptr ldbcImport = std::make_unique(LDBC_DIR); + ldbcImport->import(*graph); + + // prepare random-access (TODO: makes only sense if column_ratio is high enough) --> also measure full iterate here + std::random_device rd; + std::uniform_int_distribution dist(0, graph->getVertexCount() - 1); + std::vector random_accesses; + for (int i = 0; i < number_of_random_access; i++) { + random_accesses.push_back(dist(rd)); + } + + std::cout << "Test compression of adjacency-list format" << std::endl; + std::cout << "Compression-Format | minimum degree for compression | compression-time | " + << "compr. 
ratio | column ratio | access of edges of 5000 random vertices | full-iterate " << std::endl; + + for (auto min_compr_degree : min_compr_degrees) { + for (auto current_f : compr_formats) { + if (min_compr_degree < 100 && !(current_f == GraphCompressionFormat::DYNAMIC_VBP || + current_f == GraphCompressionFormat::UNCOMPRESSED)) { + continue; + } + + graph->set_min_compr_degree(min_compr_degree); + + for (int exec = 0; exec < number_of_executions; exec++) { + CompressionBenchmarkEntry current_try; + current_try.compr_format = current_f; + current_try.min_compr_degree = graph->get_min_compr_degree(); + + // restore start state + graph->morph(GraphCompressionFormat::UNCOMPRESSED, false); + + auto start = highResClock::now(); + // "false" as otherwise blocksize would be set based on format + graph->morph(current_f, false); + // compression time + current_try.compression_time = get_duration(start); + + current_try.compression_ratio = graph->compr_ratio(); + // currently based on fixed min_compr_degree + current_try.column_ratio = graph->column_ratio(); + + // random access + start = highResClock::now(); + for (int random_pos : random_accesses) { + graph->get_outgoing_edge_ids(random_pos); + } + current_try.random_access_time = get_duration(start); + + // full iterate + auto vertex_count = graph->getVertexCount(); + start = highResClock::now(); + for (uint64_t id = 0; id < vertex_count; id++) { + graph->get_outgoing_edge_ids(id); + } + + current_try.full_iterate = get_duration(start); + + std::cout << current_try.to_string() << std::endl; + } + } + } + + return 0; +#else + throw std::invalid_argument("You forgot to define/uncomment the LDBC_DIR (at CMakeList.txt)"); +#endif +} diff --git a/src/microbenchmarks/graph/benchmark_helper.h b/src/microbenchmarks/graph/benchmark_helper.h new file mode 100644 index 00000000..01a4fb0c --- /dev/null +++ b/src/microbenchmarks/graph/benchmark_helper.h @@ -0,0 +1,47 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file benchmark_helper.h + * @brief Helper functions for graph benchmarks + * @todo + */ + +#ifndef BENCHMARK_HELPER +#define BENCHMARK_HELPER + +#include +#include +#include +#include + +namespace morphstore { + using highResClock = std::chrono::high_resolution_clock; + + int64_t get_duration(std::chrono::time_point start) { + auto stop = highResClock::now(); + return std::chrono::duration_cast(stop - start).count(); + } + + int64_t get_median(std::vector values) { + assert(values.size() > 0); + std::nth_element(values.begin(), values.begin() + values.size() / 2, values.end()); + return values[values.size() / 2]; + } +} // namespace morphstore + +#endif // BENCHMARK_HELPER diff --git a/src/microbenchmarks/graph/bfs_benchmark.cpp b/src/microbenchmarks/graph/bfs_benchmark.cpp new file mode 100644 index 00000000..4f0123c2 --- /dev/null +++ b/src/microbenchmarks/graph/bfs_benchmark.cpp @@ -0,0 +1,123 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file bfs_benchmark.cpp + * @brief A benchmark evaluating the impact of graph compression on breadth first search (using the ldbc graph) + */ + +#include "benchmark_helper.h" +#include +#include +#include +#include + +#include + +using namespace morphstore; + +struct CompressionBenchmarkEntry { + std::string graph_format; + std::string compr_format; + int64_t bfs_time; + int64_t visited_vertices; + + std::string to_string() { + return graph_format + "|" + compr_format + "|" + std::to_string(bfs_time) + "|" + + std::to_string(visited_vertices); + } +}; + +template void benchmark() { + + static_assert(std::is_base_of::value, + "type parameter of this method must be a graph format"); + +#ifdef LDBC_DIR + // could be also build parameters? 
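// Usage pattern for the helpers above (sketch; get_duration() reports microseconds, matching
// the column headers printed by these benchmarks):
//   auto start = highResClock::now();
//   run_measured_work();                                    // hypothetical workload
//   int64_t micros = get_duration(start);
//   // repeated runs can then be reduced via get_median({run1_micros, run2_micros, ...})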
+ const int number_of_executions = 5; + const int number_of_start_vertices = 10; + + // order based on block-size (as adj-list format currently only supports decreasing blocksizes at `morph()`) + std::vector compr_formats = {GraphCompressionFormat::DELTA, GraphCompressionFormat::FOR, + GraphCompressionFormat::DYNAMIC_VBP, + GraphCompressionFormat::UNCOMPRESSED}; + + // Load ldbc graph + // blank lines for easier deletion of progress prints + std::cout << std::endl << std::endl; + std::shared_ptr graph = std::make_shared(); + std::unique_ptr ldbcImport = std::make_unique(LDBC_DIR); + ldbcImport->import(*graph); + std::cout << std::endl << std::endl; + + const int cycle_size = graph->getVertexCount() / number_of_start_vertices; + auto start_vertex_ids = BFS::get_list_of_every_ith_vertex(graph, cycle_size); + + std::cout << "Test impact of compression on BFS (10 start-nodes (evenly distributed regarding degree); 5x excutions)" << std::endl; + std::cout << "Graph-Format | Compression-Format | bfs-time in micro seconds| visited vertices" << std::endl; + + // for AdjacencyList format a version, where all lists are stored as vectors (not morphed -> nothing finalized) + if (std::is_same::value) { + for (int exec = 0; exec < number_of_executions; exec++) { + CompressionBenchmarkEntry current_try; + current_try.graph_format = graph->get_storage_format(); + current_try.compr_format = + graph_compr_f_to_string(GraphCompressionFormat::UNCOMPRESSED) + " (all vectors)"; + + for (auto id : start_vertex_ids) { + auto start = highResClock::now(); + current_try.visited_vertices = morphstore::BFS::compute(graph, id); + current_try.bfs_time = get_duration(start); + + // for saving into csv file, just use "> xyz.csv" at execution + std::cout << current_try.to_string() << std::endl; + } + } + } + + for (auto current_f : compr_formats) { + for (int exec = 0; exec < number_of_executions; exec++) { + CompressionBenchmarkEntry current_try; + current_try.graph_format = graph->get_storage_format(); + current_try.compr_format = graph_compr_f_to_string(current_f); + + // restore start state (not needed as this will be not timed and morphing internally goes via uncompr) + //graph->morph(GraphCompressionFormat::UNCOMPRESSED, false); + // morphing into desired format + graph->morph(current_f); + + for (auto id : start_vertex_ids) { + auto start = highResClock::now(); + current_try.visited_vertices = morphstore::BFS::compute(graph, id); + current_try.bfs_time = get_duration(start); + + // for saving into csv file, just use "> xyz.csv" at execution + std::cout << current_try.to_string() << std::endl; + } + + } + } +#else + throw std::invalid_argument("You forgot to define/uncomment the LDBC_DIR (at CMakeList.txt)"); +#endif +} + +int main(void) { + benchmark(); + benchmark(); +} diff --git a/src/microbenchmarks/graph/bfs_csr_partial_compression_benchmark.cpp b/src/microbenchmarks/graph/bfs_csr_partial_compression_benchmark.cpp new file mode 100644 index 00000000..0f77bd57 --- /dev/null +++ b/src/microbenchmarks/graph/bfs_csr_partial_compression_benchmark.cpp @@ -0,0 +1,110 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. 
* + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file bfs_csr_partial_compression_benchmark.cpp + * @brief A benchmark evaluating the impact of graph compression on breadth first search (using the ldbc graph) and + * only compressing on csr column + * @todo cleanup benchmark (this was only created last minute) + */ + +#include "benchmark_helper.h" +#include +#include +#include + +#include + +using namespace morphstore; + +struct CompressionBenchmarkEntry { + std::string graph_format; + std::string compr_format; + int64_t bfs_time; + int64_t visited_vertices; + + std::string to_string() { + return graph_format + "|" + compr_format + "|" + std::to_string(bfs_time) + "|" + + std::to_string(visited_vertices); + } +}; + +int main(void) { +#ifdef LDBC_DIR + // could be also build parameters? + const int number_of_executions = 5; + const int number_of_start_vertices = 10; + + // combination of uncompress + other + std::vector> compr_formats = { + {GraphCompressionFormat::DELTA, GraphCompressionFormat::UNCOMPRESSED}, + {GraphCompressionFormat::FOR, GraphCompressionFormat::UNCOMPRESSED}, + {GraphCompressionFormat::DYNAMIC_VBP, GraphCompressionFormat::UNCOMPRESSED}, + {GraphCompressionFormat::UNCOMPRESSED, GraphCompressionFormat::DELTA}, + {GraphCompressionFormat::UNCOMPRESSED, GraphCompressionFormat::FOR}, + {GraphCompressionFormat::UNCOMPRESSED, GraphCompressionFormat::DYNAMIC_VBP}}; + + // Load ldbc graph + // blank lines for easier deletion of progress prints + std::cout << std::endl << std::endl; + std::shared_ptr graph = std::make_shared(); + std::unique_ptr ldbcImport = std::make_unique(LDBC_DIR); + ldbcImport->import(*graph); + std::cout << std::endl << std::endl; + + const int cycle_size = graph->getVertexCount() / number_of_start_vertices; + auto start_vertex_ids = BFS::get_list_of_every_ith_vertex(graph, cycle_size); + + // BFS + std::cout + << "Test impact of compression on BFS (10 start-nodes (evenly distributed regarding degree); 5x excutions)" + << std::endl; + std::cout << "Graph-Format | Compression-Format | bfs-time in micro seconds| visited vertices" << std::endl; + + for (auto [offset_format, edgeId_format] : compr_formats) { + for (int exec = 0; exec < number_of_executions; exec++) { + CompressionBenchmarkEntry current_try; + current_try.graph_format = graph->get_storage_format(); + + if(offset_format != GraphCompressionFormat::UNCOMPRESSED) { + current_try.compr_format = graph_compr_f_to_string(offset_format); + current_try.graph_format += "(only offsets compressed)"; + } + else if (edgeId_format != GraphCompressionFormat::UNCOMPRESSED) { + current_try.compr_format = graph_compr_f_to_string(edgeId_format); + current_try.graph_format += "(only edgeIds compressed)"; + } + + // restore start state (not needed as this will be not timed and morphing internally goes via uncompr) + // 
graph->morph(GraphCompressionFormat::UNCOMPRESSED, false); + // morphing into desired format + graph->morph(offset_format, edgeId_format); + + for (auto id : start_vertex_ids) { + auto start = highResClock::now(); + current_try.visited_vertices = morphstore::BFS::compute(graph, id); + current_try.bfs_time = get_duration(start); + + // for saving into csv file, just use "> xyz.csv" at execution + std::cout << current_try.to_string() << std::endl; + } + } + } +#else + throw std::invalid_argument("You forgot to define/uncomment the LDBC_DIR (at CMakeList.txt)"); +#endif +} diff --git a/src/microbenchmarks/graph/csr_graph_compression_benchmark.cpp b/src/microbenchmarks/graph/csr_graph_compression_benchmark.cpp new file mode 100644 index 00000000..bd932cef --- /dev/null +++ b/src/microbenchmarks/graph/csr_graph_compression_benchmark.cpp @@ -0,0 +1,117 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file csr_graph_compression_benchmark.cpp + * @brief A benchmark of the csr-graph compression (using the ldbc graph) + * @todo allow different compression formats for the two csr columns + */ + +#include "benchmark_helper.h" +#include +#include +#include + +using namespace morphstore; + +struct CompressionBenchmarkEntry { + GraphCompressionFormat compr_format; + int64_t compression_time; + double offset_col_compression_ratio; + double edgeId_col_compression_ratio; + int64_t random_access_time; + int64_t full_iterate; + + std::string to_string() { + return graph_compr_f_to_string(compr_format) + "|" + std::to_string(compression_time) + "|" + + std::to_string(offset_col_compression_ratio) + "|" + std::to_string(edgeId_col_compression_ratio) + + "|" + std::to_string(random_access_time) + "|" + std::to_string(full_iterate); + } +}; + +int main(void) { +#ifdef LDBC_DIR + // could be also build parameters? 
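// Each measurement below is printed as one '|'-separated row matching the header above, e.g.
// (values invented for illustration):
//   DELTA|523|1.873400|2.451200|810|45012
// i.e. format | morph time (microseconds) | offset-column ratio | edgeId-column ratio |
// random access | full iterate; redirecting stdout ("> results.csv") captures the rows
// for later analysis.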
+ const int number_of_executions = 5; + const int number_of_random_access = 1000; + + std::vector compr_formats = {GraphCompressionFormat::UNCOMPRESSED, + GraphCompressionFormat::DELTA, GraphCompressionFormat::FOR, + GraphCompressionFormat::DYNAMIC_VBP}; + + // Load ldbc graph + std::unique_ptr graph = std::make_unique(); + std::unique_ptr ldbcImport = std::make_unique(LDBC_DIR); + ldbcImport->import(*graph); + + // prepare random-access + std::random_device rd; + std::uniform_int_distribution dist(0, graph->getVertexCount() - 1); + std::vector random_accesses; + for (int i = 0; i < number_of_random_access; i++) { + random_accesses.push_back(dist(rd)); + } + + std::cout << "Test compression of ldbc-graph in CSR format (times in " + "micro-seconds)" + << std::endl; + std::cout << "Compression-Format | compression-time | offset-column compr. ratio" + << " | edgeId-column compr. ratio | access of edges of " + << std::to_string(number_of_random_access) + " random vertices" + << " | full iterate" << std::endl; + + for (auto current_f : compr_formats) { + for (int exec = 0; exec < number_of_executions; exec++) { + CompressionBenchmarkEntry current_try; + current_try.compr_format = current_f; + // restore start state + graph->morph(GraphCompressionFormat::UNCOMPRESSED); + + auto start = highResClock::now(); + graph->morph(current_f); + // compression time + current_try.compression_time = get_duration(start); + + // compression-ratios + current_try.offset_col_compression_ratio = graph->offset_column_compr_ratio(); + current_try.edgeId_col_compression_ratio = graph->edgeId_column_compr_ratio(); + + // random access + start = highResClock::now(); + for (int random_pos : random_accesses) { + graph->get_outgoing_edge_ids(random_pos); + } + current_try.random_access_time = get_duration(start); + + // full iterate + auto vertex_count = graph->getVertexCount(); + start = highResClock::now(); + for (uint64_t id = 0; id < vertex_count; id++) { + graph->get_outgoing_edge_ids(id); + } + + current_try.full_iterate = get_duration(start); + + std::cout << current_try.to_string() << std::endl; + } + } + + return 0; +#else + throw std::invalid_argument("You forgot to define/uncomment the LDBC_DIR (at CMakeList.txt)"); +#endif +} diff --git a/src/microbenchmarks/graph/edge_storage_benchmark.cpp b/src/microbenchmarks/graph/edge_storage_benchmark.cpp new file mode 100644 index 00000000..4d359d6d --- /dev/null +++ b/src/microbenchmarks/graph/edge_storage_benchmark.cpp @@ -0,0 +1,112 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file edge_storage_benchmark.cpp + * @brief A little mirco benchmark of the edge storage. + * @todo Fix edge id generation for benchmark to work + */ + +#include "benchmark_helper.h" +#include +#include +#include + +using namespace morphstore; + +int main(void) { + // use BenchmarkEntry struct instead of appending to string + + int number_of_executions = 5; + + std::cout << "Test edge storage structure (median of 5 for full_iterate and random access)" << std::endl; + std::cout << "Container type | edge_count | loading time in μs | memory usage in bytes | full_iterate in μs | " + "random access 1/10 of the edge count in μs" + << std::endl; + + std::vector storage_types = {EdgesContainerType::HashMapContainer, + EdgesContainerType::VectorArrayContainer}; + + std::vector edge_counts = {10000, 100000, 1000000, 2000000, 5000000, 10000000, 15000000}; + + for (int edge_count : edge_counts) { + std::random_device rd; + std::uniform_int_distribution dist(0, edge_count - 1); + std::vector random_accesses; + for (int i = 0; i < edge_count; i++) { + random_accesses.push_back(dist(rd)); + } + + for (auto storage_type : storage_types) { + std::unique_ptr graph = std::make_unique(storage_type); + graph->allocate_graph_structure(1, edge_count); + + std::string measurement_entry = graph->edges_container_description() + " | "; + measurement_entry += std::to_string(edge_count) + " | "; + + auto vertex_id = graph->add_vertex(0); + std::vector edges; + + for (int i = 0; i < edge_count; i++) { + edges.push_back(Edge(vertex_id, vertex_id, 0)); + } + + auto start = highResClock::now(); + graph->add_edges(vertex_id, edges); + // loading time + measurement_entry += std::to_string(get_duration(start)) + " | "; + + // size + auto [index_size, data_size] = graph->get_size_of_graph(); + measurement_entry += std::to_string(index_size + data_size) + " | "; + + std::vector durations; + + // full iterate + for (int exec = 0; exec < number_of_executions; exec++) { + auto start = highResClock::now(); + // iterate + for (int i = 0; i < edge_count; i++) { + graph->get_edge(i); + } + durations.push_back(get_duration(start)); + } + + measurement_entry += std::to_string(get_median(durations)) + " | "; + + // random access + + durations.clear(); + + for (int exec = 0; exec < number_of_executions; exec++) { + auto start = highResClock::now(); + + for (int random_pos : random_accesses) { + graph->get_edge(random_pos); + } + + durations.push_back(get_duration(start)); + } + + measurement_entry += std::to_string(get_median(durations)); + + std::cout << measurement_entry << std::endl; + } + } + + return 0; +} diff --git a/src/microbenchmarks/graph/page_rank_benchmark.cpp b/src/microbenchmarks/graph/page_rank_benchmark.cpp new file mode 100644 index 00000000..36706951 --- /dev/null +++ b/src/microbenchmarks/graph/page_rank_benchmark.cpp @@ -0,0 +1,115 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. 
* + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file page_rank_benchmark.cpp + * @brief A benchmark evaluating the impact of graph compression on PageRank (using the ldbc graph) + */ + +#include "benchmark_helper.h" +#include +#include +#include +#include + +#include + +using namespace morphstore; + +struct CompressionBenchmarkEntry { + std::string graph_format; + std::string compr_format; + uint64_t page_rank_time, ran_iterations; + + std::string to_string() { + return graph_format + "|" + compr_format + "|" + std::to_string(page_rank_time) + "|" + + std::to_string(ran_iterations); + } +}; + +template void benchmark() { + + static_assert(std::is_base_of::value, + "type parameter of this method must be a graph format"); + +#ifdef LDBC_DIR + // could be also build parameters? + const int number_of_executions = 5; + + // order based on block-size (as adj-list format currently only supports decreasing blocksizes at `morph()`) + std::vector compr_formats = {GraphCompressionFormat::DELTA, GraphCompressionFormat::FOR, + GraphCompressionFormat::DYNAMIC_VBP, + GraphCompressionFormat::UNCOMPRESSED}; + + // Load ldbc graph + // blank lines for easier deletion of progress prints + std::cout << std::endl << std::endl; + std::shared_ptr graph = std::make_shared(); + std::unique_ptr ldbcImport = std::make_unique(LDBC_DIR); + ldbcImport->import(*graph); + std::cout << std::endl << std::endl; + + std::cout << "Test impact of compression on PageRank (5x executions)" << std::endl; + std::cout << "Graph-Format | Compression-Format | page_rank-time in micro seconds | iterations ran" << std::endl; + + // for adj-list a version, where all lists are stored as vectors (not morphed -> nothing finalized) + if (std::is_same::value) { + for (int exec = 0; exec < number_of_executions; exec++) { + CompressionBenchmarkEntry current_try; + current_try.graph_format = graph->get_storage_format(); + current_try.compr_format = + graph_compr_f_to_string(GraphCompressionFormat::UNCOMPRESSED) + " (all vectors)"; + + auto start = highResClock::now(); + // current default values for PageRank: max_iterations = 20, damping_factor = 0.85, tolerance = 0.0001 + current_try.ran_iterations = morphstore::PageRank::compute(graph).ran_iterations; + current_try.page_rank_time = get_duration(start); + + // for saving into csv file, just use "> xyz.csv" at execution + std::cout << current_try.to_string() << std::endl; + } + } + + for (auto current_f : compr_formats) { + for (int exec = 0; exec < number_of_executions; exec++) { + CompressionBenchmarkEntry current_try; + current_try.graph_format = graph->get_storage_format(); + current_try.compr_format = graph_compr_f_to_string(current_f); + + // restore start state (not needed as this will be not timed and morphing internally goes via uncompr) + // graph->morph(GraphCompressionFormat::UNCOMPRESSED, false); + // morphing into desired format + graph->morph(current_f); + + auto start = highResClock::now(); + // current default values for PageRank: max_iterations = 20, damping_factor = 0.85, tolerance = 0.0001 + current_try.ran_iterations = 
morphstore::PageRank::compute(graph).ran_iterations; + current_try.page_rank_time = get_duration(start); + + // for saving into csv file, just use "> xyz.csv" at execution + std::cout << current_try.to_string() << std::endl; + } + } +#else + throw std::invalid_argument("You forgot to define/uncomment the LDBC_DIR (at CMakeList.txt)"); +#endif +} + +int main(void) { + benchmark(); + benchmark(); +} diff --git a/src/microbenchmarks/graph/page_rank_csr_partial_compression_benchmark.cpp b/src/microbenchmarks/graph/page_rank_csr_partial_compression_benchmark.cpp new file mode 100644 index 00000000..9895135e --- /dev/null +++ b/src/microbenchmarks/graph/page_rank_csr_partial_compression_benchmark.cpp @@ -0,0 +1,102 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file page_rank_csr_partial_compression_benchmark.cpp + * @brief A benchmark evaluating the impact of graph compression on PageRank (using the ldbc graph) and + * only compressing on csr column + * @todo cleanup benchmark (this was only created last minute) + */ + +#include "benchmark_helper.h" +#include +#include +#include + +#include + +using namespace morphstore; + +struct CompressionBenchmarkEntry { + std::string graph_format; + std::string compr_format; + uint64_t page_rank_time, ran_iterations; + + std::string to_string() { + return graph_format + "|" + compr_format + "|" + std::to_string(page_rank_time) + "|" + + std::to_string(ran_iterations); + } +}; + +int main(void) { +#ifdef LDBC_DIR + // could be also build parameters? 
+ const int number_of_executions = 5; + + // combination of uncompress + other + std::vector> compr_formats = { + {GraphCompressionFormat::DELTA, GraphCompressionFormat::UNCOMPRESSED}, + {GraphCompressionFormat::FOR, GraphCompressionFormat::UNCOMPRESSED}, + {GraphCompressionFormat::DYNAMIC_VBP, GraphCompressionFormat::UNCOMPRESSED}, + {GraphCompressionFormat::UNCOMPRESSED, GraphCompressionFormat::DELTA}, + {GraphCompressionFormat::UNCOMPRESSED, GraphCompressionFormat::FOR}, + {GraphCompressionFormat::UNCOMPRESSED, GraphCompressionFormat::DYNAMIC_VBP}}; + + // Load ldbc graph + // blank lines for easier deletion of progress prints + std::cout << std::endl << std::endl; + std::shared_ptr graph = std::make_shared(); + std::unique_ptr ldbcImport = std::make_unique(LDBC_DIR); + ldbcImport->import(*graph); + std::cout << std::endl << std::endl; + + // PageRank + std::cout << "Test impact of compression on PageRank (5x executions)" << std::endl; + std::cout << "Graph-Format | Compression-Format | page_rank-time in micro seconds | iterations ran" << std::endl; + + for (auto [offset_format, edgeId_format] : compr_formats) { + for (int exec = 0; exec < number_of_executions; exec++) { + CompressionBenchmarkEntry current_try; + current_try.graph_format = graph->get_storage_format(); + + if(offset_format != GraphCompressionFormat::UNCOMPRESSED) { + current_try.compr_format = graph_compr_f_to_string(offset_format); + current_try.graph_format += "(only offsets compressed)"; + } + else if (edgeId_format != GraphCompressionFormat::UNCOMPRESSED) { + current_try.compr_format = graph_compr_f_to_string(edgeId_format); + current_try.graph_format += "(only edgeIds compressed)"; + } + + // restore start state (not needed as this will be not timed and morphing internally goes via uncompr) + // graph->morph(GraphCompressionFormat::UNCOMPRESSED, false); + // morphing into desired format + graph->morph(offset_format, edgeId_format); + + auto start = highResClock::now(); + // current default values for PageRank: max_iterations = 20, damping_factor = 0.85, tolerance = 0.0001 + current_try.ran_iterations = morphstore::PageRank::compute(graph).ran_iterations; + current_try.page_rank_time = get_duration(start); + + // for saving into csv file, just use "> xyz.csv" at execution + std::cout << current_try.to_string() << std::endl; + } + } +#else + throw std::invalid_argument("You forgot to define/uncomment the LDBC_DIR (at CMakeList.txt)"); +#endif +} diff --git a/src/microbenchmarks/graph/vertex_storage_benchmark.cpp b/src/microbenchmarks/graph/vertex_storage_benchmark.cpp new file mode 100644 index 00000000..d925e2d8 --- /dev/null +++ b/src/microbenchmarks/graph/vertex_storage_benchmark.cpp @@ -0,0 +1,106 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file vertex_storage_benchmark.cpp + * @brief A little mirco benchmark of the vertex storage (hashmap vs vector>). + */ + +#include "benchmark_helper.h" +#include +#include +#include + +using namespace morphstore; + +int main(void) { + // TODO: use core/utils/monitoring.h ? or a "time_it" function to stop a given function + + int number_of_executions = 5; + + std::cout << "Test vertex storage structure (median of 5 for full_iterate and random access)" << std::endl; + std::cout << "Container type | vertex_count | loading time in μs | memory usage in bytes | full_iterate in μs | " + "random access 1/10 of the vertex count in μs" + << std::endl; + + std::vector storage_types = {VerticesContainerType::HashMapContainer, + VerticesContainerType::VectorArrayContainer}; + + std::vector vertex_counts = {10000, 100000, 1000000, 2000000, 5000000, 10000000, 15000000}; + + for (int vertex_count : vertex_counts) { + std::random_device rd; + std::uniform_int_distribution dist(0, vertex_count - 1); + std::vector random_accesses; + for (int i = 0; i < vertex_count; i++) { + random_accesses.push_back(dist(rd)); + } + + for (auto storage_type : storage_types) { + std::unique_ptr graph = std::make_unique(storage_type); + graph->allocate_graph_structure(vertex_count, 0); + + std::string measurement_entry = graph->vertices_container_description() + " | "; + measurement_entry += std::to_string(vertex_count) + " | "; + + auto start = highResClock::now(); + for (int i = 0; i < vertex_count; i++) { + graph->add_vertex(); + } + // loading time + measurement_entry += std::to_string(get_duration(start)) + " | "; + + // size + auto [index_size, data_size] = graph->get_size_of_graph(); + measurement_entry += std::to_string(index_size + data_size) + " | "; + + std::vector durations; + + // full iterate + for (int exec = 0; exec < number_of_executions; exec++) { + auto start = highResClock::now(); + // iterate + for (int i = 0; i < vertex_count; i++) { + graph->get_vertex(i); + } + durations.push_back(get_duration(start)); + } + + measurement_entry += std::to_string(get_median(durations)) + " | "; + + // random access + + durations.clear(); + + for (int exec = 0; exec < number_of_executions; exec++) { + auto start = highResClock::now(); + + for (int random_pos : random_accesses) { + graph->get_vertex(random_pos); + } + + durations.push_back(get_duration(start)); + } + + measurement_entry += std::to_string(get_median(durations)); + + std::cout << measurement_entry << std::endl; + } + } + + return 0; +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8580fab2..ba42a897 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,4 +5,9 @@ add_subdirectory( core/persistence ) add_subdirectory( core/storage ) add_subdirectory( core/utils ) -add_subdirectory(vector) \ No newline at end of file +add_subdirectory( core/storage/graph/simple ) +add_subdirectory( core/storage/graph/ldbc ) +add_subdirectory( core/operators/graph/simple ) +add_subdirectory( core/operators/graph/ldbc ) + +add_subdirectory(vector) diff --git a/test/core/morphing/CMakeLists.txt b/test/core/morphing/CMakeLists.txt index 87bd57df..f314130f 100644 --- a/test/core/morphing/CMakeLists.txt +++ b/test/core/morphing/CMakeLists.txt @@ -1,27 +1,40 @@ if ( CTEST_ALL OR CTEST_MORPHING ) - FILE( REMOVE 
${CMAKE_BINARY_DIR}/test/core/morphing/delta_test_app ) - FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/morphing/k_wise_ns_test_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/morphing/morph_saving_offsets_test_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/morphing/morph_column_block_test_app ) FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/morphing/vbp_test_app ) - add_executable( delta_test_app delta_test.cpp ) - add_executable( k_wise_ns_test_app k_wise_ns_test.cpp ) + add_executable( morph_saving_offsets_test_app morph_saving_offsets_test.cpp ) + add_executable( morph_column_block_test_app morph_column_block_test.cpp ) add_executable( vbp_test_app vbp_test.cpp ) + + if (SSE) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/morphing/delta_test_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/morphing/k_wise_ns_test_app ) + + add_executable( delta_test_app delta_test.cpp ) + add_executable( k_wise_ns_test_app k_wise_ns_test.cpp ) + + target_compile_options( delta_test_app PRIVATE + # space to add custom flags for THIS SPECIFIC TARGET + ) + target_compile_options( k_wise_ns_test_app PRIVATE + # space to add custom flags for THIS SPECIFIC TARGET + ) + + target_link_libraries( delta_test_app PRIVATE "-ldl" ) + target_link_libraries( k_wise_ns_test_app PRIVATE "-ldl" ) + + add_test( delta_test delta_test_app ) + add_test( k_wise_ns_test k_wise_ns_test_app ) + endif(SSE) - target_compile_options( delta_test_app PRIVATE - # space to add custom flags for THIS SPECIFIC TARGET - ) - target_compile_options( k_wise_ns_test_app PRIVATE - # space to add custom flags for THIS SPECIFIC TARGET - ) target_compile_options( vbp_test_app PRIVATE # space to add custom flags for THIS SPECIFIC TARGET ) - target_link_libraries( delta_test_app PRIVATE "-ldl" ) - target_link_libraries( k_wise_ns_test_app PRIVATE "-ldl" ) target_link_libraries( vbp_test_app PRIVATE "-ldl" ) - add_test( delta_test delta_test_app ) - add_test( k_wise_ns_test k_wise_ns_test_app ) + add_test( morph_column_block_test morph_column_block_test_app ) + add_test( morph_saving_offsets_test morph_saving_offsets_test_app ) add_test( vbp_test vbp_test_app ) endif() \ No newline at end of file diff --git a/test/core/morphing/morph_column_block_test.cpp b/test/core/morphing/morph_column_block_test.cpp new file mode 100644 index 00000000..5d599c5e --- /dev/null +++ b/test/core/morphing/morph_column_block_test.cpp @@ -0,0 +1,86 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file morph_saving_offsets_test.cpp + * @brief Tests morphing blocks based on morph_saving_offsets. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace morphstore; +using namespace vectorlib; + +using ve = scalar>; +using compr_f = DEFAULT_DELTA_DYNAMIC_VBP_F(ve); + +int main(void) { + // TODO: 2 test variants (column_size 3000 and 3072) + auto orig_column_size = 3000; + + auto orig_col = generate_sorted_unique(orig_column_size); + + // !! morph saving offsets needs to look if last block can be actually morphed (if not complete -> undefined + // behaviour?) + + auto compr_col_with_offsets = morph_saving_offsets(orig_col); + auto block_count = compr_col_with_offsets->get_block_offsets()->size(); + assert(block_count == round_up_div(orig_column_size, compr_f::m_BlockSize)); + assert(compr_col_with_offsets->last_block_compressed() == (orig_column_size % compr_f::m_BlockSize == 0)); + + // asserting correctness of decompressing a single block + + auto block_size = compr_col_with_offsets->get_block_size(); + + for (uint64_t block = 0; block < compr_col_with_offsets->get_block_offsets()->size(); block++) { + auto value_count = block_size; + + if (block == block_count -1 && !compr_col_with_offsets->last_block_compressed()) { + value_count = compr_col_with_offsets->get_column()->get_count_values() % block_size; + } + + std::cout << "Checking block " << block << " range: " << block * block_size << " .. " + << (block * block_size + block_size) - 1 << std::endl; + + auto decompr_col_block = decompress_column_block(compr_col_with_offsets, block); + + auto expected_col = generate_sorted_unique(value_count, block * 1024); + assert_columns_equal(expected_col, decompr_col_block); + } + + // checking decompressing multiple sequentiell blocks + std::cout << "Checking decompressing multiple blocks " << std::endl; + auto multiple_col_blocks = decompress_column_blocks(compr_col_with_offsets, 0, block_count - 1); + assert_columns_equal(orig_col, multiple_col_blocks); + + return 0; +} \ No newline at end of file diff --git a/test/core/morphing/morph_saving_offsets_test.cpp b/test/core/morphing/morph_saving_offsets_test.cpp new file mode 100644 index 00000000..99384dac --- /dev/null +++ b/test/core/morphing/morph_saving_offsets_test.cpp @@ -0,0 +1,77 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file morph_saving_offsets_test.cpp + * @brief Tests morph_saving_offsets. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace morphstore; +using namespace vectorlib; + +using ve = scalar>; +using compr_f = DEFAULT_DELTA_DYNAMIC_VBP_F(ve); + +int main(void) { + // 3 whole blocks + // TODO: also check for partial block (2 variants) + auto orig_column_size = 3000; + auto orig_col = generate_sorted_unique(orig_column_size); + + auto compr_col_with_offsets = morph_saving_offsets(orig_col); + + assert(compr_col_with_offsets->get_block_offsets()->size() == + round_up_div(orig_column_size, compr_f::m_BlockSize)); + assert(compr_col_with_offsets->last_block_compressed() == (orig_column_size % compr_f::m_BlockSize == 0)); + + // !! currently not equal, as morph_batch on delta block start values very likely depend on previous block + // delta: morphing multi blocks at once -> block start value = diff to previous block + // morphing one block at a time -> block start value = first value of the block + // example: col 0..2047 + // --> morph(): start-values: 0 ; 1 + // --> morph_saving_offsets(): start-values: 0 ; 1024 + /* std::cout << "Checking morph_saving_offset() result column equals the one from morph()" << std::endl; + auto compr_col = morph(orig_col); + assert_columns_equal(compr_col, compr_col_with_offsets->get_column()); */ + + // TODO: get this one to work !! -> block-wise decompression needed + // currently BUG: 0. block: ok , 1. block: +1023, 2. block: + 3070 + std::cout << "Checking morph_saving_offset() decompressed equals original column" << std::endl; + auto decompr_col = morph_saving_offsets(compr_col_with_offsets); + // uncompr_f blocksize == 1 --> no need to save block offsets + assert(decompr_col->get_block_offsets()->size() == 0); + assert_columns_equal(orig_col, decompr_col->get_column()); + + return 0; +} \ No newline at end of file diff --git a/test/core/operators/graph/ldbc/CMakeLists.txt b/test/core/operators/graph/ldbc/CMakeLists.txt new file mode 100644 index 00000000..b32395e3 --- /dev/null +++ b/test/core/operators/graph/ldbc/CMakeLists.txt @@ -0,0 +1,17 @@ +if ( CTEST_ALL OR CTEST_OPERATORS ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/operators/graph/ldbc/bfs_ldbc_csr_graph_test_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/operators/graph/ldbc/bfs_ldbc_adj_graph_test_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/operators/graph/ldbc/page_rank_ldbc_graph_test_app ) + + add_executable( bfs_ldbc_csr_graph_test_app bfs_ldbc_csr_graph_test.cpp) + add_executable( bfs_ldbc_adj_graph_test_app bfs_ldbc_adj_graph_test.cpp) + add_executable( page_rank_ldbc_graph_test_app page_rank_ldbc_graph_test.cpp) + + target_link_libraries(bfs_ldbc_csr_graph_test_app PRIVATE "-ldl" stdc++fs) + target_link_libraries(bfs_ldbc_adj_graph_test_app PRIVATE "-ldl" stdc++fs) + target_link_libraries(page_rank_ldbc_graph_test_app PRIVATE "-ldl" stdc++fs) + + add_test( bfs_ldbc_csr_graph_test_app bfs_ldbc_csr_graph_test_app ) + add_test( bfs_ldbc_adj_graph_test_app bfs_ldbc_adj_graph_test_app ) + add_test( page_rank_ldbc_graph_test_app page_rank_ldbc_graph_test_app ) +endif() \ No newline at end of file diff --git a/test/core/operators/graph/ldbc/bfs_ldbc_adj_graph_test.cpp b/test/core/operators/graph/ldbc/bfs_ldbc_adj_graph_test.cpp new file mode 100644 index 00000000..f22bc1ff --- /dev/null +++ b/test/core/operators/graph/ldbc/bfs_ldbc_adj_graph_test.cpp @@ -0,0 +1,30 @@ 
+/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file bfs_ldbc__adj_graph_test.cpp + * @brief Test for bfs of social network graph in adj list format + * @todo + */ +#include +#include "bfs_ldbc_graph_test.h" + +int main( void ){ + bfs_ldbc_graph_test(); + + return 0; +} diff --git a/test/core/operators/graph/ldbc/bfs_ldbc_csr_graph_test.cpp b/test/core/operators/graph/ldbc/bfs_ldbc_csr_graph_test.cpp new file mode 100644 index 00000000..99c6f276 --- /dev/null +++ b/test/core/operators/graph/ldbc/bfs_ldbc_csr_graph_test.cpp @@ -0,0 +1,29 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file bfs_ldbc_csr_graph_test.cpp + * @brief Test for bfs of social network graph in csr list format + * @todo + */ +#include +#include "bfs_ldbc_graph_test.h" + +int main( void ){ + bfs_ldbc_graph_test(); + return 0; +} diff --git a/test/core/operators/graph/ldbc/bfs_ldbc_graph_test.h b/test/core/operators/graph/ldbc/bfs_ldbc_graph_test.h new file mode 100644 index 00000000..0deebf8d --- /dev/null +++ b/test/core/operators/graph/ldbc/bfs_ldbc_graph_test.h @@ -0,0 +1,62 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. 
* + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file bfs_ldbc_graph_test.cpp + * @brief Test methods for bfs on social network graph + * @todo + */ + +#include +#include +#include + +void print_header(std::string storageFormat) { + + std::cout << "\n"; + std::cout << "**********************************************************" << std::endl; + std::cout << "* MorphStore-Operator-Test: LDBC " << storageFormat << " BFS Test *" << std::endl; + std::cout << "**********************************************************" << std::endl; + std::cout << "\n"; +} + +template void bfs_ldbc_graph_test(void) { +#ifdef LDBC_DIR + static_assert(std::is_base_of::value, + "type parameter of this method must be a graph format"); + + std::shared_ptr graph = std::make_shared(); + std::string storageFormat = graph->get_storage_format(); + + print_header(storageFormat); + + // ldbc importer: path to csv files as parameter: (don't forget the last '/' in adress path) + std::shared_ptr ldbcImport = std::make_shared(LDBC_DIR); + + // generate vertices & edges from LDBC files and insert into graph structure + ldbcImport->import(*graph); + + // some statistics (DEBUG) + std::cout << "Some statistics" << std::endl; + graph->statistics(); + + // for scale factor 1 and including static as well as dynamic part of the graph + std::cout << "Based on Vertex with id 0: " << morphstore::BFS::compute(graph, 0) << " vertices could be explored via BFS"; +#else + throw std::invalid_argument("You forgot to define/uncomment the LDBC_DIR (at CMakeList.txt)"); +#endif +} \ No newline at end of file diff --git a/test/core/operators/graph/ldbc/page_rank_ldbc_graph_test.cpp b/test/core/operators/graph/ldbc/page_rank_ldbc_graph_test.cpp new file mode 100644 index 00000000..065edab5 --- /dev/null +++ b/test/core/operators/graph/ldbc/page_rank_ldbc_graph_test.cpp @@ -0,0 +1,68 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file page_rank_ldbc_graph_test.cpp + * @brief Test methods for PageRank on the ldbc graph (only testing csr out of simplicity) + * @todo + */ +#include +#include +#include +#include + +void print_header(std::string storageFormat) { + + std::cout << "\n"; + std::cout << "**********************************************************" << std::endl; + std::cout << "* MorphStore-Operator-Test: LDBC " << storageFormat << " Page-Rank Test *" << std::endl; + std::cout << "**********************************************************" << std::endl; + std::cout << "\n"; +} + +template +void page_rank_ldbc_graph_test (void) { + + static_assert(std::is_base_of::value, "type parameter of this method must be a graph format"); + + std::shared_ptr graph = std::make_shared(); + std::string storageFormat = graph->get_storage_format(); + + print_header(storageFormat); + + // ldbc importer: path to csv files as parameter: (don't forget the last '/' in adress path) + std::shared_ptr ldbcImport = std::make_shared(LDBC_DIR); + + // generate vertices & edges from LDBC files and insert into graph structure + ldbcImport->import(*graph); + + // some statistics (DEBUG) + std::cout << "Some statistics" << std::endl; + graph->statistics(); + + + auto result = morphstore::PageRank::compute(graph, 30); + + std::cout << result.describe() << std::endl; + + // TODO: some assertions? +} + +int main() { + page_rank_ldbc_graph_test(); + return 0; +} \ No newline at end of file diff --git a/test/core/operators/graph/simple/CMakeLists.txt b/test/core/operators/graph/simple/CMakeLists.txt new file mode 100644 index 00000000..459f107a --- /dev/null +++ b/test/core/operators/graph/simple/CMakeLists.txt @@ -0,0 +1,13 @@ +if ( CTEST_ALL OR CTEST_OPERATORS ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/operators/graph/simple/bfs_simple_csr_graph_test_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/operators/graph/simple/bfs_simple_adj_graph_test_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/operators/graph/simple/page_rank_simple_csr_graph_test_app ) + + add_executable( bfs_simple_csr_graph_test_app bfs_simple_csr_graph_test.cpp) + add_executable( bfs_simple_adj_graph_test_app bfs_simple_adj_graph_test.cpp) + add_executable( page_rank_simple_csr_graph_test_app page_rank_simple_csr_graph_test.cpp) + + add_test( bfs_simple_csr_graph_test_app bfs_simple_csr_graph_test_app ) + add_test( bfs_simple_adj_graph_test_app bfs_simple_adj_graph_test_app ) + add_test( page_rank_simple_csr_graph_test_app page_rank_simple_csr_graph_test_app ) +endif() \ No newline at end of file diff --git a/test/core/operators/graph/simple/bfs_simple_adj_graph_test.cpp b/test/core/operators/graph/simple/bfs_simple_adj_graph_test.cpp new file mode 100644 index 00000000..89ebf7e0 --- /dev/null +++ b/test/core/operators/graph/simple/bfs_simple_adj_graph_test.cpp @@ -0,0 +1,30 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. 
* + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file bfs_simple__adj_graph_test.cpp + * @brief Test bfs on adj-list graph format + * @todo + */ +#include +#include "bfs_simple_graph_test.h" + +int main( void ){ + bfs_simple_graph_test(); + + return 0; +} diff --git a/test/core/operators/graph/simple/bfs_simple_csr_graph_test.cpp b/test/core/operators/graph/simple/bfs_simple_csr_graph_test.cpp new file mode 100644 index 00000000..9d7b0ae3 --- /dev/null +++ b/test/core/operators/graph/simple/bfs_simple_csr_graph_test.cpp @@ -0,0 +1,29 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file bfs_simple_csr_graph_test.cpp + * @brief Test for bfs on csr graph format + * @todo + */ +#include +#include "bfs_simple_graph_test.h" + +int main( void ){ + bfs_simple_graph_test(); + return 0; +} diff --git a/test/core/operators/graph/simple/bfs_simple_graph_test.h b/test/core/operators/graph/simple/bfs_simple_graph_test.h new file mode 100644 index 00000000..cfa094fb --- /dev/null +++ b/test/core/operators/graph/simple/bfs_simple_graph_test.h @@ -0,0 +1,68 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file bfs_simple_graph_test.cpp + * @brief Test methods for bfs on simple graph + * @todo + */ +#include +#include + +void print_header(std::string storageFormat) { + + std::cout << "\n"; + std::cout << "**********************************************************" << std::endl; + std::cout << "* MorphStore-Operator-Test: Simple " << storageFormat << " BFS Test *" << std::endl; + std::cout << "**********************************************************" << std::endl; + std::cout << "\n"; +} + +template +void bfs_simple_graph_test (void) { + + static_assert(std::is_base_of::value, "type parameter of this method must be a graph format"); + + std::shared_ptr graph = std::make_shared(); + print_header(graph->get_storage_format()); + + graph->allocate_graph_structure(4, 4); + + std::map edgeTypeMap = {{1, "knows"}, {2, "likes"}}; + std::map vertexTypeMap = {{0, "Person"}}; + graph->setEdgeTypeDictionary(edgeTypeMap); + graph->set_vertex_type_dictionary(vertexTypeMap); + + uint64_t v1 = graph->add_vertex(0); + uint64_t v2 = graph->add_vertex(0); + uint64_t v3 = graph->add_vertex(0); + graph->add_vertex(0); + + graph->add_edges(v1, {morphstore::Edge(v1, v2, 1)}); + graph->add_edges(v2, {morphstore::Edge(v2, v3, 2), morphstore::Edge(v2, v3, 1)}); + graph->add_edges(v3, {morphstore::Edge(v3, v2, 1)}); + // some statistics (DEBUG) + std::cout << "Some statistics" << std::endl; + graph->statistics(); + + assert(graph->getVertexCount() == 4); + assert(graph->getEdgeCount() == 4); + + assert(morphstore::BFS::compute(graph, v1) == 2); + assert(morphstore::BFS::compute(graph, v2) == 1); + assert(morphstore::BFS::compute(graph, v3) == 1); +} \ No newline at end of file diff --git a/test/core/operators/graph/simple/page_rank_simple_csr_graph_test.cpp b/test/core/operators/graph/simple/page_rank_simple_csr_graph_test.cpp new file mode 100644 index 00000000..ca6a92ea --- /dev/null +++ b/test/core/operators/graph/simple/page_rank_simple_csr_graph_test.cpp @@ -0,0 +1,29 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . 
* + **********************************************************************************************/ + +/** + * @file page_rank_simple_csr_graph_test.cpp + * @brief Test for page-rank on a simple graph in csr format + * @todo + */ +#include +#include "page_rank_simple_graph_test.h" + +int main( void ){ + page_rank_simple_graph_test(); + return 0; +} diff --git a/test/core/operators/graph/simple/page_rank_simple_graph_test.h b/test/core/operators/graph/simple/page_rank_simple_graph_test.h new file mode 100644 index 00000000..273bac76 --- /dev/null +++ b/test/core/operators/graph/simple/page_rank_simple_graph_test.h @@ -0,0 +1,78 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file page_rank_simple_graph_test.cpp + * @brief Test methods for PageRank on a simple graph + * @todo + */ +#include +#include + +void print_header(std::string storageFormat) { + + std::cout << "\n"; + std::cout << "**********************************************************" << std::endl; + std::cout << "* MorphStore-Operator-Test: Simple " << storageFormat << " Page-Rank Test *" << std::endl; + std::cout << "**********************************************************" << std::endl; + std::cout << "\n"; +} + +template +void page_rank_simple_graph_test (void) { + + static_assert(std::is_base_of::value, "type parameter of this method must be a graph format"); + + std::shared_ptr graph = std::make_shared(); + print_header(graph->get_storage_format()); + + graph->allocate_graph_structure(4, 4); + + std::map edgeTypeMap = {{1, "knows"}, {2, "likes"}}; + std::map vertexTypeMap = {{0, "Person"}}; + graph->setEdgeTypeDictionary(edgeTypeMap); + graph->set_vertex_type_dictionary(vertexTypeMap); + + uint64_t v1 = graph->add_vertex(); + uint64_t v2 = graph->add_vertex(); + uint64_t v3 = graph->add_vertex(); + graph->add_vertex(); + + + // + graph->add_edges(v1, {morphstore::Edge(v1, v2, 1)}); + graph->add_edges(v2, {morphstore::Edge(v2, v3, 2), morphstore::Edge(v2, v1, 1)}); + graph->add_edges(v3, {morphstore::Edge(v3, v2, 1)}); + + std::cout << "Some statistics" << std::endl; + graph->statistics(); + + assert(graph->getVertexCount() == 4); + assert(graph->getEdgeCount() == 4); + + auto result = morphstore::PageRank::compute(graph, 100); + + std::cout << result.describe() << std::endl; + + for(uint64_t i = 0; i < result.scores.size(); i++) { + std::cout << "id: " << i << " score: " << result.scores.at(i) << std::endl; + } + + assert(result.scores.at(1) > result.scores.at(0)); + assert(result.scores.at(0) == result.scores.at(2)); + assert(result.scores.at(2) > result.scores.at(3)); +} \ No newline at end of 
file diff --git a/test/core/storage/graph/ldbc/CMakeLists.txt b/test/core/storage/graph/ldbc/CMakeLists.txt new file mode 100644 index 00000000..c9014af9 --- /dev/null +++ b/test/core/storage/graph/ldbc/CMakeLists.txt @@ -0,0 +1,12 @@ +if ( CTEST_ALL OR CTEST_STORAGE ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/storage/graph/ldbc/ldbc_csr_graph_test_app ) + FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/storage/graph/ldbc/ldbc_adj_graph_test_app ) + + add_executable( ldbc_csr_graph_test_app ldbc_csr_graph_test.cpp) + add_executable( ldbc_adj_graph_test_app ldbc_adj_graph_test.cpp) + target_link_libraries(ldbc_adj_graph_test_app PRIVATE "-ldl" stdc++fs) + target_link_libraries(ldbc_csr_graph_test_app PRIVATE "-ldl" stdc++fs) + + add_test( ldbc_csr_graph_test ldbc_csr_graph_test_app ) + add_test( ldbc_adj_graph_test ldbc_adj_graph_test_app ) +endif() \ No newline at end of file diff --git a/test/core/storage/graph/ldbc/ldbc_adj_graph_test.cpp b/test/core/storage/graph/ldbc/ldbc_adj_graph_test.cpp new file mode 100644 index 00000000..6804104c --- /dev/null +++ b/test/core/storage/graph/ldbc/ldbc_adj_graph_test.cpp @@ -0,0 +1,34 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file ldbc_graph_adjacency.cpp + * @brief Test for generating social network graph in adj. list format + BFS measurements + * @todo + */ +#include +#include "ldbc_graph_test.h" + +int main( void ){ + ldbcGraphFormatTest(); + + // Execute BFS measurements: + //std::unique_ptr bfs = std::make_unique(g1); + //bfs->do_measurements(10000, "/home/florentin/Morphstore/Output/adj_bfs_SF1.csv"); + + return 0; +} diff --git a/test/core/storage/graph/ldbc/ldbc_csr_graph_test.cpp b/test/core/storage/graph/ldbc/ldbc_csr_graph_test.cpp new file mode 100644 index 00000000..cf22e888 --- /dev/null +++ b/test/core/storage/graph/ldbc/ldbc_csr_graph_test.cpp @@ -0,0 +1,34 @@ +/********************************************************************************************** + * Copyright (C) 2019 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file ldbc_graph_adjacency.cpp + * @brief Test for generating social network graph in adj. list format + BFS measurements + * @todo + */ +#include +#include "ldbc_graph_test.h" + +int main( void ){ + ldbcGraphFormatTest(); + + // Execute BFS measurements: + //std::unique_ptr bfs = std::make_unique(g1); + //bfs->do_measurements(10000, "/home/florentin/Morphstore/Output/adj_bfs_SF1.csv"); + + return 0; +} diff --git a/test/core/storage/graph/ldbc/ldbc_graph_test.h b/test/core/storage/graph/ldbc/ldbc_graph_test.h new file mode 100644 index 00000000..1a942fe2 --- /dev/null +++ b/test/core/storage/graph/ldbc/ldbc_graph_test.h @@ -0,0 +1,77 @@ +/********************************************************************************************** + * Copyright (C) 2020 by MorphStore-Team * + * * + * This file is part of MorphStore - a compression aware vectorized column store. * + * * + * This program is free software: you can redistribute it and/or modify it under the * + * terms of the GNU General Public License as published by the Free Software Foundation, * + * either version 3 of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * + * See the GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License along with this program. * + * If not, see . * + **********************************************************************************************/ + +/** + * @file ldbc_graph_test.cpp + * @brief Test for generating social network graph in a given graph format + * @todo + */ + +#include +#include + +void print_header(std::string storageFormat) { + std::cout << "\n"; + std::cout << "**********************************************************" << std::endl; + std::cout << "* MorphStore-Storage-Test: LDBC " << storageFormat << " Storage Format *" << std::endl; + std::cout << "**********************************************************" << std::endl; + std::cout << "\n"; +} + +template void ldbcGraphFormatTest(void) { + + static_assert(std::is_base_of::value, + "type parameter of this method must be a graph format"); + +#ifdef LDBC_DIR + std::unique_ptr graph = std::make_unique(); + + std::string storageFormat = graph->get_storage_format(); + + print_header(storageFormat); + + std::unique_ptr ldbcImport = std::make_unique(LDBC_DIR); + + // generate vertices & edges from LDBC files and insert into graph structure + ldbcImport->import(*graph); + graph->statistics(); + + graph->print_vertex_by_id(1035174); + graph->print_edge_by_id(10); + graph->print_neighbors_of_vertex(1035174); + + graph->morph(morphstore::GraphCompressionFormat::DELTA); + + graph->statistics(); + + graph->print_vertex_by_id(1035174); + graph->print_edge_by_id(10); + graph->print_neighbors_of_vertex(1035174); + + // DEBUGGING + //for(uint64_t id = 0; id < graph->getEdgeCount(); id++) { + // graph->get_outgoing_edge_ids(id); + //} + + // measure degree distribution and write to file (file path as parameter): + // TODO: but this into benchmark or so .. 
+    // std::cout << "Measure degree count" << std::endl;
+    // graph->measure_degree_count(targetDir + "graph_degree_count_" + storageFormat + "SF1.csv");
+#else
+    throw std::invalid_argument("You forgot to define/uncomment the LDBC_DIR (at CMakeLists.txt)");
+#endif
+}
\ No newline at end of file
diff --git a/test/core/storage/graph/simple/CMakeLists.txt b/test/core/storage/graph/simple/CMakeLists.txt
new file mode 100644
index 00000000..1b6f5b91
--- /dev/null
+++ b/test/core/storage/graph/simple/CMakeLists.txt
@@ -0,0 +1,10 @@
+if ( CTEST_ALL OR CTEST_STORAGE )
+    FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/storage/graph/simple/simple_csr_graph_test_app )
+    FILE( REMOVE ${CMAKE_BINARY_DIR}/test/core/storage/graph/simple/simple_adj_graph_test_app )
+
+    add_executable( simple_csr_graph_test_app simple_csr_graph_test.cpp )
+    add_executable( simple_adj_graph_test_app simple_adj_graph_test.cpp )
+
+    add_test( simple_csr_graph_test simple_csr_graph_test_app )
+    add_test( simple_adj_graph_test simple_adj_graph_test_app )
+endif()
\ No newline at end of file
diff --git a/test/core/storage/graph/simple/simple_adj_graph_test.cpp b/test/core/storage/graph/simple/simple_adj_graph_test.cpp
new file mode 100644
index 00000000..2b857755
--- /dev/null
+++ b/test/core/storage/graph/simple/simple_adj_graph_test.cpp
@@ -0,0 +1,31 @@
+/**********************************************************************************************
+ * Copyright (C) 2019 by MorphStore-Team *
+ * *
+ * This file is part of MorphStore - a compression aware vectorized column store. *
+ * *
+ * This program is free software: you can redistribute it and/or modify it under the *
+ * terms of the GNU General Public License as published by the Free Software Foundation, *
+ * either version 3 of the License, or (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; *
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+ * See the GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License along with this program. *
+ * If not, see <http://www.gnu.org/licenses/>. *
+ **********************************************************************************************/
+
+/**
+ * @file simple_adj_graph_test.cpp
+ * @brief Test for generating simple graph in adj. list format
+ * @todo
+ */
+
+#include "simple_graph_test.h"
+#include
+
+int main(void) {
+    simpleGraphFormatTest();
+
+    return 0;
+}
diff --git a/test/core/storage/graph/simple/simple_csr_graph_test.cpp b/test/core/storage/graph/simple/simple_csr_graph_test.cpp
new file mode 100644
index 00000000..8231eaba
--- /dev/null
+++ b/test/core/storage/graph/simple/simple_csr_graph_test.cpp
@@ -0,0 +1,31 @@
+/**********************************************************************************************
+ * Copyright (C) 2019 by MorphStore-Team *
+ * *
+ * This file is part of MorphStore - a compression aware vectorized column store. *
+ * *
+ * This program is free software: you can redistribute it and/or modify it under the *
+ * terms of the GNU General Public License as published by the Free Software Foundation, *
+ * either version 3 of the License, or (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; *
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+ * See the GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License along with this program. *
+ * If not, see <http://www.gnu.org/licenses/>. *
+ **********************************************************************************************/
+
+/**
+ * @file simple_csr_graph_test.cpp
+ * @brief Test for generating simple graph in CSR format
+ * @todo
+ */
+
+#include "simple_graph_test.h"
+#include
+
+int main(void) {
+    simpleGraphFormatTest();
+
+    return 0;
+}
diff --git a/test/core/storage/graph/simple/simple_graph_test.h b/test/core/storage/graph/simple/simple_graph_test.h
new file mode 100644
index 00000000..d9746575
--- /dev/null
+++ b/test/core/storage/graph/simple/simple_graph_test.h
@@ -0,0 +1,81 @@
+/**********************************************************************************************
+ * Copyright (C) 2020 by MorphStore-Team *
+ * *
+ * This file is part of MorphStore - a compression aware vectorized column store. *
+ * *
+ * This program is free software: you can redistribute it and/or modify it under the *
+ * terms of the GNU General Public License as published by the Free Software Foundation, *
+ * either version 3 of the License, or (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; *
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+ * See the GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License along with this program. *
+ * If not, see <http://www.gnu.org/licenses/>. *
+ **********************************************************************************************/
+
+/**
+ * @file simple_graph_test.h
+ * @brief Base test for testing graph formats on a very simple graph
+ * @todo
+ */
+#include
+#include
+
+void print_header(std::string storageFormat) {
+    std::cout << "\n";
+    std::cout << "**********************************************************" << std::endl;
+    std::cout << "* MorphStore-Storage-Test: Simple " << storageFormat << " Storage Format *" << std::endl;
+    std::cout << "**********************************************************" << std::endl;
+    std::cout << "\n";
+}
+
+template void simpleGraphFormatTest(void) {
+    static_assert(std::is_base_of::value,
+                  "type parameter of this method must be a graph format");
+
+    std::unique_ptr graph = std::make_unique();
+    print_header(graph->get_storage_format());
+
+    graph->allocate_graph_structure(3, 3);
+
+    std::map edgeTypeMap = {{1, "knows"}, {2, "likes"}};
+    std::map vertexTypeMap = {{0, "Person"}};
+    graph->setEdgeTypeDictionary(edgeTypeMap);
+    graph->set_vertex_type_dictionary(vertexTypeMap);
+
+    uint64_t v1 = graph->add_vertex(0, {{"age", "12"}});
+    uint64_t v2 = graph->add_vertex(0);
+    uint64_t v3 = graph->add_vertex(0);
+
+    auto v1_edge_ids =
+        graph->add_edges(v1, {morphstore::EdgeWithProperties(
+            v1, v2, 1, {{"rating", 42}, {"description", "has the answer to everything"}})});
+    graph->add_edges(v2, {morphstore::Edge(v2, v3, 2), morphstore::Edge(v2, v3, 1)});
+
+    // (DEBUG)
+    graph->statistics();
+
+    graph->print_neighbors_of_vertex(0);
+    /* graph->print_neighbors_of_vertex(v1);
+    graph->print_neighbors_of_vertex(v2);
+    graph->print_neighbors_of_vertex(v3); */
+
+    assert(graph->getVertexCount() == 3);
+    assert(graph->getEdgeCount() == 3);
+    assert((int)graph->get_edge(v1_edge_ids[0]).getProperties().size() == 2);
+    assert(graph->get_out_degree(v3) == 0);
+    assert(graph->get_out_degree(v1) == 1);
+    assert(graph->get_out_degree(v2) == 2);
+
+    graph->morph(morphstore::GraphCompressionFormat::DELTA);
+
+    graph->statistics();
+
+    assert(graph->get_out_degree(v3) == 0);
+    assert(graph->get_out_degree(v1) == 1);
+    assert(graph->get_out_degree(v2) == 2);
+
+    // assert(false);
+}
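Beyond the two committed drivers, the shared `simpleGraphFormatTest` harness is meant to be instantiated once per storage format. The sketch below is illustrative only and not part of this patch: `morphstore::SomeNewFormat` and its include path are placeholder names, and the explicit template argument mirrors the (elided) instantiation used by the CSR and adjacency-list drivers above.

~~~
// simple_somenew_graph_test.cpp -- hypothetical additional driver (not part of this patch).
// "SomeNewFormat" and its header path are placeholders, not existing MorphStore identifiers.

#include "simple_graph_test.h"
// #include <core/storage/graph/formats/some_new_format.h>   // placeholder include

int main(void) {
    // Reuses the shared harness: builds the 3-vertex / 3-edge sample graph,
    // morphs it to GraphCompressionFormat::DELTA and re-checks the out-degrees.
    simpleGraphFormatTest<morphstore::SomeNewFormat>();

    return 0;
}
~~~

A matching `add_executable`/`add_test` entry in `test/core/storage/graph/simple/CMakeLists.txt`, analogous to the existing ones, would register such a driver with CTest.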