From 44d14d0492f334bec44f9bad3637ed7fcf8910f9 Mon Sep 17 00:00:00 2001 From: Nirjhar Mukherjee Date: Fri, 8 Oct 2021 19:33:08 +0000 Subject: [PATCH 1/9] Nirjhar's index persistence work --- libgalois/include/katana/PropertyGraph.h | 32 +++++++++++++++++++++- libgalois/src/PropertyGraph.cpp | 34 +++++++++++++++++++++--- libtsuba/include/tsuba/RDG.h | 10 +++++++ libtsuba/src/RDG.cpp | 23 ++++++++++++++++ libtsuba/src/RDGPartHeader.h | 30 +++++++++++++++++++++ 5 files changed, 124 insertions(+), 5 deletions(-) diff --git a/libgalois/include/katana/PropertyGraph.h b/libgalois/include/katana/PropertyGraph.h index 412d85ad43..c0b55e68b0 100644 --- a/libgalois/include/katana/PropertyGraph.h +++ b/libgalois/include/katana/PropertyGraph.h @@ -88,16 +88,46 @@ class KATANA_EXPORT PropertyGraph { /// The edge EntityTypeID for each edge's most specific type EntityTypeIDArray edge_entity_type_ids_; - // List of node and edge indexes on this graph. + // List of node indexes on this graph. std::vector>> node_indexes_; + //And the columns that created them to persist in json + std::vector node_property_indexes_column_name_; + + // List of edge indexes on this graph. std::vector>> edge_indexes_; + //And the columns that created them to persist in json + std::vector edge_property_indexes_column_name_; PGViewCache pg_view_cache_; friend class PropertyGraphRetractor; + // recreate indexes from json + katana::Result recreate_node_property_indexes() { + node_property_indexes_column_name_ = + rdg_.node_property_indexes_column_name(); + for (const std::string& column_name : node_property_indexes_column_name_) { + auto result = MakeNodeIndex(column_name); + if (!result) { + return result.error(); + } + } + return katana::ResultSuccess(); + } + katana::Result recreate_edge_property_indexes() { + edge_property_indexes_column_name_ = + rdg_.edge_property_indexes_column_name(); + for (const std::string& column_name : edge_property_indexes_column_name_) { + auto result = MakeEdgeIndex(column_name); + if (!result) { + return result.error(); + } + } + return katana::ResultSuccess(); + } + public: /// PropertyView provides a uniform interface when you don't need to /// distinguish operating on edge or node properties diff --git a/libgalois/src/PropertyGraph.cpp b/libgalois/src/PropertyGraph.cpp index 4d3822da5c..45c9fcacd0 100644 --- a/libgalois/src/PropertyGraph.cpp +++ b/libgalois/src/PropertyGraph.cpp @@ -219,6 +219,8 @@ katana::PropertyGraph::Make( katana::GraphTopology topo = KATANA_CHECKED(MapTopology(rdg.topology_file_storage())); + std::unique_ptr property_graph; + if (rdg.IsEntityTypeIDsOutsideProperties()) { KATANA_LOG_DEBUG("loading EntityType data from outside properties"); @@ -236,7 +238,7 @@ katana::PropertyGraph::Make( EntityTypeManager edge_type_manager = KATANA_CHECKED(rdg.edge_entity_type_manager()); - return std::make_unique( + property_graph = std::make_unique( std::move(rdg_file), std::move(rdg), std::move(topo), std::move(node_type_ids), std::move(edge_type_ids), std::move(node_type_manager), std::move(edge_type_manager)); @@ -244,16 +246,26 @@ katana::PropertyGraph::Make( } else { // we must construct id_arrays and managers from properties - auto pg = std::make_unique( + property_graph = std::make_unique( std::move(rdg_file), std::move(rdg), std::move(topo), MakeDefaultEntityTypeIDArray(topo.num_nodes()), MakeDefaultEntityTypeIDArray(topo.num_edges()), EntityTypeManager{}, EntityTypeManager{}); - KATANA_CHECKED(pg->ConstructEntityTypeIDs()); + KATANA_CHECKED(property_graph->ConstructEntityTypeIDs()); + } + + auto res = property_graph->recreate_node_property_indexes(); + if (!res) { + return res.error(); + } - return MakeResult(std::move(pg)); + res = property_graph->recreate_edge_property_indexes(); + if (!res) { + return res.error(); } + + return MakeResult(std::move(property_graph)); } katana::Result> @@ -952,6 +964,13 @@ katana::PropertyGraph::MakeNodeIndex(const std::string& column_name) { node_indexes_.push_back(std::move(index)); + //save the column name the index was created from for easy assess dudring json load/store + node_property_indexes_column_name_.push_back(column_name); + + //persist column names to json, index can now can be recreated using recreate_node_property_indexes() + rdg_.set_node_property_indexes_column_name( + node_property_indexes_column_name_); + return katana::ResultSuccess(); } @@ -981,6 +1000,13 @@ katana::PropertyGraph::MakeEdgeIndex(const std::string& column_name) { edge_indexes_.push_back(std::move(index)); + //save the column name the index was created from for easy assess dudring json load/store + edge_property_indexes_column_name_.push_back(column_name); + + //persist column names to json, index can now can be recreated using recreate_edge_property_indexes() + rdg_.set_edge_property_indexes_column_name( + edge_property_indexes_column_name_); + return katana::ResultSuccess(); } diff --git a/libtsuba/include/tsuba/RDG.h b/libtsuba/include/tsuba/RDG.h index d578280040..623665193e 100644 --- a/libtsuba/include/tsuba/RDG.h +++ b/libtsuba/include/tsuba/RDG.h @@ -230,6 +230,16 @@ class KATANA_EXPORT RDG { /// Remove all edge properties void DropEdgeProperties(); + // write the list of node and edge column names persisted to json, private as it is called only when the node and edge property index vectors are pushed back + void set_node_property_indexes_column_name( + std::vector& node_property_indexes_column_name); + void set_edge_property_indexes_column_name( + std::vector& edge_property_indexes_column_name); + + // read the same as above and recreate indexes + std::vector& node_property_indexes_column_name(); + std::vector& edge_property_indexes_column_name(); + /// Remove topology data katana::Result DropTopology(); diff --git a/libtsuba/src/RDG.cpp b/libtsuba/src/RDG.cpp index 4c037fe30d..5532e9764f 100644 --- a/libtsuba/src/RDG.cpp +++ b/libtsuba/src/RDG.cpp @@ -215,6 +215,29 @@ tsuba::RDG::WritePartArrays(const katana::Uri& dir, tsuba::WriteGroup* desc) { return next_properties; } +//write the list of node and edge column names persisted to json, private as it is called only when the node and edge property index vectors are pushed back +void +tsuba::RDG::set_node_property_indexes_column_name( + std::vector& node_property_indexes_column_name) { + core_->part_header().set_node_property_indexes_column_name( + node_property_indexes_column_name); +} +void +tsuba::RDG::set_edge_property_indexes_column_name( + std::vector& edge_property_indexes_column_name) { + core_->part_header().set_edge_property_indexes_column_name( + edge_property_indexes_column_name); +} +// read the same as above and recreate indexes +std::vector& +tsuba::RDG::node_property_indexes_column_name() { + return core_->part_header().node_property_indexes_column_name(); +} +std::vector& +tsuba::RDG::edge_property_indexes_column_name() { + return core_->part_header().edge_property_indexes_column_name(); +} + katana::Result tsuba::RDG::DoStoreTopology( RDGHandle handle, std::unique_ptr topology_ff, diff --git a/libtsuba/src/RDGPartHeader.h b/libtsuba/src/RDGPartHeader.h index be2b0cc0ba..db213d0353 100644 --- a/libtsuba/src/RDGPartHeader.h +++ b/libtsuba/src/RDGPartHeader.h @@ -278,6 +278,30 @@ class KATANA_EXPORT RDGPartHeader { part_prop_info_list_ = std::move(part_prop_info_list); } + const std::vector& node_property_indexes_column_name() const { + return node_property_indexes_column_name_; + } + std::vector& node_property_indexes_column_name() { + return node_property_indexes_column_name_; + } + void set_node_property_indexes_column_name( + std::vector& node_property_indexes_column_name) { + node_property_indexes_column_name_ = + std::move(node_property_indexes_column_name); + } + + const std::vector& edge_property_indexes_column_name() const { + return edge_property_indexes_column_name_; + } + std::vector& edge_property_indexes_column_name() { + return edge_property_indexes_column_name_; + } + void set_edge_property_indexes_column_name( + std::vector& edge_property_indexes_column_name) { + edge_property_indexes_column_name_ = + std::move(edge_property_indexes_column_name); + } + const PartitionMetadata& metadata() const { return metadata_; } void set_metadata(const PartitionMetadata& metadata) { metadata_ = metadata; } @@ -501,6 +525,12 @@ class KATANA_EXPORT RDGPartHeader { std::vector node_prop_info_list_; std::vector edge_prop_info_list_; + /// Column Names to create property index from on startup + std::vector + node_property_indexes_column_name_; //nhomann serializes this automagically. to/from json required if column name type is (in the future) changed from string to a custom one + std::vector + edge_property_indexes_column_name_; //nhomann serializes this automagically. to/from json required if column name type is (in the future) changed from string to a custom one + /// Metadata filled in by CuSP, or from storage (meta partition file) PartitionMetadata metadata_; From 28bd5ec7ea0f7a65434a4b0054473226cbb3f609 Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Mon, 11 Oct 2021 20:36:50 +0000 Subject: [PATCH 2/9] Update names and data model for persistence. --- libgalois/include/katana/PropertyGraph.h | 31 ++----------- libgalois/src/PropertyGraph.cpp | 56 ++++++++++++++---------- libtsuba/include/tsuba/RDG.h | 19 ++++---- libtsuba/src/RDG.cpp | 37 ++++++++-------- libtsuba/src/RDGPartHeader.h | 38 +++++++--------- 5 files changed, 80 insertions(+), 101 deletions(-) diff --git a/libgalois/include/katana/PropertyGraph.h b/libgalois/include/katana/PropertyGraph.h index c0b55e68b0..b9e4c9ac20 100644 --- a/libgalois/include/katana/PropertyGraph.h +++ b/libgalois/include/katana/PropertyGraph.h @@ -74,6 +74,9 @@ class KATANA_EXPORT PropertyGraph { Result WriteView( const std::string& uri, const std::string& command_line); + // Recreate indexes from json + katana::Result RecreatePropertyIndexes(); + tsuba::RDG rdg_; std::unique_ptr file_; GraphTopology topology_; @@ -91,43 +94,15 @@ class KATANA_EXPORT PropertyGraph { // List of node indexes on this graph. std::vector>> node_indexes_; - //And the columns that created them to persist in json - std::vector node_property_indexes_column_name_; // List of edge indexes on this graph. std::vector>> edge_indexes_; - //And the columns that created them to persist in json - std::vector edge_property_indexes_column_name_; PGViewCache pg_view_cache_; friend class PropertyGraphRetractor; - // recreate indexes from json - katana::Result recreate_node_property_indexes() { - node_property_indexes_column_name_ = - rdg_.node_property_indexes_column_name(); - for (const std::string& column_name : node_property_indexes_column_name_) { - auto result = MakeNodeIndex(column_name); - if (!result) { - return result.error(); - } - } - return katana::ResultSuccess(); - } - katana::Result recreate_edge_property_indexes() { - edge_property_indexes_column_name_ = - rdg_.edge_property_indexes_column_name(); - for (const std::string& column_name : edge_property_indexes_column_name_) { - auto result = MakeEdgeIndex(column_name); - if (!result) { - return result.error(); - } - } - return katana::ResultSuccess(); - } - public: /// PropertyView provides a uniform interface when you don't need to /// distinguish operating on edge or node properties diff --git a/libgalois/src/PropertyGraph.cpp b/libgalois/src/PropertyGraph.cpp index 45c9fcacd0..7e7d65b9b5 100644 --- a/libgalois/src/PropertyGraph.cpp +++ b/libgalois/src/PropertyGraph.cpp @@ -255,15 +255,7 @@ katana::PropertyGraph::Make( KATANA_CHECKED(property_graph->ConstructEntityTypeIDs()); } - auto res = property_graph->recreate_node_property_indexes(); - if (!res) { - return res.error(); - } - - res = property_graph->recreate_edge_property_indexes(); - if (!res) { - return res.error(); - } + KATANA_CHECKED(property_graph->RecreatePropertyIndexes()); return MakeResult(std::move(property_graph)); } @@ -480,6 +472,19 @@ katana::PropertyGraph::DoWrite( ? KATANA_CHECKED(WriteEntityTypeIDsArray(edge_entity_type_ids_)) : nullptr; + // Update lists of node and edge index columns. + std::vector node_index_columns(node_indexes_.size()); + std::transform( + node_indexes_.begin(), node_indexes_.end(), node_index_columns.begin(), + [](const auto& index) { return index->column_name(); }); + rdg_.set_node_property_index_columns(std::move(node_index_columns)); + + std::vector edge_index_columns(edge_indexes_.size()); + std::transform( + edge_indexes_.begin(), edge_indexes_.end(), edge_index_columns.begin(), + [](const auto& index) { return index->column_name(); }); + rdg_.set_edge_property_index_columns(std::move(edge_index_columns)); + return rdg_.Store( handle, command_line, versioning_action, std::move(topology_res), std::move(node_entity_type_id_array_res), @@ -964,13 +969,6 @@ katana::PropertyGraph::MakeNodeIndex(const std::string& column_name) { node_indexes_.push_back(std::move(index)); - //save the column name the index was created from for easy assess dudring json load/store - node_property_indexes_column_name_.push_back(column_name); - - //persist column names to json, index can now can be recreated using recreate_node_property_indexes() - rdg_.set_node_property_indexes_column_name( - node_property_indexes_column_name_); - return katana::ResultSuccess(); } @@ -1000,13 +998,6 @@ katana::PropertyGraph::MakeEdgeIndex(const std::string& column_name) { edge_indexes_.push_back(std::move(index)); - //save the column name the index was created from for easy assess dudring json load/store - edge_property_indexes_column_name_.push_back(column_name); - - //persist column names to json, index can now can be recreated using recreate_edge_property_indexes() - rdg_.set_edge_property_indexes_column_name( - edge_property_indexes_column_name_); - return katana::ResultSuccess(); } @@ -1315,3 +1306,22 @@ katana::PropertyGraph::GetNodePropertyIndex( } return KATANA_ERROR(katana::ErrorCode::NotFound, "node index not found"); } + +katana::Result +katana::PropertyGraph::RecreatePropertyIndexes() { + for (const std::string& column_name : rdg_.node_property_index_columns()) { + auto result = MakeNodeIndex(column_name); + if (!result) { + return result.error(); + } + } + + for (const std::string& column_name : rdg_.edge_property_index_columns()) { + auto result = MakeEdgeIndex(column_name); + if (!result) { + return result.error(); + } + } + + return katana::ResultSuccess(); +} diff --git a/libtsuba/include/tsuba/RDG.h b/libtsuba/include/tsuba/RDG.h index 623665193e..64c0970c66 100644 --- a/libtsuba/include/tsuba/RDG.h +++ b/libtsuba/include/tsuba/RDG.h @@ -230,15 +230,16 @@ class KATANA_EXPORT RDG { /// Remove all edge properties void DropEdgeProperties(); - // write the list of node and edge column names persisted to json, private as it is called only when the node and edge property index vectors are pushed back - void set_node_property_indexes_column_name( - std::vector& node_property_indexes_column_name); - void set_edge_property_indexes_column_name( - std::vector& edge_property_indexes_column_name); - - // read the same as above and recreate indexes - std::vector& node_property_indexes_column_name(); - std::vector& edge_property_indexes_column_name(); + // Write the list of node and edge column names persisted to json. Consumes + // the provided parameters. + void set_node_property_index_columns( + std::vector&& node_property_index_columns); + void set_edge_property_index_columns( + std::vector&& edge_property_index_columns); + + // Return the list of node and edge column names. + const std::vector& node_property_index_columns(); + const std::vector& edge_property_index_columns(); /// Remove topology data katana::Result DropTopology(); diff --git a/libtsuba/src/RDG.cpp b/libtsuba/src/RDG.cpp index 5532e9764f..d7ae48f6c7 100644 --- a/libtsuba/src/RDG.cpp +++ b/libtsuba/src/RDG.cpp @@ -215,27 +215,28 @@ tsuba::RDG::WritePartArrays(const katana::Uri& dir, tsuba::WriteGroup* desc) { return next_properties; } -//write the list of node and edge column names persisted to json, private as it is called only when the node and edge property index vectors are pushed back void -tsuba::RDG::set_node_property_indexes_column_name( - std::vector& node_property_indexes_column_name) { - core_->part_header().set_node_property_indexes_column_name( - node_property_indexes_column_name); +tsuba::RDG::set_node_property_index_columns( + std::vector&& node_property_index_columns) { + core_->part_header().set_node_property_index_columns( + std::move(node_property_index_columns)); } + void -tsuba::RDG::set_edge_property_indexes_column_name( - std::vector& edge_property_indexes_column_name) { - core_->part_header().set_edge_property_indexes_column_name( - edge_property_indexes_column_name); -} -// read the same as above and recreate indexes -std::vector& -tsuba::RDG::node_property_indexes_column_name() { - return core_->part_header().node_property_indexes_column_name(); -} -std::vector& -tsuba::RDG::edge_property_indexes_column_name() { - return core_->part_header().edge_property_indexes_column_name(); +tsuba::RDG::set_edge_property_index_columns( + std::vector&& edge_property_index_columns) { + core_->part_header().set_edge_property_index_columns( + std::move(edge_property_index_columns)); +} + +const std::vector& +tsuba::RDG::node_property_index_columns() { + return core_->part_header().node_property_index_columns(); +} + +const std::vector& +tsuba::RDG::edge_property_index_columns() { + return core_->part_header().edge_property_index_columns(); } katana::Result diff --git a/libtsuba/src/RDGPartHeader.h b/libtsuba/src/RDGPartHeader.h index db213d0353..e7be960122 100644 --- a/libtsuba/src/RDGPartHeader.h +++ b/libtsuba/src/RDGPartHeader.h @@ -278,28 +278,22 @@ class KATANA_EXPORT RDGPartHeader { part_prop_info_list_ = std::move(part_prop_info_list); } - const std::vector& node_property_indexes_column_name() const { - return node_property_indexes_column_name_; - } - std::vector& node_property_indexes_column_name() { - return node_property_indexes_column_name_; - } - void set_node_property_indexes_column_name( - std::vector& node_property_indexes_column_name) { - node_property_indexes_column_name_ = - std::move(node_property_indexes_column_name); + const std::vector& node_property_index_columns() const { + return node_property_index_columns_; } - const std::vector& edge_property_indexes_column_name() const { - return edge_property_indexes_column_name_; + void set_node_property_index_columns( + std::vector&& node_property_index_columns) { + node_property_index_columns_ = std::move(node_property_index_columns); } - std::vector& edge_property_indexes_column_name() { - return edge_property_indexes_column_name_; + + const std::vector& edge_property_index_columns() const { + return edge_property_index_columns_; } - void set_edge_property_indexes_column_name( - std::vector& edge_property_indexes_column_name) { - edge_property_indexes_column_name_ = - std::move(edge_property_indexes_column_name); + + void set_edge_property_index_columns( + std::vector&& edge_property_index_columns) { + edge_property_index_columns_ = std::move(edge_property_index_columns); } const PartitionMetadata& metadata() const { return metadata_; } @@ -525,11 +519,9 @@ class KATANA_EXPORT RDGPartHeader { std::vector node_prop_info_list_; std::vector edge_prop_info_list_; - /// Column Names to create property index from on startup - std::vector - node_property_indexes_column_name_; //nhomann serializes this automagically. to/from json required if column name type is (in the future) changed from string to a custom one - std::vector - edge_property_indexes_column_name_; //nhomann serializes this automagically. to/from json required if column name type is (in the future) changed from string to a custom one + /// Column names to create property index from on startup + std::vector node_property_index_columns_; + std::vector edge_property_index_columns_; /// Metadata filled in by CuSP, or from storage (meta partition file) PartitionMetadata metadata_; From c8484445175454bbc0325612e501fbabbc2d3fe1 Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Mon, 11 Oct 2021 20:59:01 +0000 Subject: [PATCH 3/9] Add index columns to to_json and from_json --- libtsuba/src/RDGPartHeader.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/libtsuba/src/RDGPartHeader.cpp b/libtsuba/src/RDGPartHeader.cpp index 6adf228db1..35130b2de1 100644 --- a/libtsuba/src/RDGPartHeader.cpp +++ b/libtsuba/src/RDGPartHeader.cpp @@ -33,6 +33,9 @@ const char* kEdgeEntityTypeIDDictionaryKey = const char* kNodeEntityTypeIDNameKey = "kg.v1.node_entity_type_id_name"; // Name maps from Atomic Edge Entity Type ID to set of string names for the Edge Entity Type ID const char* kEdgeEntityTypeIDNameKey = "kg.v1.edge_entity_type_id_name"; +// List of node and edge indexed columns +const char* kNodePropertyIndexColumnsKey = "kg.v1.node_property_index_columns"; +const char* kEdgePropertyIndexColumnsKey = "kg.v1.edge_property_index_columns"; // //constexpr std::string_view mirror_nodes_prop_name = "mirror_nodes"; @@ -288,6 +291,8 @@ tsuba::to_json(json& j, const tsuba::RDGPartHeader& header) { {kEdgeEntityTypeIDDictionaryKey, header.edge_entity_type_id_dictionary_}, {kNodeEntityTypeIDNameKey, header.node_entity_type_id_name_}, {kEdgeEntityTypeIDNameKey, header.edge_entity_type_id_name_}, + {kNodePropertyIndexColumnsKey, header.node_property_index_columns_}, + {kEdgePropertyIndexColumnsKey, header.edge_property_index_columns_}, }; } @@ -319,6 +324,16 @@ tsuba::from_json(const json& j, tsuba::RDGPartHeader& header) { j.at(kNodeEntityTypeIDNameKey).get_to(header.node_entity_type_id_name_); j.at(kEdgeEntityTypeIDNameKey).get_to(header.edge_entity_type_id_name_); } + + header.node_property_index_columns_ = {}; + if (auto it = j.find(kNodePropertyIndexColumnsKey); it != j.end()) { + it->get_to(header.node_property_index_columns_); + } + + header.edge_property_index_columns_ = {}; + if (auto it = j.find(kEdgePropertyIndexColumnsKey); it != j.end()) { + it->get_to(header.edge_property_index_columns_); + } } void From a6fcbe662904875514e1ac73bba8865b4b41bc21 Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Mon, 11 Oct 2021 22:29:02 +0000 Subject: [PATCH 4/9] Comment tweaks. --- libtsuba/include/tsuba/RDG.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libtsuba/include/tsuba/RDG.h b/libtsuba/include/tsuba/RDG.h index 64c0970c66..99756a22aa 100644 --- a/libtsuba/include/tsuba/RDG.h +++ b/libtsuba/include/tsuba/RDG.h @@ -230,8 +230,8 @@ class KATANA_EXPORT RDG { /// Remove all edge properties void DropEdgeProperties(); - // Write the list of node and edge column names persisted to json. Consumes - // the provided parameters. + // Set the list of node and edge column names to persist. Consumes the + // provided parameters. void set_node_property_index_columns( std::vector&& node_property_index_columns); void set_edge_property_index_columns( From 733d8d0055ddfe73ea207b2d8ab9314bea0772ec Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Tue, 12 Oct 2021 16:26:54 +0000 Subject: [PATCH 5/9] Add a test for writing/loading a graph with indexes --- libgalois/test/property-index.cpp | 121 ++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 21 deletions(-) diff --git a/libgalois/test/property-index.cpp b/libgalois/test/property-index.cpp index 89107c2f3a..94df35b5dc 100644 --- a/libgalois/test/property-index.cpp +++ b/libgalois/test/property-index.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include "TestTypedPropertyGraph.h" #include "katana/Logging.h" @@ -11,8 +12,12 @@ template struct NodeOrEdge { static katana::Result*> MakeIndex( katana::PropertyGraph* pg, const std::string& column_name); + static katana::Result*> GetIndex( + katana::PropertyGraph* pg, const std::string& column_name); static katana::Result AddProperties( katana::PropertyGraph* pg, std::shared_ptr properties); + static std::shared_ptr GetProperty( + katana::PropertyGraph* pg, const std::string& column_name); static size_t num_entities(katana::PropertyGraph* pg); }; @@ -21,12 +26,7 @@ using Edge = NodeOrEdge; template <> katana::Result*> -Node::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { - auto result = pg->MakeNodeIndex(column_name); - if (!result) { - return result.error(); - } - +Node::GetIndex(katana::PropertyGraph* pg, const std::string& column_name) { for (const auto& index : pg->node_indexes()) { if (index->column_name() == column_name) { return index.get(); @@ -37,13 +37,15 @@ Node::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { } template <> -katana::Result*> -Edge::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { - auto result = pg->MakeEdgeIndex(column_name); - if (!result) { - return result.error(); - } +katana::Result*> +Node::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { + KATANA_CHECKED(pg->MakeNodeIndex(column_name)); + return Node::GetIndex(pg, column_name); +} +template <> +katana::Result*> +Edge::GetIndex(katana::PropertyGraph* pg, const std::string& column_name) { for (const auto& index : pg->edge_indexes()) { if (index->column_name() == column_name) { return index.get(); @@ -53,6 +55,13 @@ Edge::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { return KATANA_ERROR(katana::ErrorCode::NotFound, "Created index not found"); } +template <> +katana::Result*> +Edge::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { + KATANA_CHECKED(pg->MakeEdgeIndex(column_name)); + return Edge::GetIndex(pg, column_name); +} + template <> size_t Node::num_entities(katana::PropertyGraph* pg) { @@ -79,6 +88,22 @@ Edge::AddProperties( return pg->AddEdgeProperties(properties); } +template <> +std::shared_ptr +Node::GetProperty(katana::PropertyGraph* pg, const std::string& column_name) { + auto prop_result = pg->GetNodeProperty(column_name); + KATANA_LOG_ASSERT(prop_result); + return prop_result.value()->chunk(0); +} + +template <> +std::shared_ptr +Edge::GetProperty(katana::PropertyGraph* pg, const std::string& column_name) { + auto prop_result = pg->GetEdgeProperty(column_name); + KATANA_LOG_ASSERT(prop_result); + return prop_result.value()->chunk(0); +} + template std::shared_ptr CreatePrimitiveProperty( @@ -200,11 +225,8 @@ TestPrimitiveIndex(size_t num_nodes, size_t line_width) { } template -void -TestStringIndex(size_t num_nodes, size_t line_width) { - using IndexType = katana::StringPropertyIndex; - using ArrayType = arrow::LargeStringArray; - +std::unique_ptr +MakeStringGraph(size_t num_nodes, size_t line_width) { LinePolicy policy{line_width}; std::unique_ptr g = @@ -230,6 +252,32 @@ TestStringIndex(size_t num_nodes, size_t line_width) { nonuniform_index_result, "Could not create index: {}", nonuniform_index_result.error()); + return g; +} + +template +std::unique_ptr +TestStringIndex( + std::unique_ptr g, size_t num_nodes, + size_t line_width) { + using IndexType = katana::StringPropertyIndex; + using ArrayType = arrow::LargeStringArray; + + if (!g) { + g = MakeStringGraph(num_nodes, line_width); + } + + auto uniform_index_result = + NodeOrEdge::GetIndex(g.get(), "uniform"); + KATANA_LOG_VASSERT( + uniform_index_result, "Could not get index: {}", + uniform_index_result.error()); + auto nonuniform_index_result = + NodeOrEdge::GetIndex(g.get(), "nonuniform"); + KATANA_LOG_VASSERT( + nonuniform_index_result, "Could not get index: {}", + nonuniform_index_result.error()); + auto* uniform_index = static_cast(uniform_index_result.value()); auto* nonuniform_index = static_cast(nonuniform_index_result.value()); @@ -253,8 +301,8 @@ TestStringIndex(size_t num_nodes, size_t line_width) { } // The non-uniform index starts at "aaaa" and increases by 2. - auto typed_prop = - std::static_pointer_cast(nonuniform_prop->column(0)->chunk(0)); + auto typed_prop = std::static_pointer_cast( + NodeOrEdge::GetProperty(g.get(), "nonuniform")); it = nonuniform_index->Find("aaaj"); KATANA_LOG_ASSERT(it == nonuniform_index->end()); it = nonuniform_index->LowerBound("aaaj"); @@ -263,6 +311,31 @@ TestStringIndex(size_t num_nodes, size_t line_width) { it = nonuniform_index->UpperBound("aaak"); KATANA_LOG_ASSERT(it != nonuniform_index->end()); KATANA_LOG_ASSERT(typed_prop->GetView(*it) == "aaam"); + + return g; +} + +std::unique_ptr +ReloadGraph(std::unique_ptr g) { + auto uri_res = katana::Uri::MakeRand("/tmp/propertyfilegraph"); + KATANA_LOG_ASSERT(uri_res); + std::string rdg_dir(uri_res.value().path()); + + auto write_result = g->Write(rdg_dir, "test command line"); + + if (!write_result) { + boost::filesystem::remove_all(rdg_dir); + KATANA_LOG_FATAL("writing result: {}", write_result.error()); + } + + katana::Result> make_result = + katana::PropertyGraph::Make(rdg_dir, tsuba::RDGLoadOptions()); + boost::filesystem::remove_all(rdg_dir); + if (!make_result) { + KATANA_LOG_FATAL("making result: {}", make_result.error()); + } + + return std::move(make_result.value()); } int @@ -274,8 +347,14 @@ main() { TestPrimitiveIndex(10, 3); TestPrimitiveIndex(10, 3); - TestStringIndex(10, 3); - TestStringIndex(10, 3); + auto node_g = TestStringIndex(nullptr, 10, 3); + auto edge_g = TestStringIndex(nullptr, 10, 3); + + node_g = ReloadGraph(std::move(node_g)); + edge_g = ReloadGraph(std::move(edge_g)); + + TestStringIndex(std::move(node_g), 10, 3); + TestStringIndex(std::move(edge_g), 10, 3); return 0; } From d62edf70e8ab6d80bf27e8e0ff4b5cfe93c089c8 Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Tue, 12 Oct 2021 17:02:23 +0000 Subject: [PATCH 6/9] Use KATANA_CHECKED --- libgalois/src/PropertyGraph.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/libgalois/src/PropertyGraph.cpp b/libgalois/src/PropertyGraph.cpp index 7e7d65b9b5..febb814dae 100644 --- a/libgalois/src/PropertyGraph.cpp +++ b/libgalois/src/PropertyGraph.cpp @@ -1310,17 +1310,11 @@ katana::PropertyGraph::GetNodePropertyIndex( katana::Result katana::PropertyGraph::RecreatePropertyIndexes() { for (const std::string& column_name : rdg_.node_property_index_columns()) { - auto result = MakeNodeIndex(column_name); - if (!result) { - return result.error(); - } + KATANA_CHECKED(MakeNodeIndex(column_name)); } for (const std::string& column_name : rdg_.edge_property_index_columns()) { - auto result = MakeEdgeIndex(column_name); - if (!result) { - return result.error(); - } + KATANA_CHECKED(MakeEdgeIndex(column_name)); } return katana::ResultSuccess(); From a85f245fb60bf50dd75a27d4de18aa69cef1c394 Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Wed, 13 Oct 2021 22:02:45 +0000 Subject: [PATCH 7/9] Slashy slashy --- libgalois/include/katana/PropertyGraph.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libgalois/include/katana/PropertyGraph.h b/libgalois/include/katana/PropertyGraph.h index b9e4c9ac20..162ef96aa4 100644 --- a/libgalois/include/katana/PropertyGraph.h +++ b/libgalois/include/katana/PropertyGraph.h @@ -74,7 +74,7 @@ class KATANA_EXPORT PropertyGraph { Result WriteView( const std::string& uri, const std::string& command_line); - // Recreate indexes from json + /// Recreate indexes listed in RDG metadata. katana::Result RecreatePropertyIndexes(); tsuba::RDG rdg_; @@ -91,11 +91,11 @@ class KATANA_EXPORT PropertyGraph { /// The edge EntityTypeID for each edge's most specific type EntityTypeIDArray edge_entity_type_ids_; - // List of node indexes on this graph. + /// List of node indexes on this graph. std::vector>> node_indexes_; - // List of edge indexes on this graph. + /// List of edge indexes on this graph. std::vector>> edge_indexes_; From ff38a7ee42469cb4e0ea0add4c17491be8994119 Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Thu, 14 Oct 2021 14:58:03 +0000 Subject: [PATCH 8/9] Only make indexes for loaded properties. Further comment massage. --- libgalois/include/katana/PropertyGraph.h | 2 +- libgalois/src/PropertyGraph.cpp | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/libgalois/include/katana/PropertyGraph.h b/libgalois/include/katana/PropertyGraph.h index 162ef96aa4..f0545d95c6 100644 --- a/libgalois/include/katana/PropertyGraph.h +++ b/libgalois/include/katana/PropertyGraph.h @@ -74,7 +74,7 @@ class KATANA_EXPORT PropertyGraph { Result WriteView( const std::string& uri, const std::string& command_line); - /// Recreate indexes listed in RDG metadata. + /// Recreate indexes from column names in RDG metadata. katana::Result RecreatePropertyIndexes(); tsuba::RDG rdg_; diff --git a/libgalois/src/PropertyGraph.cpp b/libgalois/src/PropertyGraph.cpp index febb814dae..6c2ebe11a2 100644 --- a/libgalois/src/PropertyGraph.cpp +++ b/libgalois/src/PropertyGraph.cpp @@ -1310,11 +1310,15 @@ katana::PropertyGraph::GetNodePropertyIndex( katana::Result katana::PropertyGraph::RecreatePropertyIndexes() { for (const std::string& column_name : rdg_.node_property_index_columns()) { - KATANA_CHECKED(MakeNodeIndex(column_name)); + if (HasNodeProperty(column_name)) { + KATANA_CHECKED(MakeNodeIndex(column_name)); + } } for (const std::string& column_name : rdg_.edge_property_index_columns()) { - KATANA_CHECKED(MakeEdgeIndex(column_name)); + if (HasEdgeProperty(column_name)) { + KATANA_CHECKED(MakeEdgeIndex(column_name)); + } } return katana::ResultSuccess(); From 679d473b4d846d057f76e647331af93f50998b81 Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Thu, 14 Oct 2021 15:15:13 +0000 Subject: [PATCH 9/9] Devalue rvalues --- libgalois/src/PropertyGraph.cpp | 4 ++-- libtsuba/include/tsuba/RDG.h | 4 ++-- libtsuba/src/RDG.cpp | 8 ++++---- libtsuba/src/RDGPartHeader.h | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/libgalois/src/PropertyGraph.cpp b/libgalois/src/PropertyGraph.cpp index 6c2ebe11a2..7a7c952110 100644 --- a/libgalois/src/PropertyGraph.cpp +++ b/libgalois/src/PropertyGraph.cpp @@ -477,13 +477,13 @@ katana::PropertyGraph::DoWrite( std::transform( node_indexes_.begin(), node_indexes_.end(), node_index_columns.begin(), [](const auto& index) { return index->column_name(); }); - rdg_.set_node_property_index_columns(std::move(node_index_columns)); + rdg_.set_node_property_index_columns(node_index_columns); std::vector edge_index_columns(edge_indexes_.size()); std::transform( edge_indexes_.begin(), edge_indexes_.end(), edge_index_columns.begin(), [](const auto& index) { return index->column_name(); }); - rdg_.set_edge_property_index_columns(std::move(edge_index_columns)); + rdg_.set_edge_property_index_columns(edge_index_columns); return rdg_.Store( handle, command_line, versioning_action, std::move(topology_res), diff --git a/libtsuba/include/tsuba/RDG.h b/libtsuba/include/tsuba/RDG.h index 99756a22aa..a865b10617 100644 --- a/libtsuba/include/tsuba/RDG.h +++ b/libtsuba/include/tsuba/RDG.h @@ -233,9 +233,9 @@ class KATANA_EXPORT RDG { // Set the list of node and edge column names to persist. Consumes the // provided parameters. void set_node_property_index_columns( - std::vector&& node_property_index_columns); + const std::vector& node_property_index_columns); void set_edge_property_index_columns( - std::vector&& edge_property_index_columns); + const std::vector& edge_property_index_columns); // Return the list of node and edge column names. const std::vector& node_property_index_columns(); diff --git a/libtsuba/src/RDG.cpp b/libtsuba/src/RDG.cpp index d7ae48f6c7..2148963ae9 100644 --- a/libtsuba/src/RDG.cpp +++ b/libtsuba/src/RDG.cpp @@ -217,16 +217,16 @@ tsuba::RDG::WritePartArrays(const katana::Uri& dir, tsuba::WriteGroup* desc) { void tsuba::RDG::set_node_property_index_columns( - std::vector&& node_property_index_columns) { + const std::vector& node_property_index_columns) { core_->part_header().set_node_property_index_columns( - std::move(node_property_index_columns)); + node_property_index_columns); } void tsuba::RDG::set_edge_property_index_columns( - std::vector&& edge_property_index_columns) { + const std::vector& edge_property_index_columns) { core_->part_header().set_edge_property_index_columns( - std::move(edge_property_index_columns)); + edge_property_index_columns); } const std::vector& diff --git a/libtsuba/src/RDGPartHeader.h b/libtsuba/src/RDGPartHeader.h index e7be960122..18a7948066 100644 --- a/libtsuba/src/RDGPartHeader.h +++ b/libtsuba/src/RDGPartHeader.h @@ -283,8 +283,8 @@ class KATANA_EXPORT RDGPartHeader { } void set_node_property_index_columns( - std::vector&& node_property_index_columns) { - node_property_index_columns_ = std::move(node_property_index_columns); + const std::vector& node_property_index_columns) { + node_property_index_columns_ = node_property_index_columns; } const std::vector& edge_property_index_columns() const { @@ -292,8 +292,8 @@ class KATANA_EXPORT RDGPartHeader { } void set_edge_property_index_columns( - std::vector&& edge_property_index_columns) { - edge_property_index_columns_ = std::move(edge_property_index_columns); + const std::vector& edge_property_index_columns) { + edge_property_index_columns_ = edge_property_index_columns; } const PartitionMetadata& metadata() const { return metadata_; }