From 1270f57e95ef26eb34d8d1e05ae68840f580efbd Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Wed, 19 Jan 2022 00:25:44 +0000 Subject: [PATCH 01/21] initial plugin for katanagraph --- .pre-commit-config.yaml | 4 +- dev-environment.yml | 2 + metagraph/plugins/__init__.py | 10 ++- metagraph/plugins/katana/__init__.py | 1 + metagraph/plugins/katana/algorithms.py | 109 ++++++++++++++++++++++++ metagraph/plugins/katana/translators.py | 95 +++++++++++++++++++++ metagraph/plugins/katana/types.py | 91 ++++++++++++++++++++ 7 files changed, 309 insertions(+), 3 deletions(-) create mode 100644 metagraph/plugins/katana/__init__.py create mode 100644 metagraph/plugins/katana/algorithms.py create mode 100644 metagraph/plugins/katana/translators.py create mode 100644 metagraph/plugins/katana/types.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c86af76e..57c25eb2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: stable + rev: 21.12b0 hooks: - id: black - language_version: python3.7 + language_version: python3.8 diff --git a/dev-environment.yml b/dev-environment.yml index 2db24ce9..c3e823a0 100644 --- a/dev-environment.yml +++ b/dev-environment.yml @@ -32,3 +32,5 @@ dependencies: - conda-forge::grblas - conda-forge::python-louvain - conda-forge::websockets + - katanagraph/label/dev::katana-cpp + - katanagraph/label/dev::katana-python diff --git a/metagraph/plugins/__init__.py b/metagraph/plugins/__init__.py index 2e1faf36..53de8ee0 100644 --- a/metagraph/plugins/__init__.py +++ b/metagraph/plugins/__init__.py @@ -48,6 +48,13 @@ except ImportError: # pragma: no cover has_grblas = False +try: + import katana as _ + + has_katana = True +except ImportError: # pragma: no cover + has_katana = False + try: import numba as _ @@ -66,11 +73,12 @@ def find_plugins(): - from . import core, graphblas, networkx, numpy, pandas, python, scipy + from . import core, graphblas, katana, networkx, numpy, pandas, python, scipy # Default Plugins registry.register_from_modules(core) registry.register_from_modules(graphblas, name="core_graphblas") + registry.register_from_modules(katana, name="core_katana") registry.register_from_modules(networkx, name="core_networkx") registry.register_from_modules(numpy, name="core_numpy") registry.register_from_modules(pandas, name="core_pandas") diff --git a/metagraph/plugins/katana/__init__.py b/metagraph/plugins/katana/__init__.py new file mode 100644 index 00000000..d104c1fb --- /dev/null +++ b/metagraph/plugins/katana/__init__.py @@ -0,0 +1 @@ +from . import algorithms, translators, types diff --git a/metagraph/plugins/katana/algorithms.py b/metagraph/plugins/katana/algorithms.py new file mode 100644 index 00000000..6885c4a0 --- /dev/null +++ b/metagraph/plugins/katana/algorithms.py @@ -0,0 +1,109 @@ +from typing import Tuple + +import numpy as np +from metagraph import NodeID, abstract_algorithm, concrete_algorithm +from metagraph.plugins.core.types import Graph, Vector +from metagraph.plugins.networkx.types import NetworkXGraph +from metagraph.plugins.numpy.types import NumpyNodeMap, NumpyVectorType + +from katana.local.analytics import bfs, jaccard, local_clustering_coefficient + +from .types import KatanaGraph + + +def has_node_prop(kg, node_prop_name): + nschema = kg.loaded_node_schema() + for i in range(len(nschema)): + if nschema[i].name == node_prop_name: + return True + return False + + +# breadth-first search, +@concrete_algorithm("traversal.bfs_iter") +def kg_bfs_iter( + graph: KatanaGraph, source_node: NodeID, depth_limit: int +) -> NumpyVectorType: + """ + .. py:function:: metagraph.algos.traversal.bfs_iter(graph, source_node, depth_limit) + + Use BFS to traverse a graph given a source node and BFS depth limit (implemented by a Katana Graph API) + + :param KatanaGraph graph: The origianl graph to traverse + :param NodeID source_node: The starting node for BFS + :param int depth: The BFS depth + :return: the BFS traversal result in order + :rtype: NumpyVectorType + """ + bfs_prop_name = "bfs_prop_start_from_" + str(source_node) + depth_limit_internal = ( + 2 ** 30 - 1 if depth_limit == -1 else depth_limit + ) # return all the reachable nodes for the default value of depth_limit (-1) + start_node = source_node + if not has_node_prop(graph.value, bfs_prop_name): + bfs(graph.value, start_node, bfs_prop_name) + pg_bfs_list = ( + graph.value.get_node_property(bfs_prop_name).to_pandas().values.tolist() + ) + new_list = [ + [i, pg_bfs_list[i]] + for i in range(len(pg_bfs_list)) + if pg_bfs_list[i] < depth_limit_internal + ] + sorted_list = sorted(new_list, key=lambda each: (each[1], each[0])) + bfs_arr = np.array([each[0] for each in sorted_list]) + return bfs_arr + + +# TODO(pengfei): +# single-source shortest path +# connected components +# PageRank +# betweenness centrality +# triangle counting +# Louvain community detection +# subgraph extraction +# community detection using label propagation\ + + +@abstract_algorithm("traversal.jaccard") +def jaccard_similarity( + graph: Graph( + is_directed=False, + edge_type="map", + edge_dtype={"int", "float"}, + edge_has_negative_weights=False, + ), + compare_node: NodeID, +) -> Vector: + pass + + +@concrete_algorithm("traversal.jaccard") +def jaccard_similarity_kg(graph: KatanaGraph, compare_node: NodeID) -> NumpyVectorType: + jaccard_prop_name = "jaccard_prop_with_" + str(compare_node) + if not has_node_prop(graph.value, jaccard_prop_name): + jaccard(graph.value, compare_node, jaccard_prop_name) + jaccard_similarities = graph.value.get_node_property(jaccard_prop_name).to_numpy() + return jaccard_similarities + + +@abstract_algorithm("clustering.local_clustering_coefficient") +def local_clustering( + graph: Graph( + is_directed=False, + edge_type="map", + edge_dtype={"int", "float"}, + edge_has_negative_weights=False, + ), + prop_name: str = "output", +) -> Vector: + pass + + +@concrete_algorithm("clustering.local_clustering_coefficient") +def local_clustering_kg(graph: KatanaGraph, prop_name: str) -> NumpyVectorType: + if not has_node_prop(graph.value, prop_name): + local_clustering_coefficient(graph.value, prop_name) + out = graph.value.get_node_property(prop_name) + return out.to_pandas().values diff --git a/metagraph/plugins/katana/translators.py b/metagraph/plugins/katana/translators.py new file mode 100644 index 00000000..3eea6e19 --- /dev/null +++ b/metagraph/plugins/katana/translators.py @@ -0,0 +1,95 @@ +from collections import OrderedDict + +import metagraph as mg +import networkx as nx +import numpy as np +import pyarrow +from metagraph import translator +from metagraph.plugins.networkx.types import NetworkXGraph +from scipy.sparse import csr_matrix + +from katana.local.import_data import from_csr + +from .types import KatanaGraph + + +@translator +def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: + aprops = NetworkXGraph.Type.compute_abstract_properties( + x, {"node_dtype", "node_type", "edge_type", "is_directed"} + ) + is_weighted = aprops["edge_type"] == "map" + # get the edge list directly from the NetworkX Graph + elist_raw = list(x.value.edges(data=True)) + # sort the eddge list and node list + if aprops["is_directed"]: + elist = sorted(elist_raw, key=lambda each: (each[0], each[1])) + else: + inv_elist = [(each[1], each[0], each[2]) for each in elist_raw] + elist = sorted(elist_raw + inv_elist, key=lambda each: (each[0], each[1])) + nlist = sorted(list(x.value.nodes(data=True)), key=lambda each: each[0]) + # build the CSR format from the edge list (weight, (src, dst)) + row = np.array([each_edge[0] for each_edge in elist]) + col = np.array([each_edge[1] for each_edge in elist]) + data = np.array([each_edge[2]["weight"] for each_edge in elist]) + csr = csr_matrix((data, (row, col)), shape=(len(nlist), len(nlist))) + # call the katana api to build a Graph (unweighted) from the CSR format + # noting that the first 0 in csr.indptr is excluded + pg = from_csr(csr.indptr[1:], csr.indices) + # add the edge weight as a new property + t = pyarrow.table(dict(value_from_translator=data)) + pg.add_edge_property(t) + # use the metagraph's Graph warpper to wrap the katana.local.Graph + return KatanaGraph( + pg_graph=pg, + is_weighted=is_weighted, + edge_weight_prop_name="value_from_translator", + is_directed=aprops["is_directed"], + node_weight_index=0, + ) + + +@translator +def katanagraph_to_networkx(x: KatanaGraph, **props) -> NetworkXGraph: + pg = x.value + dest_list = [ + dest for src in pg for dest in [pg.get_edge_dest(e) for e in pg.edge_ids(src)] + ] + for src in pg: + if pg.edge_ids(src) == range(0, 0): + if src not in dest_list: + raise ValueError("NetworkX does not support graph with isolated nodes") + edge_dict_count = { + (src, dest): 0 + for src in pg + for dest in [pg.get_edge_dest(e) for e in pg.edge_ids(src)] + } + for src in pg: + for dest in [pg.get_edge_dest(e) for e in pg.edge_ids(src)]: + edge_dict_count[(src, dest)] += 1 + if edge_dict_count[(src, dest)] > 1: + raise ValueError( + "NetworkX does not support graph with duplicated edges" + ) + elist = [] + edge_weights = pg.get_edge_property(x.edge_weight_prop_name).to_pandas() + if isinstance(edge_weights[0], np.int64): + elist = [ + (nid, pg.get_edge_dest(j), int(edge_weights[j])) + for nid in pg + for j in pg.edge_ids(nid) + ] + elif isinstance(edge_weights[0], pyarrow.lib.Int64Scalar): + elist = [ + (nid, pg.get_edge_dest(j), edge_weights[j].as_py()) + for nid in pg + for j in pg.edge_ids(nid) + ] + # TODO(pengfei): add more type conversion support: like np.float64 -> float?? + elist = list(OrderedDict.fromkeys(elist)) + if x.is_directed: + graph = nx.DiGraph() + else: + graph = nx.Graph() + graph.add_weighted_edges_from(elist) + return mg.wrappers.Graph.NetworkXGraph(graph) diff --git a/metagraph/plugins/katana/types.py b/metagraph/plugins/katana/types.py new file mode 100644 index 00000000..65909a3e --- /dev/null +++ b/metagraph/plugins/katana/types.py @@ -0,0 +1,91 @@ +import copy +import math +from typing import Any, Dict, List, Set + +import numpy as np +from metagraph.plugins.core.types import Graph +from metagraph.plugins.core.wrappers import GraphWrapper + +import katana.local + + +class KatanaGraph(GraphWrapper, abstract=Graph): + def __init__( + self, + pg_graph, + is_weighted=True, + edge_weight_prop_name="value", + is_directed=True, + node_weight_index=None, + node_dtype=None, + edge_dtype="int", + has_neg_weight=False, + ): + super().__init__() + self._assert_instance(pg_graph, katana.local.Graph) + self.value = pg_graph + self.is_weighted = is_weighted + self.edge_weight_prop_name = edge_weight_prop_name + self.is_directed = is_directed + self.node_weight_index = node_weight_index + self.node_dtype = node_dtype + self.edge_dtype = edge_dtype + self.has_neg_weight = has_neg_weight + + def copy(self): + return KatanaGraph( + copy.deepcopy(self.value), self.is_weighted, self.is_directed + ) + + class TypeMixin: + @classmethod + def _compute_abstract_properties( + cls, obj, props: Set[str], known_props: Dict[str, Any] + ) -> Dict[str, Any]: + ret = known_props.copy() + # fast props + for prop in { + "is_directed", + "node_type", + "node_dtype", + "edge_type", + "edge_dtype", + "edge_has_negative_weights", + } - ret.keys(): + if prop == "is_directed": + ret[prop] = obj.is_directed + if prop == "node_type": + ret[prop] = "set" if obj.node_weight_index is None else "map" + if prop == "node_dtype": + ret[prop] = ( + None if obj.node_weight_index is None else obj.node_dtype + ) + if prop == "edge_type": + ret[prop] = "map" if obj.is_weighted else "set" + if prop == "edge_dtype": + ret[prop] = obj.edge_dtype if obj.is_weighted else None + if prop == "edge_has_negative_weights": + ret[ + prop + ] = ( + obj.has_neg_weight + ) # TODO(pengfei): cover the neg-weight case, and add neg-weight test cases. + return ret + + @classmethod + def assert_equal( + cls, + obj1, + obj2, + aprops1, + aprops2, + cprops1, + cprops2, + *, + rel_tal=1e-9, + abs_tol=0.0, + ): + assert aprops1 == aprops2, f"proterty mismatch: {aprops1} != {aprops2}" + pg1 = obj1.value + pg2 = obj2.value + assert pg1 == pg2, f"the two graphs does not match" From 431d0f1d64b94ca4137d3533d3f251d39f1b55c7 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Wed, 19 Jan 2022 01:44:01 +0000 Subject: [PATCH 02/21] initial addition of tests --- metagraph/tests/plugins/katana/conftest.py | 157 +++++++++ metagraph/tests/plugins/katana/data/edge1.csv | 13 + .../tests/plugins/katana/test_algorithms.py | 325 ++++++++++++++++++ .../tests/plugins/katana/test_translators.py | 109 ++++++ metagraph/tests/plugins/katana/test_types.py | 126 +++++++ 5 files changed, 730 insertions(+) create mode 100644 metagraph/tests/plugins/katana/conftest.py create mode 100644 metagraph/tests/plugins/katana/data/edge1.csv create mode 100644 metagraph/tests/plugins/katana/test_algorithms.py create mode 100644 metagraph/tests/plugins/katana/test_translators.py create mode 100644 metagraph/tests/plugins/katana/test_types.py diff --git a/metagraph/tests/plugins/katana/conftest.py b/metagraph/tests/plugins/katana/conftest.py new file mode 100644 index 00000000..fd6d6784 --- /dev/null +++ b/metagraph/tests/plugins/katana/conftest.py @@ -0,0 +1,157 @@ +import metagraph as mg +import numpy as np +import pandas as pd +import pyarrow +import pytest +from scipy.sparse import csr_matrix + +import katana.local +from katana.example_data import get_rdg_dataset +from katana.local import Graph +from katana.local.import_data import from_csr + + +# Currently Graph does not support undirected graphs +# we are using directed graphs with symmetric edges to denote undirected graphs. +@pytest.fixture(autouse=True) +def pg_rmat15_cleaned_symmetric(): + katana.local.initialize() + pg = Graph(get_rdg_dataset("rmat15_cleaned_symmetric")) + return pg + + +@pytest.fixture(autouse=True) +def katanagraph_rmat15_cleaned_di(pg_rmat15_cleaned_symmetric): + katana_graph = mg.wrappers.Graph.KatanaGraph(pg_rmat15_cleaned_symmetric) + return katana_graph + + +@pytest.fixture(autouse=True) +def katanagraph_rmat15_cleaned_ud(pg_rmat15_cleaned_symmetric): + katana_graph = mg.wrappers.Graph.KatanaGraph( + pg_rmat15_cleaned_symmetric, + is_weighted=True, + edge_weight_prop_name="value", + is_directed=False, + ) + return katana_graph + + +def gen_pg_cleaned_8_12_from_csr(is_directed): + """ + A helper function for the test, generating Katana's Graph from an edge list + """ + katana.local.initialize() + elist_raw = [ + (0, 1, 4), + (0, 3, 2), + (0, 4, 7), + (1, 3, 3), + (1, 4, 5), + (2, 4, 5), + (2, 5, 2), + (2, 6, 8), + (3, 4, 1), + (4, 7, 4), + (5, 6, 4), + (5, 7, 6), + ] + src_list = [each[0] for each in elist_raw] + dest_list = [each[1] for each in elist_raw] + nlist_raw = list(set(src_list) | set(dest_list)) + # sort the eddge list and node list + if is_directed: + elist = sorted(elist_raw, key=lambda each: (each[0], each[1])) + else: + inv_elist = [(each[1], each[0], each[2]) for each in elist_raw] + elist = sorted(elist_raw + inv_elist, key=lambda each: (each[0], each[1])) + nlist = sorted(nlist_raw, key=lambda each: each) + # build the CSR format from the edge list (weight, (src, dst)) + row = np.array([each_edge[0] for each_edge in elist]) + col = np.array([each_edge[1] for each_edge in elist]) + data = np.array([each_edge[2] for each_edge in elist]) + csr = csr_matrix((data, (row, col)), shape=(len(nlist), len(nlist))) + # call the katana api to build a Graph (unweighted) from the CSR format + # noting that the first 0 in csr.indptr is excluded + pg = from_csr(csr.indptr[1:], csr.indices) + t = pyarrow.table(dict(value=data)) + pg.add_edge_property(t) + return pg + + +@pytest.fixture(autouse=True) +def katanagraph_cleaned_8_12_di(): + pg_cleaned_8_12_from_csr_di = gen_pg_cleaned_8_12_from_csr(is_directed=True) + katana_graph = mg.wrappers.Graph.KatanaGraph(pg_cleaned_8_12_from_csr_di) + return katana_graph + + +@pytest.fixture(autouse=True) +def katanagraph_cleaned_8_12_ud(): + pg_cleaned_8_12_from_csr_ud = gen_pg_cleaned_8_12_from_csr(is_directed=False) + katana_graph = mg.wrappers.Graph.KatanaGraph( + pg_cleaned_8_12_from_csr_ud, + is_weighted=True, + edge_weight_prop_name="value", + is_directed=False, + ) + return katana_graph + + +@pytest.fixture(autouse=True) +def networkx_weighted_undirected_8_12(): + df = pd.read_csv("tests/data/edge1.csv") + em = mg.wrappers.EdgeMap.PandasEdgeMap( + df, "Source", "Destination", "Weight", is_directed=False + ) + graph1 = mg.algos.util.graph.build(em) + return graph1 + + +@pytest.fixture(autouse=True) +def networkx_weighted_directed_8_12(): + df = pd.read_csv("tests/data/edge1.csv") + em = mg.wrappers.EdgeMap.PandasEdgeMap( + df, "Source", "Destination", "Weight", is_directed=True + ) + graph1 = mg.algos.util.graph.build(em) + return graph1 + + +# directed graph +@pytest.fixture(autouse=True) +def kg_from_nx_di_8_12(networkx_weighted_directed_8_12): + pg_test_case = mg.translate( + networkx_weighted_directed_8_12, mg.wrappers.Graph.KatanaGraph + ) + return pg_test_case + + +# undirected graph +@pytest.fixture(autouse=True) +def kg_from_nx_ud_8_12(networkx_weighted_undirected_8_12): + pg_test_case = mg.translate( + networkx_weighted_undirected_8_12, mg.wrappers.Graph.KatanaGraph + ) + return pg_test_case + + +@pytest.fixture(autouse=True) +def nx_from_kg_di_8_12(katanagraph_cleaned_8_12_di): + return mg.translate(katanagraph_cleaned_8_12_di, mg.wrappers.Graph.NetworkXGraph) + + +@pytest.fixture(autouse=True) +def nx_from_kg_ud_8_12(katanagraph_cleaned_8_12_ud): + return mg.translate(katanagraph_cleaned_8_12_ud, mg.wrappers.Graph.NetworkXGraph) + + +def pytest_addoption(parser): + parser.addoption( + "--runslow", action="store_true", default=False, help="run slow tests" + ) + + +def pytest_runtest_setup(item): + if "runslow" in item.keywords and not item.config.getoption("--runslow"): + pytest.skip("need --runslow option to run this test") diff --git a/metagraph/tests/plugins/katana/data/edge1.csv b/metagraph/tests/plugins/katana/data/edge1.csv new file mode 100644 index 00000000..900a0460 --- /dev/null +++ b/metagraph/tests/plugins/katana/data/edge1.csv @@ -0,0 +1,13 @@ +Source,Destination,Weight +0,1,4 +0,3,2 +0,4,7 +1,3,3 +1,4,5 +2,4,5 +2,5,2 +2,6,8 +3,4,1 +4,7,4 +5,6,4 +5,7,6 \ No newline at end of file diff --git a/metagraph/tests/plugins/katana/test_algorithms.py b/metagraph/tests/plugins/katana/test_algorithms.py new file mode 100644 index 00000000..d1ad5382 --- /dev/null +++ b/metagraph/tests/plugins/katana/test_algorithms.py @@ -0,0 +1,325 @@ +import metagraph as mg +import numpy as np +import pytest + + +def test_bfs(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): + bfs1_nx = mg.algos.traversal.bfs_iter(networkx_weighted_directed_8_12, 0) + bfs2_nx = mg.algos.traversal.bfs_iter(networkx_weighted_directed_8_12, 2) + bfs1_kg = mg.algos.traversal.bfs_iter(kg_from_nx_di_8_12, 0) + bfs2_kg = mg.algos.traversal.bfs_iter(kg_from_nx_di_8_12, 2) + assert bfs1_kg.tolist() == bfs1_nx.tolist() + assert bfs2_kg.tolist() == bfs2_nx.tolist() + assert bfs1_kg.tolist() == [0, 1, 3, 4, 7] + assert bfs2_kg.tolist() == [2, 4, 5, 6, 7] + + +def test_bfs_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + """ + src_node = 2 + bfs1_kg = mg.algos.traversal.bfs_iter(katanagraph_cleaned_8_12_di, src_node) + bfs2_kg = mg.algos.traversal.bfs_iter(katanagraph_cleaned_8_12_di, src_node) + bfs_nx = mg.algos.traversal.bfs_iter(nx_from_kg_di_8_12, src_node) + assert bfs1_kg.tolist() == bfs2_kg.tolist() + assert len(bfs1_kg.tolist()) > 0 + assert bfs1_kg.tolist() == bfs_nx.tolist() + + +def test_sssp_bellman_ford(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): + src_node = 0 + sssp_nx = mg.algos.traversal.bellman_ford( + networkx_weighted_directed_8_12, src_node + ) # source node is 0 + parents_nx = sssp_nx[0] + distances_nx = sssp_nx[1] + assert isinstance(parents_nx, dict) + assert isinstance(distances_nx, dict) + assert parents_nx == {0: 0, 1: 0, 3: 0, 4: 3, 7: 4} + assert distances_nx == {0: 0, 1: 4, 3: 2, 4: 3, 7: 7} + parents_kg, distances_kg = mg.algos.traversal.bellman_ford( + kg_from_nx_di_8_12, src_node + ) + assert parents_nx == parents_kg + assert distances_nx == distances_kg + + +# @pytest.mark.runslow +def test_sssp_bellman_ford_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + """ + src_node = 0 + sssp1_kg = mg.algos.traversal.bellman_ford(katanagraph_cleaned_8_12_di, src_node) + sssp2_kg = mg.algos.traversal.bellman_ford(katanagraph_cleaned_8_12_di, src_node) + sssp_nx = mg.algos.traversal.bellman_ford(nx_from_kg_di_8_12, src_node) + assert sssp1_kg[0] == sssp2_kg[0] + assert sssp1_kg[1] == sssp2_kg[1] + assert sssp1_kg[0] == sssp_nx[0] + assert sssp1_kg[1] == sssp_nx[1] + + +def test_sssp_dijkstra(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): + src_node = 1 + sssp_nx = mg.algos.traversal.dijkstra( + networkx_weighted_directed_8_12, src_node + ) # source node is 1 + parents_nx = sssp_nx[0] + distances_nx = sssp_nx[1] + assert isinstance(parents_nx, dict) + assert isinstance(distances_nx, dict) + assert parents_nx == {1: 1, 3: 1, 4: 3, 7: 4} + assert distances_nx == {1: 0, 3: 3, 4: 4, 7: 8} + parents_kg, distances_kg = mg.algos.traversal.dijkstra(kg_from_nx_di_8_12, src_node) + assert parents_nx == parents_kg + assert distances_nx == distances_kg + + +# @pytest.mark.runslow +def test_sssp_dijkstra_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + """ + src_node = 1 + sssp1_kg = mg.algos.traversal.dijkstra(katanagraph_cleaned_8_12_di, src_node) + sssp2_kg = mg.algos.traversal.dijkstra(katanagraph_cleaned_8_12_di, src_node) + sssp_nx = mg.algos.traversal.dijkstra(nx_from_kg_di_8_12, src_node) + assert sssp1_kg[0] == sssp2_kg[0] + assert sssp1_kg[1] == sssp2_kg[1] + assert sssp1_kg[0] == sssp_nx[0] + assert sssp1_kg[1] == sssp_nx[1] + + +def test_connected_components(networkx_weighted_undirected_8_12, kg_from_nx_ud_8_12): + cc_nx = mg.algos.clustering.connected_components(networkx_weighted_undirected_8_12) + cc_kg = mg.algos.clustering.connected_components(kg_from_nx_ud_8_12) + assert isinstance(cc_kg, dict) + assert isinstance(cc_kg, dict) + assert cc_kg == cc_nx + + +# @pytest.mark.runslow +def test_connected_components_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + """ + cc_kg1 = mg.algos.clustering.connected_components(katanagraph_cleaned_8_12_ud) + cc_kg2 = mg.algos.clustering.connected_components(katanagraph_cleaned_8_12_ud) + cc_nx = mg.algos.clustering.connected_components(nx_from_kg_ud_8_12) + assert cc_kg1 == cc_kg2 + assert cc_kg1 == cc_nx + + +def test_pagerank(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): + pr_nx = mg.algos.centrality.pagerank(networkx_weighted_directed_8_12) + pr_kg = mg.algos.centrality.pagerank(kg_from_nx_di_8_12) + assert isinstance(pr_nx, dict) + assert isinstance(pr_kg, dict) + assert pr_nx == pr_kg + + +# @pytest.mark.runslow +def test_pagerank_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + """ + pr_kg1 = mg.algos.centrality.pagerank(katanagraph_cleaned_8_12_di) + pr_kg2 = mg.algos.centrality.pagerank(katanagraph_cleaned_8_12_di) + pr_nx = mg.algos.centrality.pagerank(nx_from_kg_di_8_12) + assert pr_kg1 == pr_kg2 + assert pr_kg1 == pr_nx + + +def test_betweenness_centrality(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): + bc_nx = mg.algos.centrality.betweenness(networkx_weighted_directed_8_12) + bc_kg = mg.algos.centrality.betweenness(kg_from_nx_di_8_12) + assert isinstance(bc_nx, dict) + assert isinstance(bc_kg, dict) + assert bc_nx == bc_kg + + +# @pytest.mark.runslow +def test_betweenness_centrality_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + Notice for large graphs, mg.algos.centrality.betweenness is extremely slow (not because of our translator) + """ + bc_kg1 = mg.algos.centrality.betweenness(katanagraph_cleaned_8_12_di) + bc_kg2 = mg.algos.centrality.betweenness(katanagraph_cleaned_8_12_di) + assert bc_kg1 == bc_kg2 + bc_nx = mg.algos.centrality.betweenness(nx_from_kg_di_8_12) + assert bc_kg1 == bc_nx + + +def test_triangle_counting(networkx_weighted_undirected_8_12, kg_from_nx_ud_8_12): + tc_nx = mg.algos.clustering.triangle_count(networkx_weighted_undirected_8_12) + tc_kg = mg.algos.clustering.triangle_count(kg_from_nx_ud_8_12) + assert isinstance(tc_nx, int) + assert isinstance(tc_kg, int) + assert tc_nx == tc_kg + + +# @pytest.mark.runslow +def test_triangle_counting_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + """ + tc_kg1 = mg.algos.clustering.triangle_count(katanagraph_cleaned_8_12_ud) + tc_kg2 = mg.algos.clustering.triangle_count(katanagraph_cleaned_8_12_ud) + tc_nx = mg.algos.clustering.triangle_count(nx_from_kg_ud_8_12) + assert tc_kg1 == tc_kg2 + assert tc_kg1 == tc_nx + + +def test_louvain_community_detection( + networkx_weighted_undirected_8_12, kg_from_nx_ud_8_12 +): + lc_nx = mg.algos.clustering.louvain_community(networkx_weighted_undirected_8_12) + lc_kg = mg.algos.clustering.louvain_community(kg_from_nx_ud_8_12) + assert isinstance(lc_nx[0], dict) + assert isinstance(lc_kg[0], dict) + assert isinstance(lc_nx[1], float) + assert isinstance(lc_kg[1], float) + assert lc_nx[0] == lc_kg[0] + assert lc_nx[1] == lc_kg[1] + + +# @pytest.mark.runslow +def test_louvain_community_detection_kg( + katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12 +): + """ + test for katana graph which is directly loaded rather than translated from nettworkx. + We cannot expect two consecutive runs with the same source code give the same results. + The reason is two runs use different random seeds. + Besides, we cannot set the random seed cause the metagraph wrapper hide that option in network's community_louvain.best_partition function + """ + lc_kg1 = mg.algos.clustering.louvain_community(katanagraph_cleaned_8_12_ud) + lc_kg2 = mg.algos.clustering.louvain_community(katanagraph_cleaned_8_12_ud) + lc_nx = mg.algos.clustering.louvain_community(nx_from_kg_ud_8_12) + # assert lc_kg1[0] == lc_kg2[0] # failed cause two runs get different results (different random seeds from network's community_louvain.best_partition function). + assert abs(lc_kg1[1] - lc_kg2[1]) < 0.1 + assert lc_kg1[0] == lc_nx[0] + assert lc_kg1[1] == lc_nx[1] + + +def test_translation_subgraph_extraction( + networkx_weighted_directed_8_12, kg_from_nx_di_8_12 +): + se_nx = mg.algos.subgraph.extract_subgraph( + networkx_weighted_directed_8_12, {0, 2, 3} + ) + se_kg = mg.algos.subgraph.extract_subgraph(kg_from_nx_di_8_12, {0, 2, 3}) + assert isinstance(se_nx, mg.wrappers.Graph.NetworkXGraph) + assert isinstance(se_kg, mg.wrappers.Graph.NetworkXGraph) + assert list(se_nx.value.edges(data=True)) == list(se_kg.value.edges(data=True)) + + +# @pytest.mark.runslow +def test_translation_subgraph_extraction_kg( + katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12 +): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + """ + ids = {0, 4, 5} + se_kg1 = mg.algos.subgraph.extract_subgraph(katanagraph_cleaned_8_12_di, ids) + se_kg2 = mg.algos.subgraph.extract_subgraph(katanagraph_cleaned_8_12_di, ids) + se_nx = mg.algos.subgraph.extract_subgraph(nx_from_kg_di_8_12, ids) + assert list(se_kg1.value.edges(data=True)) == list(se_kg2.value.edges(data=True)) + assert list(se_kg1.value.edges(data=True)) == list(se_nx.value.edges(data=True)) + + +def test_labal_propagation(networkx_weighted_undirected_8_12, kg_from_nx_ud_8_12): + cd_nx = mg.algos.clustering.label_propagation_community( + networkx_weighted_undirected_8_12 + ) + cd_kg = mg.algos.clustering.label_propagation_community(kg_from_nx_ud_8_12) + assert isinstance(cd_nx, dict) + assert isinstance(cd_kg, dict) + assert cd_nx == cd_kg + + +# @pytest.mark.runslow +def test_labal_propagation_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + """ + cd_kg1 = mg.algos.clustering.label_propagation_community( + katanagraph_cleaned_8_12_ud + ) + cd_kg2 = mg.algos.clustering.label_propagation_community( + katanagraph_cleaned_8_12_ud + ) + cd_nx = mg.algos.clustering.label_propagation_community(nx_from_kg_ud_8_12) + assert cd_kg1 == cd_kg2 + assert cd_kg1 == cd_nx + + +def test_jaccard_similarity(networkx_weighted_undirected_8_12, kg_from_nx_ud_8_12): + compare_node = 0 + prop_name = "jaccard_prop_with_" + str(compare_node) + jcd_nx = mg.algos.traversal.jaccard(networkx_weighted_undirected_8_12, compare_node) + jcd_kg = mg.algos.traversal.jaccard(kg_from_nx_ud_8_12, compare_node) + assert isinstance(jcd_nx, np.ndarray) + assert isinstance(jcd_kg, np.ndarray) + assert jcd_nx.tolist() == jcd_kg.tolist() + assert jcd_kg[compare_node] == 1 + + +# @pytest.mark.runslow +def test_jaccard_similarity_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + """ + compare_node = 3 + prop_name = "jaccard_prop_with_" + str(compare_node) + jcd_kg1 = mg.algos.traversal.jaccard(katanagraph_cleaned_8_12_ud, compare_node) + jcd_kg2 = mg.algos.traversal.jaccard(katanagraph_cleaned_8_12_ud, compare_node) + assert jcd_kg1.tolist() == jcd_kg2.tolist() + assert jcd_kg1[compare_node] == 1 + assert jcd_kg2[compare_node] == 1 + jcd_nx = mg.algos.traversal.jaccard(nx_from_kg_ud_8_12, compare_node) + assert jcd_nx[compare_node] == 1 + assert jcd_kg1.tolist() == jcd_nx.tolist() + + +def test_local_clustering_coefficient( + networkx_weighted_undirected_8_12, kg_from_nx_ud_8_12 +): + prop_name = "output_prop" + lcc_nx = mg.algos.clustering.local_clustering_coefficient( + networkx_weighted_undirected_8_12, prop_name + ) + lcc_kg = mg.algos.clustering.local_clustering_coefficient( + kg_from_nx_ud_8_12, prop_name + ) + assert isinstance(lcc_nx, np.ndarray) + assert isinstance(lcc_kg, np.ndarray) + assert lcc_kg.tolist() == lcc_nx.tolist() + assert lcc_kg[-1] == 0 + assert not np.any(np.isnan(lcc_kg)) + + +# @pytest.mark.runslow +def test_local_clustering_coefficient_kg( + katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12 +): + """ + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + """ + prop_name = "output_prop" + lcc_kg1 = mg.algos.clustering.local_clustering_coefficient( + katanagraph_cleaned_8_12_ud, prop_name + ) + lcc_kg2 = mg.algos.clustering.local_clustering_coefficient( + katanagraph_cleaned_8_12_ud, prop_name + ) + assert lcc_kg1.tolist() == lcc_kg2.tolist() + assert lcc_kg1[-1] == 0 + assert not np.any(np.isnan(lcc_kg1)) + lcc_nx = mg.algos.clustering.local_clustering_coefficient( + nx_from_kg_ud_8_12, prop_name + ) + assert lcc_kg1.tolist() == lcc_nx.tolist() diff --git a/metagraph/tests/plugins/katana/test_translators.py b/metagraph/tests/plugins/katana/test_translators.py new file mode 100644 index 00000000..09d4c05a --- /dev/null +++ b/metagraph/tests/plugins/katana/test_translators.py @@ -0,0 +1,109 @@ +import metagraph as mg + + +def test_num_nodes(kg_from_nx_di_8_12): + nodes_total = 0 + for nid in kg_from_nx_di_8_12.value: + nodes_total += 1 + assert kg_from_nx_di_8_12.value.num_nodes() == nodes_total + assert kg_from_nx_di_8_12.value.num_nodes() == 8 + + +def test_num_edges(kg_from_nx_di_8_12): + edges_total = 0 + for nid in kg_from_nx_di_8_12.value: + edges_total += len(kg_from_nx_di_8_12.value.edge_ids(nid)) + assert kg_from_nx_di_8_12.value.num_edges() == edges_total + assert kg_from_nx_di_8_12.value.num_edges() == 12 + + +def test_topology(kg_from_nx_di_8_12): + assert kg_from_nx_di_8_12.value.edge_ids(0) == range(0, 3) + assert kg_from_nx_di_8_12.value.edge_ids(1) == range(3, 5) + assert kg_from_nx_di_8_12.value.edge_ids(2) == range(5, 8) + assert kg_from_nx_di_8_12.value.edge_ids(3) == range(8, 9) + assert kg_from_nx_di_8_12.value.edge_ids(4) == range(9, 10) + assert kg_from_nx_di_8_12.value.edge_ids(5) == range(10, 12) + assert [ + kg_from_nx_di_8_12.value.get_edge_dest(i) + for i in kg_from_nx_di_8_12.value.edge_ids(0) + ] == [1, 3, 4] + assert [ + kg_from_nx_di_8_12.value.get_edge_dest(i) + for i in kg_from_nx_di_8_12.value.edge_ids(2) + ] == [4, 5, 6] + assert [ + kg_from_nx_di_8_12.value.get_edge_dest(i) + for i in kg_from_nx_di_8_12.value.edge_ids(4) + ] == [7] + assert [ + kg_from_nx_di_8_12.value.get_edge_dest(i) + for i in kg_from_nx_di_8_12.value.edge_ids(5) + ] == [6, 7] + + +def test_schema(kg_from_nx_di_8_12): + assert len(kg_from_nx_di_8_12.value.loaded_node_schema()) == 0 + assert len(kg_from_nx_di_8_12.value.loaded_edge_schema()) == 1 + + +def test_edge_property_directed(kg_from_nx_di_8_12): + assert ( + kg_from_nx_di_8_12.value.loaded_edge_schema()[0].name == "value_from_translator" + ) + assert kg_from_nx_di_8_12.value.get_edge_property( + 0 + ) == kg_from_nx_di_8_12.value.get_edge_property("value_from_translator") + assert kg_from_nx_di_8_12.value.get_edge_property( + "value_from_translator" + ).tolist() == [ + 4, + 2, + 7, + 3, + 5, + 5, + 2, + 8, + 1, + 4, + 4, + 6, + ] + + +def test_compare_node_count(nx_from_kg_di_8_12, katanagraph_cleaned_8_12_di): + nlist = [ + each_node[0] for each_node in list(nx_from_kg_di_8_12.value.nodes(data=True)) + ] + num_no_edge_nodes = 0 + for nid in katanagraph_cleaned_8_12_di.value: + if nid not in nlist: + assert katanagraph_cleaned_8_12_di.value.edge_ids(nid) == range(0, 0) + num_no_edge_nodes += 1 + assert ( + num_no_edge_nodes + len(nlist) == katanagraph_cleaned_8_12_di.value.num_nodes() + ) + assert num_no_edge_nodes == 0 + + +def test_compare_edge_count(nx_from_kg_di_8_12, katanagraph_cleaned_8_12_di): + edge_dict_count = { + (each_e[0], each_e[1]): 0 + for each_e in list(nx_from_kg_di_8_12.value.edges(data=True)) + } + for src in katanagraph_cleaned_8_12_di.value: + for dest in [ + katanagraph_cleaned_8_12_di.value.get_edge_dest(e) + for e in katanagraph_cleaned_8_12_di.value.edge_ids(src) + ]: + if (src, dest) in edge_dict_count: + edge_dict_count[(src, dest)] += 1 + assert ( + sum([edge_dict_count[i] for i in edge_dict_count]) + == katanagraph_cleaned_8_12_di.value.num_edges() + ) + assert ( + len(list(nx_from_kg_di_8_12.value.edges(data=True))) + == katanagraph_cleaned_8_12_di.value.num_edges() + ) diff --git a/metagraph/tests/plugins/katana/test_types.py b/metagraph/tests/plugins/katana/test_types.py new file mode 100644 index 00000000..61e3fc78 --- /dev/null +++ b/metagraph/tests/plugins/katana/test_types.py @@ -0,0 +1,126 @@ +import metagraph as mg +import pytest + + +def test_num_nodes(katanagraph_rmat15_cleaned_di): + cnt = 0 + for nid in katanagraph_rmat15_cleaned_di.value: + cnt += 1 + assert katanagraph_rmat15_cleaned_di.value.num_nodes() == 32768 + assert katanagraph_rmat15_cleaned_di.value.num_nodes() == cnt + + +def test_num_edges(katanagraph_rmat15_cleaned_di): + cnt = 0 + for nid in katanagraph_rmat15_cleaned_di.value: + cnt += len(katanagraph_rmat15_cleaned_di.value.edge_ids(nid)) + assert katanagraph_rmat15_cleaned_di.value.num_edges() == 363194 + assert katanagraph_rmat15_cleaned_di.value.num_edges() == cnt + + +def test_node_schema(katanagraph_rmat15_cleaned_di): + assert "names" in dir(katanagraph_rmat15_cleaned_di.value.loaded_node_schema()) + assert "types" in dir(katanagraph_rmat15_cleaned_di.value.loaded_node_schema()) + assert len(katanagraph_rmat15_cleaned_di.value.loaded_node_schema()) == 0 + + +def test_edge_schema(katanagraph_rmat15_cleaned_di): + assert "names" in dir(katanagraph_rmat15_cleaned_di.value.loaded_edge_schema()) + assert "types" in dir(katanagraph_rmat15_cleaned_di.value.loaded_edge_schema()) + assert len(katanagraph_rmat15_cleaned_di.value.loaded_edge_schema()) == 1 + + +def test_edge_property(katanagraph_rmat15_cleaned_di): + assert katanagraph_rmat15_cleaned_di.value.loaded_edge_schema()[0].name == "value" + assert katanagraph_rmat15_cleaned_di.value.get_edge_property( + 0 + ) == katanagraph_rmat15_cleaned_di.value.get_edge_property("value") + assert ( + katanagraph_rmat15_cleaned_di.value.get_edge_property("value").to_pandas()[0] + == 339302416426 + ) + + +def test_topology(katanagraph_rmat15_cleaned_di): + assert katanagraph_rmat15_cleaned_di.value.edge_ids(0) == range(0, 20767) + assert [ + katanagraph_rmat15_cleaned_di.value.get_edge_dest(i) + for i in katanagraph_rmat15_cleaned_di.value.edge_ids(0) + ][0:5] == [ + 1, + 2, + 3, + 4, + 5, + ] + assert katanagraph_rmat15_cleaned_di.value.edge_ids(8) == range(36475, 41133) + assert [ + katanagraph_rmat15_cleaned_di.value.get_edge_dest(i) + for i in katanagraph_rmat15_cleaned_di.value.edge_ids(8) + ][0:5] == [ + 0, + 9, + 10, + 11, + 12, + ] + + +def test_num_nodes_networkx( + networkx_weighted_undirected_8_12, networkx_weighted_directed_8_12 +): + assert len(list(networkx_weighted_undirected_8_12.value.nodes(data=True))) == 8 + assert len(list(networkx_weighted_directed_8_12.value.nodes(data=True))) == 8 + + +def test_num_edges_networkx( + networkx_weighted_undirected_8_12, networkx_weighted_directed_8_12 +): + assert len(list(networkx_weighted_undirected_8_12.value.edges(data=True))) == 12 + assert len(list(networkx_weighted_directed_8_12.value.edges(data=True))) == 12 + + +def test_topology_networkx( + networkx_weighted_undirected_8_12, networkx_weighted_directed_8_12 +): + assert list(networkx_weighted_undirected_8_12.value.nodes(data=True)) == list( + networkx_weighted_directed_8_12.value.nodes(data=True) + ) + assert list(networkx_weighted_undirected_8_12.value.nodes(data=True)) == [ + (0, {}), + (1, {}), + (3, {}), + (4, {}), + (2, {}), + (5, {}), + (6, {}), + (7, {}), + ] + assert list(networkx_weighted_undirected_8_12.value.edges(data=True)) == [ + (0, 1, {"weight": 4}), + (0, 3, {"weight": 2}), + (0, 4, {"weight": 7}), + (1, 3, {"weight": 3}), + (1, 4, {"weight": 5}), + (3, 4, {"weight": 1}), + (4, 2, {"weight": 5}), + (4, 7, {"weight": 4}), + (2, 5, {"weight": 2}), + (2, 6, {"weight": 8}), + (5, 6, {"weight": 4}), + (5, 7, {"weight": 6}), + ] + assert list(networkx_weighted_directed_8_12.value.edges(data=True)) == [ + (0, 1, {"weight": 4}), + (0, 3, {"weight": 2}), + (0, 4, {"weight": 7}), + (1, 3, {"weight": 3}), + (1, 4, {"weight": 5}), + (3, 4, {"weight": 1}), + (4, 7, {"weight": 4}), + (2, 4, {"weight": 5}), + (2, 5, {"weight": 2}), + (2, 6, {"weight": 8}), + (5, 6, {"weight": 4}), + (5, 7, {"weight": 6}), + ] From 4bb8edc3f05f22e87541d64cb9e1b5411a2d7243 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Wed, 19 Jan 2022 14:26:45 +0000 Subject: [PATCH 03/21] correct the path name --- metagraph/tests/plugins/katana/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagraph/tests/plugins/katana/conftest.py b/metagraph/tests/plugins/katana/conftest.py index fd6d6784..1146c5eb 100644 --- a/metagraph/tests/plugins/katana/conftest.py +++ b/metagraph/tests/plugins/katana/conftest.py @@ -100,7 +100,7 @@ def katanagraph_cleaned_8_12_ud(): @pytest.fixture(autouse=True) def networkx_weighted_undirected_8_12(): - df = pd.read_csv("tests/data/edge1.csv") + df = pd.read_csv("metagraph/tests/plugins/katana/data/edge1.csv") em = mg.wrappers.EdgeMap.PandasEdgeMap( df, "Source", "Destination", "Weight", is_directed=False ) @@ -110,7 +110,7 @@ def networkx_weighted_undirected_8_12(): @pytest.fixture(autouse=True) def networkx_weighted_directed_8_12(): - df = pd.read_csv("tests/data/edge1.csv") + df = pd.read_csv("metagraph/tests/plugins/katana/data/edge1.csv") em = mg.wrappers.EdgeMap.PandasEdgeMap( df, "Source", "Destination", "Weight", is_directed=True ) From 5c32c2e793012195d361069228e4bdaaed7fa3a2 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Wed, 19 Jan 2022 17:40:24 +0000 Subject: [PATCH 04/21] enable runslow option --- metagraph/tests/conftest.py | 8 +++++++ metagraph/tests/plugins/katana/conftest.py | 11 ---------- .../tests/plugins/katana/test_algorithms.py | 22 +++++++++---------- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/metagraph/tests/conftest.py b/metagraph/tests/conftest.py index 1edc592b..e86cec02 100644 --- a/metagraph/tests/conftest.py +++ b/metagraph/tests/conftest.py @@ -12,3 +12,11 @@ def pytest_addoption(parser): action="store_true", help="Use a DaskResolver instead of the normal Resolver.", ) + parser.addoption( + "--runslow", action="store_true", default=False, help="run slow tests" + ) + + +def pytest_runtest_setup(item): + if "runslow" in item.keywords and not item.config.getoption("--runslow"): + pytest.skip("need --runslow option to run this test") diff --git a/metagraph/tests/plugins/katana/conftest.py b/metagraph/tests/plugins/katana/conftest.py index 1146c5eb..ef2e760a 100644 --- a/metagraph/tests/plugins/katana/conftest.py +++ b/metagraph/tests/plugins/katana/conftest.py @@ -144,14 +144,3 @@ def nx_from_kg_di_8_12(katanagraph_cleaned_8_12_di): @pytest.fixture(autouse=True) def nx_from_kg_ud_8_12(katanagraph_cleaned_8_12_ud): return mg.translate(katanagraph_cleaned_8_12_ud, mg.wrappers.Graph.NetworkXGraph) - - -def pytest_addoption(parser): - parser.addoption( - "--runslow", action="store_true", default=False, help="run slow tests" - ) - - -def pytest_runtest_setup(item): - if "runslow" in item.keywords and not item.config.getoption("--runslow"): - pytest.skip("need --runslow option to run this test") diff --git a/metagraph/tests/plugins/katana/test_algorithms.py b/metagraph/tests/plugins/katana/test_algorithms.py index d1ad5382..616a91ac 100644 --- a/metagraph/tests/plugins/katana/test_algorithms.py +++ b/metagraph/tests/plugins/katana/test_algorithms.py @@ -45,7 +45,7 @@ def test_sssp_bellman_ford(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): assert distances_nx == distances_kg -# @pytest.mark.runslow +@pytest.mark.runslow def test_sssp_bellman_ford_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -76,7 +76,7 @@ def test_sssp_dijkstra(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): assert distances_nx == distances_kg -# @pytest.mark.runslow +@pytest.mark.runslow def test_sssp_dijkstra_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -99,7 +99,7 @@ def test_connected_components(networkx_weighted_undirected_8_12, kg_from_nx_ud_8 assert cc_kg == cc_nx -# @pytest.mark.runslow +@pytest.mark.runslow def test_connected_components_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -119,7 +119,7 @@ def test_pagerank(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): assert pr_nx == pr_kg -# @pytest.mark.runslow +@pytest.mark.runslow def test_pagerank_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -139,7 +139,7 @@ def test_betweenness_centrality(networkx_weighted_directed_8_12, kg_from_nx_di_8 assert bc_nx == bc_kg -# @pytest.mark.runslow +@pytest.mark.runslow def test_betweenness_centrality_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -160,7 +160,7 @@ def test_triangle_counting(networkx_weighted_undirected_8_12, kg_from_nx_ud_8_12 assert tc_nx == tc_kg -# @pytest.mark.runslow +@pytest.mark.runslow def test_triangle_counting_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -185,7 +185,7 @@ def test_louvain_community_detection( assert lc_nx[1] == lc_kg[1] -# @pytest.mark.runslow +@pytest.mark.runslow def test_louvain_community_detection_kg( katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12 ): @@ -216,7 +216,7 @@ def test_translation_subgraph_extraction( assert list(se_nx.value.edges(data=True)) == list(se_kg.value.edges(data=True)) -# @pytest.mark.runslow +@pytest.mark.runslow def test_translation_subgraph_extraction_kg( katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12 ): @@ -241,7 +241,7 @@ def test_labal_propagation(networkx_weighted_undirected_8_12, kg_from_nx_ud_8_12 assert cd_nx == cd_kg -# @pytest.mark.runslow +@pytest.mark.runslow def test_labal_propagation_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -268,7 +268,7 @@ def test_jaccard_similarity(networkx_weighted_undirected_8_12, kg_from_nx_ud_8_1 assert jcd_kg[compare_node] == 1 -# @pytest.mark.runslow +@pytest.mark.runslow def test_jaccard_similarity_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -302,7 +302,7 @@ def test_local_clustering_coefficient( assert not np.any(np.isnan(lcc_kg)) -# @pytest.mark.runslow +@pytest.mark.runslow def test_local_clustering_coefficient_kg( katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12 ): From b9dd26a10273e936f2ac37910024bef552164f91 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Wed, 19 Jan 2022 18:34:26 +0000 Subject: [PATCH 05/21] fix the format errors reported from black --- .pre-commit-config.yaml | 2 +- .../tests/plugins/katana/test_translators.py | 15 +-------------- metagraph/tests/plugins/katana/test_types.py | 17 +++-------------- 3 files changed, 5 insertions(+), 29 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 57c25eb2..1c48b356 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 21.12b0 + rev: 19.10b0 hooks: - id: black language_version: python3.8 diff --git a/metagraph/tests/plugins/katana/test_translators.py b/metagraph/tests/plugins/katana/test_translators.py index 09d4c05a..fbf2420c 100644 --- a/metagraph/tests/plugins/katana/test_translators.py +++ b/metagraph/tests/plugins/katana/test_translators.py @@ -56,20 +56,7 @@ def test_edge_property_directed(kg_from_nx_di_8_12): ) == kg_from_nx_di_8_12.value.get_edge_property("value_from_translator") assert kg_from_nx_di_8_12.value.get_edge_property( "value_from_translator" - ).tolist() == [ - 4, - 2, - 7, - 3, - 5, - 5, - 2, - 8, - 1, - 4, - 4, - 6, - ] + ).tolist() == [4, 2, 7, 3, 5, 5, 2, 8, 1, 4, 4, 6,] def test_compare_node_count(nx_from_kg_di_8_12, katanagraph_cleaned_8_12_di): diff --git a/metagraph/tests/plugins/katana/test_types.py b/metagraph/tests/plugins/katana/test_types.py index 61e3fc78..83990626 100644 --- a/metagraph/tests/plugins/katana/test_types.py +++ b/metagraph/tests/plugins/katana/test_types.py @@ -46,24 +46,13 @@ def test_topology(katanagraph_rmat15_cleaned_di): assert [ katanagraph_rmat15_cleaned_di.value.get_edge_dest(i) for i in katanagraph_rmat15_cleaned_di.value.edge_ids(0) - ][0:5] == [ - 1, - 2, - 3, - 4, - 5, - ] + ][0:5] == [1, 2, 3, 4, 5,] + assert katanagraph_rmat15_cleaned_di.value.edge_ids(8) == range(36475, 41133) assert [ katanagraph_rmat15_cleaned_di.value.get_edge_dest(i) for i in katanagraph_rmat15_cleaned_di.value.edge_ids(8) - ][0:5] == [ - 0, - 9, - 10, - 11, - 12, - ] + ][0:5] == [0, 9, 10, 11, 12,] def test_num_nodes_networkx( From 43b0845b9137a0cde731cfa744c62acd37af6a95 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Wed, 19 Jan 2022 22:44:40 +0000 Subject: [PATCH 06/21] initialize katana.local upon plugin registration --- metagraph/plugins/__init__.py | 2 ++ metagraph/tests/plugins/katana/conftest.py | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metagraph/plugins/__init__.py b/metagraph/plugins/__init__.py index 53de8ee0..156c4243 100644 --- a/metagraph/plugins/__init__.py +++ b/metagraph/plugins/__init__.py @@ -74,11 +74,13 @@ def find_plugins(): from . import core, graphblas, katana, networkx, numpy, pandas, python, scipy + import katana.local # Default Plugins registry.register_from_modules(core) registry.register_from_modules(graphblas, name="core_graphblas") registry.register_from_modules(katana, name="core_katana") + katana.local.initialize() registry.register_from_modules(networkx, name="core_networkx") registry.register_from_modules(numpy, name="core_numpy") registry.register_from_modules(pandas, name="core_pandas") diff --git a/metagraph/tests/plugins/katana/conftest.py b/metagraph/tests/plugins/katana/conftest.py index ef2e760a..6da2348a 100644 --- a/metagraph/tests/plugins/katana/conftest.py +++ b/metagraph/tests/plugins/katana/conftest.py @@ -15,7 +15,6 @@ # we are using directed graphs with symmetric edges to denote undirected graphs. @pytest.fixture(autouse=True) def pg_rmat15_cleaned_symmetric(): - katana.local.initialize() pg = Graph(get_rdg_dataset("rmat15_cleaned_symmetric")) return pg @@ -41,7 +40,6 @@ def gen_pg_cleaned_8_12_from_csr(is_directed): """ A helper function for the test, generating Katana's Graph from an edge list """ - katana.local.initialize() elist_raw = [ (0, 1, 4), (0, 3, 2), From 35d52cf17db2737288c02534b41802259bf76835 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Wed, 19 Jan 2022 22:48:08 +0000 Subject: [PATCH 07/21] have to import katana.local inside the function only --- metagraph/plugins/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metagraph/plugins/__init__.py b/metagraph/plugins/__init__.py index 156c4243..479871cf 100644 --- a/metagraph/plugins/__init__.py +++ b/metagraph/plugins/__init__.py @@ -74,12 +74,13 @@ def find_plugins(): from . import core, graphblas, katana, networkx, numpy, pandas, python, scipy - import katana.local # Default Plugins registry.register_from_modules(core) registry.register_from_modules(graphblas, name="core_graphblas") registry.register_from_modules(katana, name="core_katana") + import katana.local + katana.local.initialize() registry.register_from_modules(networkx, name="core_networkx") registry.register_from_modules(numpy, name="core_numpy") From f9a15435e44379ddafc31655e493c38a3e1aa69b Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Thu, 20 Jan 2022 01:28:03 +0000 Subject: [PATCH 08/21] fix the KeyError on unweighted edge lists --- metagraph/plugins/__init__.py | 7 ++++--- metagraph/plugins/katana/translators.py | 5 ++++- metagraph/tests/plugins/katana/conftest.py | 5 +++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/metagraph/plugins/__init__.py b/metagraph/plugins/__init__.py index 479871cf..ceb28181 100644 --- a/metagraph/plugins/__init__.py +++ b/metagraph/plugins/__init__.py @@ -79,13 +79,14 @@ def find_plugins(): registry.register_from_modules(core) registry.register_from_modules(graphblas, name="core_graphblas") registry.register_from_modules(katana, name="core_katana") - import katana.local - - katana.local.initialize() registry.register_from_modules(networkx, name="core_networkx") registry.register_from_modules(numpy, name="core_numpy") registry.register_from_modules(pandas, name="core_pandas") registry.register_from_modules(python, name="core_python") registry.register_from_modules(scipy, name="core_scipy") + if has_katana: + import katana.local + + katana.local.initialize() return registry.plugins diff --git a/metagraph/plugins/katana/translators.py b/metagraph/plugins/katana/translators.py index 3eea6e19..9c62e77e 100644 --- a/metagraph/plugins/katana/translators.py +++ b/metagraph/plugins/katana/translators.py @@ -31,7 +31,10 @@ def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: # build the CSR format from the edge list (weight, (src, dst)) row = np.array([each_edge[0] for each_edge in elist]) col = np.array([each_edge[1] for each_edge in elist]) - data = np.array([each_edge[2]["weight"] for each_edge in elist]) + if is_weighted: + data = np.array([each_edge[2]["weight"] for each_edge in elist]) + else: + data = np.array([0 for each_edge in elist]) csr = csr_matrix((data, (row, col)), shape=(len(nlist), len(nlist))) # call the katana api to build a Graph (unweighted) from the CSR format # noting that the first 0 in csr.indptr is excluded diff --git a/metagraph/tests/plugins/katana/conftest.py b/metagraph/tests/plugins/katana/conftest.py index 6da2348a..15d27bfe 100644 --- a/metagraph/tests/plugins/katana/conftest.py +++ b/metagraph/tests/plugins/katana/conftest.py @@ -10,11 +10,14 @@ from katana.local import Graph from katana.local.import_data import from_csr +from metagraph.plugins import has_katana # Currently Graph does not support undirected graphs # we are using directed graphs with symmetric edges to denote undirected graphs. @pytest.fixture(autouse=True) def pg_rmat15_cleaned_symmetric(): + if not has_katana: + katana.local.initialize() pg = Graph(get_rdg_dataset("rmat15_cleaned_symmetric")) return pg @@ -40,6 +43,8 @@ def gen_pg_cleaned_8_12_from_csr(is_directed): """ A helper function for the test, generating Katana's Graph from an edge list """ + if not has_katana: + katana.local.initialize() elist_raw = [ (0, 1, 4), (0, 3, 2), From 6665dff1fe47278269d6e2ec4512d2a0ddc2eda7 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Mon, 24 Jan 2022 01:14:12 +0000 Subject: [PATCH 09/21] various fixes on the translators, yet to clean up --- metagraph/plugins/katana/translators.py | 54 ++++++++++++++++++----- metagraph/plugins/katana/types.py | 29 +++++++++++- metagraph/plugins/networkx/types.py | 4 +- metagraph/tests/translators/test_graph.py | 36 +++++++++++++++ 4 files changed, 110 insertions(+), 13 deletions(-) diff --git a/metagraph/plugins/katana/translators.py b/metagraph/plugins/katana/translators.py index 9c62e77e..9862964e 100644 --- a/metagraph/plugins/katana/translators.py +++ b/metagraph/plugins/katana/translators.py @@ -15,8 +15,21 @@ @translator def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: + nlist = sorted(list(x.value.nodes(data=True)), key=lambda each: each[0]) + # ranks = np.arange(0, len(nlist)) + # nodes = [each[0] for each in nlist] + # mapping = dict(zip(nodes, ranks)) + # x.value = nx.relabel_nodes(x.value, mapping) aprops = NetworkXGraph.Type.compute_abstract_properties( - x, {"node_dtype", "node_type", "edge_type", "is_directed"} + x, + { + "node_dtype", + "node_type", + "edge_dtype", + "edge_type", + "edge_has_negative_weights", + "is_directed", + }, ) is_weighted = aprops["edge_type"] == "map" # get the edge list directly from the NetworkX Graph @@ -27,13 +40,13 @@ def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: else: inv_elist = [(each[1], each[0], each[2]) for each in elist_raw] elist = sorted(elist_raw + inv_elist, key=lambda each: (each[0], each[1])) - nlist = sorted(list(x.value.nodes(data=True)), key=lambda each: each[0]) # build the CSR format from the edge list (weight, (src, dst)) row = np.array([each_edge[0] for each_edge in elist]) col = np.array([each_edge[1] for each_edge in elist]) if is_weighted: data = np.array([each_edge[2]["weight"] for each_edge in elist]) else: + # data = np.array([None for each_edge in elist]) data = np.array([0 for each_edge in elist]) csr = csr_matrix((data, (row, col)), shape=(len(nlist), len(nlist))) # call the katana api to build a Graph (unweighted) from the CSR format @@ -42,6 +55,7 @@ def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: # add the edge weight as a new property t = pyarrow.table(dict(value_from_translator=data)) pg.add_edge_property(t) + print("pg:", pg) # use the metagraph's Graph warpper to wrap the katana.local.Graph return KatanaGraph( pg_graph=pg, @@ -49,24 +63,32 @@ def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: edge_weight_prop_name="value_from_translator", is_directed=aprops["is_directed"], node_weight_index=0, + node_dtype=aprops["node_dtype"], + edge_dtype=aprops["edge_dtype"], + node_type=aprops["node_type"], + edge_type=aprops["edge_type"], + has_neg_weight=aprops["edge_has_negative_weights"], ) @translator def katanagraph_to_networkx(x: KatanaGraph, **props) -> NetworkXGraph: pg = x.value - dest_list = [ - dest for src in pg for dest in [pg.get_edge_dest(e) for e in pg.edge_ids(src)] - ] - for src in pg: - if pg.edge_ids(src) == range(0, 0): - if src not in dest_list: - raise ValueError("NetworkX does not support graph with isolated nodes") + node_list = [src for src in pg] + # dest_list = [ + # dest for src in pg for dest in [pg.get_edge_dest(e) for e in pg.edge_ids(src)] + # ] + # for src in pg: + # print("src:", src, "id:", pg.edge_ids(src)) + # if pg.edge_ids(src) == range(0, 0): + # if src not in dest_list: + # raise ValueError("NetworkX does not support graph with isolated nodes") edge_dict_count = { (src, dest): 0 for src in pg for dest in [pg.get_edge_dest(e) for e in pg.edge_ids(src)] } + print("edge_dict_count: ", edge_dict_count) for src in pg: for dest in [pg.get_edge_dest(e) for e in pg.edge_ids(src)]: edge_dict_count[(src, dest)] += 1 @@ -88,11 +110,21 @@ def katanagraph_to_networkx(x: KatanaGraph, **props) -> NetworkXGraph: for nid in pg for j in pg.edge_ids(nid) ] - # TODO(pengfei): add more type conversion support: like np.float64 -> float?? + elif isinstance(edge_weights[0], np.float64): + elist = [ + (nid, pg.get_edge_dest(j), float(edge_weights[j])) + for nid in pg + for j in pg.edge_ids(nid) + ] elist = list(OrderedDict.fromkeys(elist)) if x.is_directed: graph = nx.DiGraph() else: graph = nx.Graph() + graph.add_nodes_from(node_list) + print("node list:", node_list) graph.add_weighted_edges_from(elist) - return mg.wrappers.Graph.NetworkXGraph(graph) + print("edge list:", elist) + nwx = mg.wrappers.Graph.NetworkXGraph(graph) + print("nwx from katanax:", nwx) + return nwx diff --git a/metagraph/plugins/katana/types.py b/metagraph/plugins/katana/types.py index 65909a3e..9875b0ec 100644 --- a/metagraph/plugins/katana/types.py +++ b/metagraph/plugins/katana/types.py @@ -19,6 +19,8 @@ def __init__( node_weight_index=None, node_dtype=None, edge_dtype="int", + node_type=None, + edge_type=None, has_neg_weight=False, ): super().__init__() @@ -30,7 +32,29 @@ def __init__( self.node_weight_index = node_weight_index self.node_dtype = node_dtype self.edge_dtype = edge_dtype + self.node_type = node_type + self.edge_type = edge_type self.has_neg_weight = has_neg_weight + print( + "init: ", + self, + ", is_weighted: ", + self.is_weighted, + ", is_directed", + self.is_directed, + ", node_weight_index:", + self.node_weight_index, + ", node_dtype:", + self.node_dtype, + ", edge_dtype:", + self.edge_dtype, + ", node_type:", + self.node_type, + ", edge_type:", + self.edge_type, + ", has_neg_weight:", + self.has_neg_weight, + ) def copy(self): return KatanaGraph( @@ -55,7 +79,10 @@ def _compute_abstract_properties( if prop == "is_directed": ret[prop] = obj.is_directed if prop == "node_type": - ret[prop] = "set" if obj.node_weight_index is None else "map" + if obj.node_type is None: + ret[prop] = "set" if obj.node_weight_index is None else "map" + else: + ret[prop] = obj.node_type if prop == "node_dtype": ret[prop] = ( None if obj.node_weight_index is None else obj.node_dtype diff --git a/metagraph/plugins/networkx/types.py b/metagraph/plugins/networkx/types.py index 91f1a4bd..4ab69189 100644 --- a/metagraph/plugins/networkx/types.py +++ b/metagraph/plugins/networkx/types.py @@ -85,7 +85,8 @@ def _compute_abstract_properties( except KeyError: edge_values = None break - if edge_values: + print("edges before checking:", edge_values) + if edge_values and edge_values != {0}: ret["edge_type"] = "map" if ( "edge_dtype" in slow_props @@ -108,6 +109,7 @@ def _compute_abstract_properties( ret["edge_dtype"] = None ret["edge_has_negative_weights"] = None + print("networkx prop:", ret) return ret @classmethod diff --git a/metagraph/tests/translators/test_graph.py b/metagraph/tests/translators/test_graph.py index fd89f6cc..da10aeba 100644 --- a/metagraph/tests/translators/test_graph.py +++ b/metagraph/tests/translators/test_graph.py @@ -22,6 +22,13 @@ def test_graph_roundtrip_directed_unweighted(default_plugin_resolver): g = nx.DiGraph() g.add_nodes_from([1, 3, 5, 7, 8, 9, 10, 11, 15]) g.add_edges_from([(1, 3), (3, 1), (3, 5), (5, 7), (7, 9), (9, 3), (5, 5), (11, 10)]) + + nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) + ranks = np.arange(0, len(nlist)) + nodes = [each[0] for each in nlist] + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + graph = NetworkXGraph(g) rt.verify_round_trip(graph) @@ -29,10 +36,18 @@ def test_graph_roundtrip_directed_unweighted(default_plugin_resolver): g2 = nx.DiGraph() g2.add_nodes_from([1, 3, 5, 7, 8]) g2.add_weighted_edges_from([(1, 3, 2), (3, 5, 4), (5, 7, 6)]) + + nlist2 = sorted(list(g2.nodes(data=True)), key=lambda each: each[0]) + ranks = np.arange(0, len(nlist2)) + nodes = [each[0] for each in nlist2] + mapping = dict(zip(nodes, ranks)) + g2 = nx.relabel_nodes(g2, mapping) + graph2 = NetworkXGraph(g2, aprops={"edge_type": "set"}) rt.verify_round_trip(graph2) +# @pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_directed_weighted(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -43,6 +58,13 @@ def test_graph_roundtrip_directed_weighted(default_plugin_resolver): g.add_weighted_edges_from( [(src, dst, wgt) for (src, dst), wgt in zip(edges, edge_weights)] ) + + nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) + ranks = np.arange(0, len(nlist)) + nodes = [each[0] for each in nlist] + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + rt.verify_round_trip(NetworkXGraph(g)) # float without neg weights g.add_weighted_edges_from( @@ -66,6 +88,7 @@ def test_graph_roundtrip_directed_weighted(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) +@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_directed_symmetric(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -99,6 +122,7 @@ def test_graph_roundtrip_directed_symmetric(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) +@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_undirected_unweighted(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() @@ -108,6 +132,7 @@ def test_graph_roundtrip_undirected_unweighted(default_plugin_resolver): rt.verify_round_trip(graph) +@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_undirected_weighted(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() @@ -141,6 +166,7 @@ def test_graph_roundtrip_undirected_weighted(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) +@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_directed_unweighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -165,6 +191,7 @@ def test_graph_roundtrip_directed_unweighted_nodevals(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) +@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_directed_weighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -200,6 +227,7 @@ def test_graph_roundtrip_directed_weighted_nodevals(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) +@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_undirected_unweighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() @@ -224,6 +252,7 @@ def test_graph_roundtrip_undirected_unweighted_nodevals(default_plugin_resolver) rt.verify_round_trip(NetworkXGraph(g)) +@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_undirected_weighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() @@ -259,6 +288,7 @@ def test_graph_roundtrip_undirected_weighted_nodevals(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) +@pytest.mark.skip(reason="focused debugging") def test_graph_edgeset_oneway_directed(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -272,6 +302,7 @@ def test_graph_edgeset_oneway_directed(default_plugin_resolver): rt.verify_one_way(graph, edgeset) +@pytest.mark.skip(reason="focused debugging") def test_graph_edgeset_oneway_directed_symmetric(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -290,6 +321,7 @@ def test_graph_edgeset_oneway_directed_symmetric(default_plugin_resolver): rt.verify_one_way(graph, edgeset) +@pytest.mark.skip(reason="focused debugging") def test_graph_edgeset_oneway_undirected(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() @@ -303,6 +335,7 @@ def test_graph_edgeset_oneway_undirected(default_plugin_resolver): rt.verify_one_way(graph, edgeset) +@pytest.mark.skip(reason="focused debugging") def test_graph_nodeset_oneway(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() @@ -317,6 +350,7 @@ def test_graph_nodeset_oneway(default_plugin_resolver): rt.verify_one_way(NetworkXGraph(g), NumpyNodeSet(nodes)) +@pytest.mark.skip(reason="focused debugging") def test_networkx_scipy_graph_from_edgemap(default_plugin_resolver): dpr = default_plugin_resolver g = nx.DiGraph() @@ -335,6 +369,7 @@ def test_networkx_scipy_graph_from_edgemap(default_plugin_resolver): dpr.assert_equal(y, intermediate) +@pytest.mark.skip(reason="focused debugging") def test_networkx_scipy_graph_from_edgeset(default_plugin_resolver): dpr = default_plugin_resolver g = nx.DiGraph() @@ -351,6 +386,7 @@ def test_networkx_scipy_graph_from_edgeset(default_plugin_resolver): dpr.assert_equal(y, intermediate) +@pytest.mark.skip(reason="focused debugging") def test_scipy_graphblas_edgemap(default_plugin_resolver): dpr = default_plugin_resolver # 0 2 7 From 2572d54d802f92f67c3512cc8e11c4ff8eac1acb Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Mon, 24 Jan 2022 01:34:42 +0000 Subject: [PATCH 10/21] add additional type conversion --- metagraph/plugins/katana/translators.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/metagraph/plugins/katana/translators.py b/metagraph/plugins/katana/translators.py index 9862964e..feb9fe64 100644 --- a/metagraph/plugins/katana/translators.py +++ b/metagraph/plugins/katana/translators.py @@ -116,6 +116,12 @@ def katanagraph_to_networkx(x: KatanaGraph, **props) -> NetworkXGraph: for nid in pg for j in pg.edge_ids(nid) ] + elif isinstance(edge_weights[0], np.bool_): + elist = [ + (nid, pg.get_edge_dest(j), bool(edge_weights[j])) + for nid in pg + for j in pg.edge_ids(nid) + ] elist = list(OrderedDict.fromkeys(elist)) if x.is_directed: graph = nx.DiGraph() From 03c2702c16135b99c779498276b01b1b6845e342 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Mon, 24 Jan 2022 17:31:18 +0000 Subject: [PATCH 11/21] commit various fixes for now. float accuracy not addressed yet --- metagraph/plugins/__init__.py | 5 -- metagraph/plugins/katana/translators.py | 12 ++-- metagraph/plugins/networkx/types.py | 5 +- metagraph/tests/plugins/katana/conftest.py | 8 +-- metagraph/tests/translators/test_graph.py | 64 +++++++++++++++++----- 5 files changed, 59 insertions(+), 35 deletions(-) diff --git a/metagraph/plugins/__init__.py b/metagraph/plugins/__init__.py index ceb28181..7217b4c9 100644 --- a/metagraph/plugins/__init__.py +++ b/metagraph/plugins/__init__.py @@ -84,9 +84,4 @@ def find_plugins(): registry.register_from_modules(pandas, name="core_pandas") registry.register_from_modules(python, name="core_python") registry.register_from_modules(scipy, name="core_scipy") - if has_katana: - import katana.local - - katana.local.initialize() - return registry.plugins diff --git a/metagraph/plugins/katana/translators.py b/metagraph/plugins/katana/translators.py index feb9fe64..1bff8672 100644 --- a/metagraph/plugins/katana/translators.py +++ b/metagraph/plugins/katana/translators.py @@ -38,7 +38,9 @@ def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: if aprops["is_directed"]: elist = sorted(elist_raw, key=lambda each: (each[0], each[1])) else: - inv_elist = [(each[1], each[0], each[2]) for each in elist_raw] + inv_elist = [ + (each[1], each[0], each[2]) for each in elist_raw if each[0] != each[1] + ] elist = sorted(elist_raw + inv_elist, key=lambda each: (each[0], each[1])) # build the CSR format from the edge list (weight, (src, dst)) row = np.array([each_edge[0] for each_edge in elist]) @@ -55,7 +57,6 @@ def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: # add the edge weight as a new property t = pyarrow.table(dict(value_from_translator=data)) pg.add_edge_property(t) - print("pg:", pg) # use the metagraph's Graph warpper to wrap the katana.local.Graph return KatanaGraph( pg_graph=pg, @@ -88,7 +89,6 @@ def katanagraph_to_networkx(x: KatanaGraph, **props) -> NetworkXGraph: for src in pg for dest in [pg.get_edge_dest(e) for e in pg.edge_ids(src)] } - print("edge_dict_count: ", edge_dict_count) for src in pg: for dest in [pg.get_edge_dest(e) for e in pg.edge_ids(src)]: edge_dict_count[(src, dest)] += 1 @@ -128,9 +128,5 @@ def katanagraph_to_networkx(x: KatanaGraph, **props) -> NetworkXGraph: else: graph = nx.Graph() graph.add_nodes_from(node_list) - print("node list:", node_list) graph.add_weighted_edges_from(elist) - print("edge list:", elist) - nwx = mg.wrappers.Graph.NetworkXGraph(graph) - print("nwx from katanax:", nwx) - return nwx + return mg.wrappers.Graph.NetworkXGraph(graph) diff --git a/metagraph/plugins/networkx/types.py b/metagraph/plugins/networkx/types.py index 4ab69189..fd4e401c 100644 --- a/metagraph/plugins/networkx/types.py +++ b/metagraph/plugins/networkx/types.py @@ -85,7 +85,6 @@ def _compute_abstract_properties( except KeyError: edge_values = None break - print("edges before checking:", edge_values) if edge_values and edge_values != {0}: ret["edge_type"] = "map" if ( @@ -129,8 +128,8 @@ def assert_equal( g1 = obj1.value g2 = obj2.value # Compare - assert ( - g1.nodes() == g2.nodes() + assert sorted(g1.nodes()) == sorted( + g2.nodes() ), f"node mismatch: {g1.nodes()} != {g2.nodes()}" assert ( g1.edges() == g2.edges() diff --git a/metagraph/tests/plugins/katana/conftest.py b/metagraph/tests/plugins/katana/conftest.py index 15d27bfe..1768a6da 100644 --- a/metagraph/tests/plugins/katana/conftest.py +++ b/metagraph/tests/plugins/katana/conftest.py @@ -10,14 +10,13 @@ from katana.local import Graph from katana.local.import_data import from_csr -from metagraph.plugins import has_katana +katana.local.initialize() # Currently Graph does not support undirected graphs # we are using directed graphs with symmetric edges to denote undirected graphs. @pytest.fixture(autouse=True) def pg_rmat15_cleaned_symmetric(): - if not has_katana: - katana.local.initialize() + # katana.local.initialize() pg = Graph(get_rdg_dataset("rmat15_cleaned_symmetric")) return pg @@ -43,8 +42,7 @@ def gen_pg_cleaned_8_12_from_csr(is_directed): """ A helper function for the test, generating Katana's Graph from an edge list """ - if not has_katana: - katana.local.initialize() + # katana.local.initialize() elist_raw = [ (0, 1, 4), (0, 3, 2), diff --git a/metagraph/tests/translators/test_graph.py b/metagraph/tests/translators/test_graph.py index da10aeba..46471b30 100644 --- a/metagraph/tests/translators/test_graph.py +++ b/metagraph/tests/translators/test_graph.py @@ -47,7 +47,6 @@ def test_graph_roundtrip_directed_unweighted(default_plugin_resolver): rt.verify_round_trip(graph2) -# @pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_directed_weighted(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -88,13 +87,19 @@ def test_graph_roundtrip_directed_weighted(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) -@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_directed_symmetric(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() g.add_nodes_from([1, 3, 5, 7, 8, 9, 10, 11, 15]) edges = [(1, 3), (3, 1), (3, 5), (5, 3), (3, 9), (9, 3), (5, 5), (11, 10), (10, 11)] edge_weights = [1.1, 1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] + + nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) + ranks = np.arange(0, len(nlist)) + nodes = [each[0] for each in nlist] + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + # float with neg weights g.add_weighted_edges_from( [(src, dst, wgt) for (src, dst), wgt in zip(edges, edge_weights)] @@ -128,17 +133,30 @@ def test_graph_roundtrip_undirected_unweighted(default_plugin_resolver): g = nx.Graph() g.add_nodes_from([1, 3, 5, 7, 8, 9, 10, 11, 15]) g.add_edges_from([(1, 3), (3, 5), (5, 7), (7, 9), (9, 3), (5, 5), (11, 10)]) + + nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) + ranks = np.arange(0, len(nlist)) + nodes = [each[0] for each in nlist] + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + graph = NetworkXGraph(g) rt.verify_round_trip(graph) -@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_undirected_weighted(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() g.add_nodes_from([1, 3, 5, 7, 8, 9, 10, 11, 15]) edges = [(1, 3), (3, 5), (5, 7), (7, 9), (9, 3), (5, 5), (11, 10)] edge_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] + + nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) + ranks = np.arange(0, len(nlist)) + nodes = [each[0] for each in nlist] + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + # float with neg weights g.add_weighted_edges_from( [(src, dst, wgt) for (src, dst), wgt in zip(edges, edge_weights)] @@ -166,7 +184,6 @@ def test_graph_roundtrip_undirected_weighted(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) -@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_directed_unweighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -175,6 +192,13 @@ def test_graph_roundtrip_directed_unweighted_nodevals(default_plugin_resolver): node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] g.add_nodes_from(nodes) # nodevals as floats + + nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) + ranks = np.arange(0, len(nlist)) + nodes = [each[0] for each in nlist] + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" ) @@ -191,7 +215,6 @@ def test_graph_roundtrip_directed_unweighted_nodevals(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) -@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_directed_weighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -201,6 +224,13 @@ def test_graph_roundtrip_directed_weighted_nodevals(default_plugin_resolver): edge_weights = [1.1, 2.2, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] g.add_nodes_from(nodes) g.add_edges_from(edges) + + nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) + ranks = np.arange(0, len(nlist)) + nodes = [each[0] for each in nlist] + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + # nodevals as floats, edges as ints nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -227,7 +257,6 @@ def test_graph_roundtrip_directed_weighted_nodevals(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) -@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_undirected_unweighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() @@ -235,6 +264,13 @@ def test_graph_roundtrip_undirected_unweighted_nodevals(default_plugin_resolver) nodes = [1, 3, 5, 7, 8, 9, 10, 11, 15] node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] g.add_nodes_from(nodes) + + nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) + ranks = np.arange(0, len(nlist)) + nodes = [each[0] for each in nlist] + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + # nodevals as floats nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -252,7 +288,6 @@ def test_graph_roundtrip_undirected_unweighted_nodevals(default_plugin_resolver) rt.verify_round_trip(NetworkXGraph(g)) -@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_undirected_weighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() @@ -262,6 +297,13 @@ def test_graph_roundtrip_undirected_weighted_nodevals(default_plugin_resolver): edge_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] g.add_nodes_from(nodes) g.add_edges_from(edges) + + nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) + ranks = np.arange(0, len(nlist)) + nodes = [each[0] for each in nlist] + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + # nodevals as floats, edges as bools nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -288,7 +330,6 @@ def test_graph_roundtrip_undirected_weighted_nodevals(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) -@pytest.mark.skip(reason="focused debugging") def test_graph_edgeset_oneway_directed(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -302,7 +343,6 @@ def test_graph_edgeset_oneway_directed(default_plugin_resolver): rt.verify_one_way(graph, edgeset) -@pytest.mark.skip(reason="focused debugging") def test_graph_edgeset_oneway_directed_symmetric(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -321,7 +361,6 @@ def test_graph_edgeset_oneway_directed_symmetric(default_plugin_resolver): rt.verify_one_way(graph, edgeset) -@pytest.mark.skip(reason="focused debugging") def test_graph_edgeset_oneway_undirected(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() @@ -335,7 +374,6 @@ def test_graph_edgeset_oneway_undirected(default_plugin_resolver): rt.verify_one_way(graph, edgeset) -@pytest.mark.skip(reason="focused debugging") def test_graph_nodeset_oneway(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() @@ -350,7 +388,6 @@ def test_graph_nodeset_oneway(default_plugin_resolver): rt.verify_one_way(NetworkXGraph(g), NumpyNodeSet(nodes)) -@pytest.mark.skip(reason="focused debugging") def test_networkx_scipy_graph_from_edgemap(default_plugin_resolver): dpr = default_plugin_resolver g = nx.DiGraph() @@ -369,7 +406,6 @@ def test_networkx_scipy_graph_from_edgemap(default_plugin_resolver): dpr.assert_equal(y, intermediate) -@pytest.mark.skip(reason="focused debugging") def test_networkx_scipy_graph_from_edgeset(default_plugin_resolver): dpr = default_plugin_resolver g = nx.DiGraph() @@ -386,7 +422,6 @@ def test_networkx_scipy_graph_from_edgeset(default_plugin_resolver): dpr.assert_equal(y, intermediate) -@pytest.mark.skip(reason="focused debugging") def test_scipy_graphblas_edgemap(default_plugin_resolver): dpr = default_plugin_resolver # 0 2 7 @@ -406,6 +441,7 @@ def test_scipy_graphblas_edgemap(default_plugin_resolver): dpr.assert_equal(y, intermediate) +# @pytest.mark.skip(reason="focused debugging") # def test_networkx_2_pandas(default_plugin_resolver): # dpr = default_plugin_resolver # g = nx.DiGraph() From dcb4b4419cc49a50bb27c1644000468af06f10cf Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Thu, 27 Jan 2022 16:35:18 +0000 Subject: [PATCH 12/21] remove duplicated self-edge, add a testcase and various cleanup --- metagraph/plugins/__init__.py | 1 + metagraph/plugins/katana/algorithms.py | 8 +++ metagraph/plugins/katana/translators.py | 3 + metagraph/plugins/katana/types.py | 20 ------ metagraph/plugins/networkx/types.py | 1 - metagraph/tests/plugins/katana/conftest.py | 23 +++++- .../tests/plugins/katana/test_algorithms.py | 10 ++- metagraph/tests/translators/test_graph.py | 72 ------------------- 8 files changed, 43 insertions(+), 95 deletions(-) diff --git a/metagraph/plugins/__init__.py b/metagraph/plugins/__init__.py index 7217b4c9..53de8ee0 100644 --- a/metagraph/plugins/__init__.py +++ b/metagraph/plugins/__init__.py @@ -84,4 +84,5 @@ def find_plugins(): registry.register_from_modules(pandas, name="core_pandas") registry.register_from_modules(python, name="core_python") registry.register_from_modules(scipy, name="core_scipy") + return registry.plugins diff --git a/metagraph/plugins/katana/algorithms.py b/metagraph/plugins/katana/algorithms.py index 6885c4a0..020cfb41 100644 --- a/metagraph/plugins/katana/algorithms.py +++ b/metagraph/plugins/katana/algorithms.py @@ -35,6 +35,13 @@ def kg_bfs_iter( :return: the BFS traversal result in order :rtype: NumpyVectorType """ + g = graph.value + edges = [ + (src, dest) + for src in g + for dest in [g.get_edge_dest(e) for e in g.edge_ids(src)] + ] + edge_weights = g.get_edge_property(graph.edge_weight_prop_name).to_pandas() bfs_prop_name = "bfs_prop_start_from_" + str(source_node) depth_limit_internal = ( 2 ** 30 - 1 if depth_limit == -1 else depth_limit @@ -42,6 +49,7 @@ def kg_bfs_iter( start_node = source_node if not has_node_prop(graph.value, bfs_prop_name): bfs(graph.value, start_node, bfs_prop_name) + bfs_list_1st = graph.value.get_node_property(bfs_prop_name).to_numpy() pg_bfs_list = ( graph.value.get_node_property(bfs_prop_name).to_pandas().values.tolist() ) diff --git a/metagraph/plugins/katana/translators.py b/metagraph/plugins/katana/translators.py index 1bff8672..9b168eb8 100644 --- a/metagraph/plugins/katana/translators.py +++ b/metagraph/plugins/katana/translators.py @@ -4,6 +4,8 @@ import networkx as nx import numpy as np import pyarrow +import katana.local + from metagraph import translator from metagraph.plugins.networkx.types import NetworkXGraph from scipy.sparse import csr_matrix @@ -53,6 +55,7 @@ def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: csr = csr_matrix((data, (row, col)), shape=(len(nlist), len(nlist))) # call the katana api to build a Graph (unweighted) from the CSR format # noting that the first 0 in csr.indptr is excluded + katana.local.initialize() pg = from_csr(csr.indptr[1:], csr.indices) # add the edge weight as a new property t = pyarrow.table(dict(value_from_translator=data)) diff --git a/metagraph/plugins/katana/types.py b/metagraph/plugins/katana/types.py index 9875b0ec..1b8b543b 100644 --- a/metagraph/plugins/katana/types.py +++ b/metagraph/plugins/katana/types.py @@ -35,26 +35,6 @@ def __init__( self.node_type = node_type self.edge_type = edge_type self.has_neg_weight = has_neg_weight - print( - "init: ", - self, - ", is_weighted: ", - self.is_weighted, - ", is_directed", - self.is_directed, - ", node_weight_index:", - self.node_weight_index, - ", node_dtype:", - self.node_dtype, - ", edge_dtype:", - self.edge_dtype, - ", node_type:", - self.node_type, - ", edge_type:", - self.edge_type, - ", has_neg_weight:", - self.has_neg_weight, - ) def copy(self): return KatanaGraph( diff --git a/metagraph/plugins/networkx/types.py b/metagraph/plugins/networkx/types.py index fd4e401c..9f87e71d 100644 --- a/metagraph/plugins/networkx/types.py +++ b/metagraph/plugins/networkx/types.py @@ -108,7 +108,6 @@ def _compute_abstract_properties( ret["edge_dtype"] = None ret["edge_has_negative_weights"] = None - print("networkx prop:", ret) return ret @classmethod diff --git a/metagraph/tests/plugins/katana/conftest.py b/metagraph/tests/plugins/katana/conftest.py index 1768a6da..7f92d93a 100644 --- a/metagraph/tests/plugins/katana/conftest.py +++ b/metagraph/tests/plugins/katana/conftest.py @@ -64,7 +64,9 @@ def gen_pg_cleaned_8_12_from_csr(is_directed): if is_directed: elist = sorted(elist_raw, key=lambda each: (each[0], each[1])) else: - inv_elist = [(each[1], each[0], each[2]) for each in elist_raw] + inv_elist = [ + (each[1], each[0], each[2]) for each in elist_raw if each[0] != each[1] + ] elist = sorted(elist_raw + inv_elist, key=lambda each: (each[0], each[1])) nlist = sorted(nlist_raw, key=lambda each: each) # build the CSR format from the edge list (weight, (src, dst)) @@ -119,6 +121,25 @@ def networkx_weighted_directed_8_12(): return graph1 +@pytest.fixture(autouse=True) +def networkx_weighted_directed_bfs(): + df = pd.read_csv("metagraph/tests/plugins/katana/data/edge2.csv") + em = mg.wrappers.EdgeMap.PandasEdgeMap( + df, "Source", "Destination", "Weight", is_directed=True + ) + graph1 = mg.algos.util.graph.build(em) + return graph1 + + +# directed graph +@pytest.fixture(autouse=True) +def kg_from_nx_di_bfs(networkx_weighted_directed_bfs): + pg_test_case = mg.translate( + networkx_weighted_directed_bfs, mg.wrappers.Graph.KatanaGraph + ) + return pg_test_case + + # directed graph @pytest.fixture(autouse=True) def kg_from_nx_di_8_12(networkx_weighted_directed_8_12): diff --git a/metagraph/tests/plugins/katana/test_algorithms.py b/metagraph/tests/plugins/katana/test_algorithms.py index 616a91ac..dd34ba1d 100644 --- a/metagraph/tests/plugins/katana/test_algorithms.py +++ b/metagraph/tests/plugins/katana/test_algorithms.py @@ -3,6 +3,14 @@ import pytest +@pytest.mark.xfail(reason="until BFS fix") +def test_bfs_iter(networkx_weighted_directed_bfs, kg_from_nx_di_bfs): + bfs1_nx = mg.algos.traversal.bfs_iter(networkx_weighted_directed_bfs, 0) + bfs1_kg = mg.algos.traversal.bfs_iter(kg_from_nx_di_bfs, 0) + assert bfs1_kg.tolist() == bfs1_nx.tolist() + assert bfs1_kg.tolist() == [0, 3, 4, 5, 6, 2, 7] + + def test_bfs(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): bfs1_nx = mg.algos.traversal.bfs_iter(networkx_weighted_directed_8_12, 0) bfs2_nx = mg.algos.traversal.bfs_iter(networkx_weighted_directed_8_12, 2) @@ -122,7 +130,7 @@ def test_pagerank(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): @pytest.mark.runslow def test_pagerank_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): """ - test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code + test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs """ pr_kg1 = mg.algos.centrality.pagerank(katanagraph_cleaned_8_12_di) pr_kg2 = mg.algos.centrality.pagerank(katanagraph_cleaned_8_12_di) diff --git a/metagraph/tests/translators/test_graph.py b/metagraph/tests/translators/test_graph.py index 46471b30..fd89f6cc 100644 --- a/metagraph/tests/translators/test_graph.py +++ b/metagraph/tests/translators/test_graph.py @@ -22,13 +22,6 @@ def test_graph_roundtrip_directed_unweighted(default_plugin_resolver): g = nx.DiGraph() g.add_nodes_from([1, 3, 5, 7, 8, 9, 10, 11, 15]) g.add_edges_from([(1, 3), (3, 1), (3, 5), (5, 7), (7, 9), (9, 3), (5, 5), (11, 10)]) - - nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) - ranks = np.arange(0, len(nlist)) - nodes = [each[0] for each in nlist] - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - graph = NetworkXGraph(g) rt.verify_round_trip(graph) @@ -36,13 +29,6 @@ def test_graph_roundtrip_directed_unweighted(default_plugin_resolver): g2 = nx.DiGraph() g2.add_nodes_from([1, 3, 5, 7, 8]) g2.add_weighted_edges_from([(1, 3, 2), (3, 5, 4), (5, 7, 6)]) - - nlist2 = sorted(list(g2.nodes(data=True)), key=lambda each: each[0]) - ranks = np.arange(0, len(nlist2)) - nodes = [each[0] for each in nlist2] - mapping = dict(zip(nodes, ranks)) - g2 = nx.relabel_nodes(g2, mapping) - graph2 = NetworkXGraph(g2, aprops={"edge_type": "set"}) rt.verify_round_trip(graph2) @@ -57,13 +43,6 @@ def test_graph_roundtrip_directed_weighted(default_plugin_resolver): g.add_weighted_edges_from( [(src, dst, wgt) for (src, dst), wgt in zip(edges, edge_weights)] ) - - nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) - ranks = np.arange(0, len(nlist)) - nodes = [each[0] for each in nlist] - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - rt.verify_round_trip(NetworkXGraph(g)) # float without neg weights g.add_weighted_edges_from( @@ -93,13 +72,6 @@ def test_graph_roundtrip_directed_symmetric(default_plugin_resolver): g.add_nodes_from([1, 3, 5, 7, 8, 9, 10, 11, 15]) edges = [(1, 3), (3, 1), (3, 5), (5, 3), (3, 9), (9, 3), (5, 5), (11, 10), (10, 11)] edge_weights = [1.1, 1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] - - nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) - ranks = np.arange(0, len(nlist)) - nodes = [each[0] for each in nlist] - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - # float with neg weights g.add_weighted_edges_from( [(src, dst, wgt) for (src, dst), wgt in zip(edges, edge_weights)] @@ -127,19 +99,11 @@ def test_graph_roundtrip_directed_symmetric(default_plugin_resolver): rt.verify_round_trip(NetworkXGraph(g)) -@pytest.mark.skip(reason="focused debugging") def test_graph_roundtrip_undirected_unweighted(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.Graph() g.add_nodes_from([1, 3, 5, 7, 8, 9, 10, 11, 15]) g.add_edges_from([(1, 3), (3, 5), (5, 7), (7, 9), (9, 3), (5, 5), (11, 10)]) - - nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) - ranks = np.arange(0, len(nlist)) - nodes = [each[0] for each in nlist] - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - graph = NetworkXGraph(g) rt.verify_round_trip(graph) @@ -150,13 +114,6 @@ def test_graph_roundtrip_undirected_weighted(default_plugin_resolver): g.add_nodes_from([1, 3, 5, 7, 8, 9, 10, 11, 15]) edges = [(1, 3), (3, 5), (5, 7), (7, 9), (9, 3), (5, 5), (11, 10)] edge_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] - - nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) - ranks = np.arange(0, len(nlist)) - nodes = [each[0] for each in nlist] - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - # float with neg weights g.add_weighted_edges_from( [(src, dst, wgt) for (src, dst), wgt in zip(edges, edge_weights)] @@ -192,13 +149,6 @@ def test_graph_roundtrip_directed_unweighted_nodevals(default_plugin_resolver): node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] g.add_nodes_from(nodes) # nodevals as floats - - nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) - ranks = np.arange(0, len(nlist)) - nodes = [each[0] for each in nlist] - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" ) @@ -224,13 +174,6 @@ def test_graph_roundtrip_directed_weighted_nodevals(default_plugin_resolver): edge_weights = [1.1, 2.2, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] g.add_nodes_from(nodes) g.add_edges_from(edges) - - nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) - ranks = np.arange(0, len(nlist)) - nodes = [each[0] for each in nlist] - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - # nodevals as floats, edges as ints nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -264,13 +207,6 @@ def test_graph_roundtrip_undirected_unweighted_nodevals(default_plugin_resolver) nodes = [1, 3, 5, 7, 8, 9, 10, 11, 15] node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] g.add_nodes_from(nodes) - - nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) - ranks = np.arange(0, len(nlist)) - nodes = [each[0] for each in nlist] - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - # nodevals as floats nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -297,13 +233,6 @@ def test_graph_roundtrip_undirected_weighted_nodevals(default_plugin_resolver): edge_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] g.add_nodes_from(nodes) g.add_edges_from(edges) - - nlist = sorted(list(g.nodes(data=True)), key=lambda each: each[0]) - ranks = np.arange(0, len(nlist)) - nodes = [each[0] for each in nlist] - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - # nodevals as floats, edges as bools nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -441,7 +370,6 @@ def test_scipy_graphblas_edgemap(default_plugin_resolver): dpr.assert_equal(y, intermediate) -# @pytest.mark.skip(reason="focused debugging") # def test_networkx_2_pandas(default_plugin_resolver): # dpr = default_plugin_resolver # g = nx.DiGraph() From f3188d8ba1300564a501c100c03cfa1bdf0a2cb1 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Thu, 27 Jan 2022 16:49:20 +0000 Subject: [PATCH 13/21] include data directly as string array --- metagraph/tests/plugins/katana/conftest.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/metagraph/tests/plugins/katana/conftest.py b/metagraph/tests/plugins/katana/conftest.py index 7f92d93a..1235c21b 100644 --- a/metagraph/tests/plugins/katana/conftest.py +++ b/metagraph/tests/plugins/katana/conftest.py @@ -1,3 +1,4 @@ +import io import metagraph as mg import numpy as np import pandas as pd @@ -123,7 +124,21 @@ def networkx_weighted_directed_8_12(): @pytest.fixture(autouse=True) def networkx_weighted_directed_bfs(): - df = pd.read_csv("metagraph/tests/plugins/katana/data/edge2.csv") + data = """ +Source,Destination,Weight +0,3,1 +1,0,2 +1,4,3 +2,4,4 +2,5,5 +2,7,6 +3,4,8 +4,5,9 +5,6,10 +6,2,11 +""" + csv_file = io.StringIO(data) + df = pd.read_csv(csv_file) em = mg.wrappers.EdgeMap.PandasEdgeMap( df, "Source", "Destination", "Weight", is_directed=True ) From 312ab32d1226712a349b12179094a90f14ad1865 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Thu, 27 Jan 2022 17:54:23 +0000 Subject: [PATCH 14/21] fix the precision error introduced by accident --- metagraph/plugins/katana/translators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metagraph/plugins/katana/translators.py b/metagraph/plugins/katana/translators.py index 9b168eb8..a2c0dfda 100644 --- a/metagraph/plugins/katana/translators.py +++ b/metagraph/plugins/katana/translators.py @@ -130,6 +130,5 @@ def katanagraph_to_networkx(x: KatanaGraph, **props) -> NetworkXGraph: graph = nx.DiGraph() else: graph = nx.Graph() - graph.add_nodes_from(node_list) graph.add_weighted_edges_from(elist) return mg.wrappers.Graph.NetworkXGraph(graph) From 3b3a3922ad7e4f97456b172569402271294947fe Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Thu, 27 Jan 2022 19:33:07 +0000 Subject: [PATCH 15/21] add one-way node relabeling from nx to katana --- metagraph/plugins/katana/translators.py | 9 +++++---- metagraph/tests/translators/test_graph.py | 24 +++++++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/metagraph/plugins/katana/translators.py b/metagraph/plugins/katana/translators.py index a2c0dfda..340a92e8 100644 --- a/metagraph/plugins/katana/translators.py +++ b/metagraph/plugins/katana/translators.py @@ -18,10 +18,10 @@ @translator def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: nlist = sorted(list(x.value.nodes(data=True)), key=lambda each: each[0]) - # ranks = np.arange(0, len(nlist)) - # nodes = [each[0] for each in nlist] - # mapping = dict(zip(nodes, ranks)) - # x.value = nx.relabel_nodes(x.value, mapping) + ranks = np.arange(0, len(nlist)) + nodes = [each[0] for each in nlist] + mapping = dict(zip(nodes, ranks)) + x.value = nx.relabel_nodes(x.value, mapping) aprops = NetworkXGraph.Type.compute_abstract_properties( x, { @@ -131,4 +131,5 @@ def katanagraph_to_networkx(x: KatanaGraph, **props) -> NetworkXGraph: else: graph = nx.Graph() graph.add_weighted_edges_from(elist) + graph.add_nodes_from(node_list) # add node list after edge list return mg.wrappers.Graph.NetworkXGraph(graph) diff --git a/metagraph/tests/translators/test_graph.py b/metagraph/tests/translators/test_graph.py index fd89f6cc..96560b87 100644 --- a/metagraph/tests/translators/test_graph.py +++ b/metagraph/tests/translators/test_graph.py @@ -148,6 +148,12 @@ def test_graph_roundtrip_directed_unweighted_nodevals(default_plugin_resolver): nodes = [1, 3, 5, 7, 8, 9, 10, 11, 15] node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] g.add_nodes_from(nodes) + + nlist = sorted(nodes) + ranks = np.arange(0, len(nlist)) + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + # nodevals as floats nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -174,6 +180,12 @@ def test_graph_roundtrip_directed_weighted_nodevals(default_plugin_resolver): edge_weights = [1.1, 2.2, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] g.add_nodes_from(nodes) g.add_edges_from(edges) + + nlist = sorted(nodes) + ranks = np.arange(0, len(nlist)) + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + # nodevals as floats, edges as ints nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -207,6 +219,12 @@ def test_graph_roundtrip_undirected_unweighted_nodevals(default_plugin_resolver) nodes = [1, 3, 5, 7, 8, 9, 10, 11, 15] node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] g.add_nodes_from(nodes) + + nlist = sorted(nodes) + ranks = np.arange(0, len(nlist)) + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + # nodevals as floats nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -233,6 +251,12 @@ def test_graph_roundtrip_undirected_weighted_nodevals(default_plugin_resolver): edge_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] g.add_nodes_from(nodes) g.add_edges_from(edges) + + nlist = sorted(nodes) + ranks = np.arange(0, len(nlist)) + mapping = dict(zip(nodes, ranks)) + g = nx.relabel_nodes(g, mapping) + # nodevals as floats, edges as bools nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" From 6c9830aff23585416a6f4f019153fa65e1711d2d Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Fri, 28 Jan 2022 19:16:29 +0000 Subject: [PATCH 16/21] tweak the order of node addition and weight addition for pagerank --- metagraph/plugins/katana/translators.py | 52 +++++++++++++++++-- metagraph/plugins/katana/types.py | 4 ++ .../tests/plugins/katana/test_translators.py | 7 +-- metagraph/tests/translators/test_graph.py | 31 ++--------- 4 files changed, 60 insertions(+), 34 deletions(-) diff --git a/metagraph/plugins/katana/translators.py b/metagraph/plugins/katana/translators.py index 340a92e8..9155918d 100644 --- a/metagraph/plugins/katana/translators.py +++ b/metagraph/plugins/katana/translators.py @@ -21,7 +21,8 @@ def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: ranks = np.arange(0, len(nlist)) nodes = [each[0] for each in nlist] mapping = dict(zip(nodes, ranks)) - x.value = nx.relabel_nodes(x.value, mapping) + # relabel Node IDs without changing the original graph + xval_map = nx.relabel_nodes(x.value, mapping) aprops = NetworkXGraph.Type.compute_abstract_properties( x, { @@ -35,7 +36,7 @@ def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: ) is_weighted = aprops["edge_type"] == "map" # get the edge list directly from the NetworkX Graph - elist_raw = list(x.value.edges(data=True)) + elist_raw = list(xval_map.edges(data=True)) # sort the eddge list and node list if aprops["is_directed"]: elist = sorted(elist_raw, key=lambda each: (each[0], each[1])) @@ -58,13 +59,28 @@ def networkx_to_katanagraph(x: NetworkXGraph, **props) -> KatanaGraph: katana.local.initialize() pg = from_csr(csr.indptr[1:], csr.indices) # add the edge weight as a new property - t = pyarrow.table(dict(value_from_translator=data)) + t = pyarrow.table(dict(edge_value_from_translator=data)) pg.add_edge_property(t) + node_list = [nid for nid in nodes] + node_rmap = pyarrow.table(dict(node_id_reverse_map=node_list)) + pg.add_node_property(node_rmap) + node_id_map_prop_name = "node_id_reverse_map" + + node_attributes = nx.get_node_attributes(x.value, "weight") + node_weight_prop_name = None + if node_attributes: + weights = [node_attributes[node] for node in node_list] + node_weight_prop = pyarrow.table(dict(node_value_from_translator=weights)) + node_weight_prop_name = "node_value_from_translator" + pg.add_node_property(node_weight_prop) + # use the metagraph's Graph warpper to wrap the katana.local.Graph return KatanaGraph( pg_graph=pg, is_weighted=is_weighted, - edge_weight_prop_name="value_from_translator", + edge_weight_prop_name="edge_value_from_translator", + node_weight_prop_name=node_weight_prop_name, + node_id_map_prop_name=node_id_map_prop_name, is_directed=aprops["is_directed"], node_weight_index=0, node_dtype=aprops["node_dtype"], @@ -130,6 +146,32 @@ def katanagraph_to_networkx(x: KatanaGraph, **props) -> NetworkXGraph: graph = nx.DiGraph() else: graph = nx.Graph() + # add node list first for the same order as weights graph.add_weighted_edges_from(elist) - graph.add_nodes_from(node_list) # add node list after edge list + graph.add_nodes_from(node_list) + + # remap Node IDs if needed + if x.node_id_map_prop_name: + nodeid_map = pg.get_node_property(x.node_id_map_prop_name).to_pandas() + ranks = np.arange(0, len(nodeid_map)) + mapping = dict(zip(ranks, nodeid_map)) + graph = nx.relabel_nodes(graph, mapping) + + # retrieve node weights and set the graph + if x.node_weight_prop_name: + nodes = graph.nodes() + nlist = [] + node_weights = pg.get_node_property(x.node_weight_prop_name).to_pandas() + if isinstance(node_weights[0], np.int64): + nlist = [int(wgt) for wgt in node_weights] + elif isinstance(node_weights[0], pyarrow.lib.Int64Scalar): + nlist = [wgt.as_py() for wgt in node_weights] + elif isinstance(node_weights[0], np.float64): + nlist = [float(wgt) for wgt in node_weights] + elif isinstance(node_weights[0], np.bool_): + nlist = [bool(wgt) for wgt in node_weights] + nx.set_node_attributes( + graph, {node: wgt for node, wgt in zip(nodeid_map, nlist)}, name="weight" + ) + return mg.wrappers.Graph.NetworkXGraph(graph) diff --git a/metagraph/plugins/katana/types.py b/metagraph/plugins/katana/types.py index 1b8b543b..455e7209 100644 --- a/metagraph/plugins/katana/types.py +++ b/metagraph/plugins/katana/types.py @@ -15,6 +15,8 @@ def __init__( pg_graph, is_weighted=True, edge_weight_prop_name="value", + node_weight_prop_name=None, + node_id_map_prop_name=None, is_directed=True, node_weight_index=None, node_dtype=None, @@ -28,6 +30,8 @@ def __init__( self.value = pg_graph self.is_weighted = is_weighted self.edge_weight_prop_name = edge_weight_prop_name + self.node_weight_prop_name = node_weight_prop_name + self.node_id_map_prop_name = node_id_map_prop_name self.is_directed = is_directed self.node_weight_index = node_weight_index self.node_dtype = node_dtype diff --git a/metagraph/tests/plugins/katana/test_translators.py b/metagraph/tests/plugins/katana/test_translators.py index fbf2420c..0c95dba6 100644 --- a/metagraph/tests/plugins/katana/test_translators.py +++ b/metagraph/tests/plugins/katana/test_translators.py @@ -49,13 +49,14 @@ def test_schema(kg_from_nx_di_8_12): def test_edge_property_directed(kg_from_nx_di_8_12): assert ( - kg_from_nx_di_8_12.value.loaded_edge_schema()[0].name == "value_from_translator" + kg_from_nx_di_8_12.value.loaded_edge_schema()[0].name + == "edge_value_from_translator" ) assert kg_from_nx_di_8_12.value.get_edge_property( 0 - ) == kg_from_nx_di_8_12.value.get_edge_property("value_from_translator") + ) == kg_from_nx_di_8_12.value.get_edge_property("edge_value_from_translator") assert kg_from_nx_di_8_12.value.get_edge_property( - "value_from_translator" + "edge_value_from_translator" ).tolist() == [4, 2, 7, 3, 5, 5, 2, 8, 1, 4, 4, 6,] diff --git a/metagraph/tests/translators/test_graph.py b/metagraph/tests/translators/test_graph.py index 96560b87..cb89b2c4 100644 --- a/metagraph/tests/translators/test_graph.py +++ b/metagraph/tests/translators/test_graph.py @@ -33,6 +33,7 @@ def test_graph_roundtrip_directed_unweighted(default_plugin_resolver): rt.verify_round_trip(graph2) +@pytest.mark.skip("focus on one") def test_graph_roundtrip_directed_weighted(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -144,16 +145,12 @@ def test_graph_roundtrip_undirected_weighted(default_plugin_resolver): def test_graph_roundtrip_directed_unweighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() - g.add_edges_from([(1, 3), (3, 1), (3, 5), (5, 7), (7, 9), (9, 3), (5, 5), (11, 10)]) + nodes = [1, 3, 5, 7, 8, 9, 10, 11, 15] - node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] + # have to add ndoes first to keep the same order with the weights g.add_nodes_from(nodes) - - nlist = sorted(nodes) - ranks = np.arange(0, len(nlist)) - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - + g.add_edges_from([(1, 3), (3, 1), (3, 5), (5, 7), (7, 9), (9, 3), (5, 5), (11, 10)]) + node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] # nodevals as floats nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -180,12 +177,6 @@ def test_graph_roundtrip_directed_weighted_nodevals(default_plugin_resolver): edge_weights = [1.1, 2.2, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] g.add_nodes_from(nodes) g.add_edges_from(edges) - - nlist = sorted(nodes) - ranks = np.arange(0, len(nlist)) - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - # nodevals as floats, edges as ints nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -219,12 +210,6 @@ def test_graph_roundtrip_undirected_unweighted_nodevals(default_plugin_resolver) nodes = [1, 3, 5, 7, 8, 9, 10, 11, 15] node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] g.add_nodes_from(nodes) - - nlist = sorted(nodes) - ranks = np.arange(0, len(nlist)) - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - # nodevals as floats nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" @@ -251,12 +236,6 @@ def test_graph_roundtrip_undirected_weighted_nodevals(default_plugin_resolver): edge_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0] g.add_nodes_from(nodes) g.add_edges_from(edges) - - nlist = sorted(nodes) - ranks = np.arange(0, len(nlist)) - mapping = dict(zip(nodes, ranks)) - g = nx.relabel_nodes(g, mapping) - # nodevals as floats, edges as bools nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" From 1824decc61b033405692bf2292e7f46c85743bba Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Fri, 28 Jan 2022 19:22:27 +0000 Subject: [PATCH 17/21] update the node schema with a new property --- metagraph/tests/plugins/katana/test_translators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagraph/tests/plugins/katana/test_translators.py b/metagraph/tests/plugins/katana/test_translators.py index 0c95dba6..4456434e 100644 --- a/metagraph/tests/plugins/katana/test_translators.py +++ b/metagraph/tests/plugins/katana/test_translators.py @@ -43,7 +43,7 @@ def test_topology(kg_from_nx_di_8_12): def test_schema(kg_from_nx_di_8_12): - assert len(kg_from_nx_di_8_12.value.loaded_node_schema()) == 0 + assert len(kg_from_nx_di_8_12.value.loaded_node_schema()) == 1 assert len(kg_from_nx_di_8_12.value.loaded_edge_schema()) == 1 From 9b1b1d6b569795919f68ac0e1a795c631730a101 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Fri, 28 Jan 2022 19:35:01 +0000 Subject: [PATCH 18/21] revert the change to metegraph core --- metagraph/plugins/networkx/types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagraph/plugins/networkx/types.py b/metagraph/plugins/networkx/types.py index 9f87e71d..47f206e4 100644 --- a/metagraph/plugins/networkx/types.py +++ b/metagraph/plugins/networkx/types.py @@ -127,8 +127,8 @@ def assert_equal( g1 = obj1.value g2 = obj2.value # Compare - assert sorted(g1.nodes()) == sorted( - g2.nodes() + assert ( + g1.nodes() == g2.nodes() ), f"node mismatch: {g1.nodes()} != {g2.nodes()}" assert ( g1.edges() == g2.edges() From 319878dcad71d43b8aab91ec0612e06f0e2af532 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Fri, 28 Jan 2022 19:49:19 +0000 Subject: [PATCH 19/21] remove the pytest mark --- metagraph/tests/translators/test_graph.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metagraph/tests/translators/test_graph.py b/metagraph/tests/translators/test_graph.py index cb89b2c4..2bbce040 100644 --- a/metagraph/tests/translators/test_graph.py +++ b/metagraph/tests/translators/test_graph.py @@ -33,7 +33,6 @@ def test_graph_roundtrip_directed_unweighted(default_plugin_resolver): rt.verify_round_trip(graph2) -@pytest.mark.skip("focus on one") def test_graph_roundtrip_directed_weighted(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() @@ -145,9 +144,8 @@ def test_graph_roundtrip_undirected_weighted(default_plugin_resolver): def test_graph_roundtrip_directed_unweighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() - nodes = [1, 3, 5, 7, 8, 9, 10, 11, 15] - # have to add ndoes first to keep the same order with the weights + # have to add nodes first to keep the same order with the weights g.add_nodes_from(nodes) g.add_edges_from([(1, 3), (3, 1), (3, 5), (5, 7), (7, 9), (9, 3), (5, 5), (11, 10)]) node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] From 2c7a25814201c8ce4e8e31767d28afd4800e6136 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Sat, 29 Jan 2022 09:57:40 -0500 Subject: [PATCH 20/21] remove edge1.csv to avoid fixed path and revert changes to test_graph.py --- metagraph/tests/plugins/katana/conftest.py | 53 +++++++++++++------ metagraph/tests/plugins/katana/data/edge1.csv | 13 ----- metagraph/tests/translators/test_graph.py | 5 +- 3 files changed, 39 insertions(+), 32 deletions(-) delete mode 100644 metagraph/tests/plugins/katana/data/edge1.csv diff --git a/metagraph/tests/plugins/katana/conftest.py b/metagraph/tests/plugins/katana/conftest.py index 1235c21b..7f68dca2 100644 --- a/metagraph/tests/plugins/katana/conftest.py +++ b/metagraph/tests/plugins/katana/conftest.py @@ -13,6 +13,38 @@ katana.local.initialize() +data_8_12 = """ +Source,Destination,Weight +0,1,4 +0,3,2 +0,4,7 +1,3,3 +1,4,5 +2,4,5 +2,5,2 +2,6,8 +3,4,1 +4,7,4 +5,6,4 +5,7,6 +""" + + +data_8_11 = """ +Source,Destination,Weight +0,3,1 +1,0,2 +1,4,3 +2,4,4 +2,5,5 +2,7,6 +3,4,8 +4,5,9 +5,6,10 +6,2,11 +""" + + # Currently Graph does not support undirected graphs # we are using directed graphs with symmetric edges to denote undirected graphs. @pytest.fixture(autouse=True) @@ -104,7 +136,8 @@ def katanagraph_cleaned_8_12_ud(): @pytest.fixture(autouse=True) def networkx_weighted_undirected_8_12(): - df = pd.read_csv("metagraph/tests/plugins/katana/data/edge1.csv") + csv_file = io.StringIO(data_8_12) + df = pd.read_csv(csv_file) em = mg.wrappers.EdgeMap.PandasEdgeMap( df, "Source", "Destination", "Weight", is_directed=False ) @@ -114,7 +147,8 @@ def networkx_weighted_undirected_8_12(): @pytest.fixture(autouse=True) def networkx_weighted_directed_8_12(): - df = pd.read_csv("metagraph/tests/plugins/katana/data/edge1.csv") + csv_file = io.StringIO(data_8_12) + df = pd.read_csv(csv_file) em = mg.wrappers.EdgeMap.PandasEdgeMap( df, "Source", "Destination", "Weight", is_directed=True ) @@ -124,20 +158,7 @@ def networkx_weighted_directed_8_12(): @pytest.fixture(autouse=True) def networkx_weighted_directed_bfs(): - data = """ -Source,Destination,Weight -0,3,1 -1,0,2 -1,4,3 -2,4,4 -2,5,5 -2,7,6 -3,4,8 -4,5,9 -5,6,10 -6,2,11 -""" - csv_file = io.StringIO(data) + csv_file = io.StringIO(data_8_11) df = pd.read_csv(csv_file) em = mg.wrappers.EdgeMap.PandasEdgeMap( df, "Source", "Destination", "Weight", is_directed=True diff --git a/metagraph/tests/plugins/katana/data/edge1.csv b/metagraph/tests/plugins/katana/data/edge1.csv deleted file mode 100644 index 900a0460..00000000 --- a/metagraph/tests/plugins/katana/data/edge1.csv +++ /dev/null @@ -1,13 +0,0 @@ -Source,Destination,Weight -0,1,4 -0,3,2 -0,4,7 -1,3,3 -1,4,5 -2,4,5 -2,5,2 -2,6,8 -3,4,1 -4,7,4 -5,6,4 -5,7,6 \ No newline at end of file diff --git a/metagraph/tests/translators/test_graph.py b/metagraph/tests/translators/test_graph.py index 2bbce040..fd89f6cc 100644 --- a/metagraph/tests/translators/test_graph.py +++ b/metagraph/tests/translators/test_graph.py @@ -144,11 +144,10 @@ def test_graph_roundtrip_undirected_weighted(default_plugin_resolver): def test_graph_roundtrip_directed_unweighted_nodevals(default_plugin_resolver): rt = RoundTripper(default_plugin_resolver) g = nx.DiGraph() - nodes = [1, 3, 5, 7, 8, 9, 10, 11, 15] - # have to add nodes first to keep the same order with the weights - g.add_nodes_from(nodes) g.add_edges_from([(1, 3), (3, 1), (3, 5), (5, 7), (7, 9), (9, 3), (5, 5), (11, 10)]) + nodes = [1, 3, 5, 7, 8, 9, 10, 11, 15] node_weights = [1.1, 0.0, -4.4, 4.4, 6.5, 1.2, 2.0, 0.01, 15.2] + g.add_nodes_from(nodes) # nodevals as floats nx.set_node_attributes( g, {node: wgt for node, wgt in zip(nodes, node_weights)}, name="weight" From 01877afe1e29fb267b4fd2e12d3fc0933bab6052 Mon Sep 17 00:00:00 2001 From: wkyu2kg Date: Sun, 30 Jan 2022 10:41:35 -0500 Subject: [PATCH 21/21] rename file paths for consistency between plugin and internal copy --- metagraph/plugins/__init__.py | 10 +++++----- metagraph/plugins/{katana => katanagraph}/__init__.py | 0 .../plugins/{katana => katanagraph}/algorithms.py | 0 .../plugins/{katana => katanagraph}/translators.py | 0 metagraph/plugins/{katana => katanagraph}/types.py | 0 metagraph/tests/conftest.py | 8 -------- .../tests/plugins/{katana => katanagraph}/conftest.py | 0 .../{katana => katanagraph}/test_algorithms.py | 11 ----------- .../{katana => katanagraph}/test_translators.py | 0 .../plugins/{katana => katanagraph}/test_types.py | 0 10 files changed, 5 insertions(+), 24 deletions(-) rename metagraph/plugins/{katana => katanagraph}/__init__.py (100%) rename metagraph/plugins/{katana => katanagraph}/algorithms.py (100%) rename metagraph/plugins/{katana => katanagraph}/translators.py (100%) rename metagraph/plugins/{katana => katanagraph}/types.py (100%) rename metagraph/tests/plugins/{katana => katanagraph}/conftest.py (100%) rename metagraph/tests/plugins/{katana => katanagraph}/test_algorithms.py (98%) rename metagraph/tests/plugins/{katana => katanagraph}/test_translators.py (100%) rename metagraph/tests/plugins/{katana => katanagraph}/test_types.py (100%) diff --git a/metagraph/plugins/__init__.py b/metagraph/plugins/__init__.py index 53de8ee0..74b1dbb1 100644 --- a/metagraph/plugins/__init__.py +++ b/metagraph/plugins/__init__.py @@ -49,11 +49,11 @@ has_grblas = False try: - import katana as _ + import katanagraph as _ - has_katana = True + has_katanagraph = True except ImportError: # pragma: no cover - has_katana = False + has_katanagraph = False try: import numba as _ @@ -73,12 +73,12 @@ def find_plugins(): - from . import core, graphblas, katana, networkx, numpy, pandas, python, scipy + from . import core, graphblas, katanagraph, networkx, numpy, pandas, python, scipy # Default Plugins registry.register_from_modules(core) registry.register_from_modules(graphblas, name="core_graphblas") - registry.register_from_modules(katana, name="core_katana") + registry.register_from_modules(katanagraph, name="core_katanagraph") registry.register_from_modules(networkx, name="core_networkx") registry.register_from_modules(numpy, name="core_numpy") registry.register_from_modules(pandas, name="core_pandas") diff --git a/metagraph/plugins/katana/__init__.py b/metagraph/plugins/katanagraph/__init__.py similarity index 100% rename from metagraph/plugins/katana/__init__.py rename to metagraph/plugins/katanagraph/__init__.py diff --git a/metagraph/plugins/katana/algorithms.py b/metagraph/plugins/katanagraph/algorithms.py similarity index 100% rename from metagraph/plugins/katana/algorithms.py rename to metagraph/plugins/katanagraph/algorithms.py diff --git a/metagraph/plugins/katana/translators.py b/metagraph/plugins/katanagraph/translators.py similarity index 100% rename from metagraph/plugins/katana/translators.py rename to metagraph/plugins/katanagraph/translators.py diff --git a/metagraph/plugins/katana/types.py b/metagraph/plugins/katanagraph/types.py similarity index 100% rename from metagraph/plugins/katana/types.py rename to metagraph/plugins/katanagraph/types.py diff --git a/metagraph/tests/conftest.py b/metagraph/tests/conftest.py index e86cec02..1edc592b 100644 --- a/metagraph/tests/conftest.py +++ b/metagraph/tests/conftest.py @@ -12,11 +12,3 @@ def pytest_addoption(parser): action="store_true", help="Use a DaskResolver instead of the normal Resolver.", ) - parser.addoption( - "--runslow", action="store_true", default=False, help="run slow tests" - ) - - -def pytest_runtest_setup(item): - if "runslow" in item.keywords and not item.config.getoption("--runslow"): - pytest.skip("need --runslow option to run this test") diff --git a/metagraph/tests/plugins/katana/conftest.py b/metagraph/tests/plugins/katanagraph/conftest.py similarity index 100% rename from metagraph/tests/plugins/katana/conftest.py rename to metagraph/tests/plugins/katanagraph/conftest.py diff --git a/metagraph/tests/plugins/katana/test_algorithms.py b/metagraph/tests/plugins/katanagraph/test_algorithms.py similarity index 98% rename from metagraph/tests/plugins/katana/test_algorithms.py rename to metagraph/tests/plugins/katanagraph/test_algorithms.py index dd34ba1d..1497b570 100644 --- a/metagraph/tests/plugins/katana/test_algorithms.py +++ b/metagraph/tests/plugins/katanagraph/test_algorithms.py @@ -53,7 +53,6 @@ def test_sssp_bellman_ford(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): assert distances_nx == distances_kg -@pytest.mark.runslow def test_sssp_bellman_ford_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -84,7 +83,6 @@ def test_sssp_dijkstra(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): assert distances_nx == distances_kg -@pytest.mark.runslow def test_sssp_dijkstra_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -107,7 +105,6 @@ def test_connected_components(networkx_weighted_undirected_8_12, kg_from_nx_ud_8 assert cc_kg == cc_nx -@pytest.mark.runslow def test_connected_components_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -127,7 +124,6 @@ def test_pagerank(networkx_weighted_directed_8_12, kg_from_nx_di_8_12): assert pr_nx == pr_kg -@pytest.mark.runslow def test_pagerank_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs @@ -147,7 +143,6 @@ def test_betweenness_centrality(networkx_weighted_directed_8_12, kg_from_nx_di_8 assert bc_nx == bc_kg -@pytest.mark.runslow def test_betweenness_centrality_kg(katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -168,7 +163,6 @@ def test_triangle_counting(networkx_weighted_undirected_8_12, kg_from_nx_ud_8_12 assert tc_nx == tc_kg -@pytest.mark.runslow def test_triangle_counting_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -193,7 +187,6 @@ def test_louvain_community_detection( assert lc_nx[1] == lc_kg[1] -@pytest.mark.runslow def test_louvain_community_detection_kg( katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12 ): @@ -224,7 +217,6 @@ def test_translation_subgraph_extraction( assert list(se_nx.value.edges(data=True)) == list(se_kg.value.edges(data=True)) -@pytest.mark.runslow def test_translation_subgraph_extraction_kg( katanagraph_cleaned_8_12_di, nx_from_kg_di_8_12 ): @@ -249,7 +241,6 @@ def test_labal_propagation(networkx_weighted_undirected_8_12, kg_from_nx_ud_8_12 assert cd_nx == cd_kg -@pytest.mark.runslow def test_labal_propagation_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -276,7 +267,6 @@ def test_jaccard_similarity(networkx_weighted_undirected_8_12, kg_from_nx_ud_8_1 assert jcd_kg[compare_node] == 1 -@pytest.mark.runslow def test_jaccard_similarity_kg(katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12): """ test for katana graph which is directly loaded rather than translated from nettworkx, also test two consecutive runs with the same source code @@ -310,7 +300,6 @@ def test_local_clustering_coefficient( assert not np.any(np.isnan(lcc_kg)) -@pytest.mark.runslow def test_local_clustering_coefficient_kg( katanagraph_cleaned_8_12_ud, nx_from_kg_ud_8_12 ): diff --git a/metagraph/tests/plugins/katana/test_translators.py b/metagraph/tests/plugins/katanagraph/test_translators.py similarity index 100% rename from metagraph/tests/plugins/katana/test_translators.py rename to metagraph/tests/plugins/katanagraph/test_translators.py diff --git a/metagraph/tests/plugins/katana/test_types.py b/metagraph/tests/plugins/katanagraph/test_types.py similarity index 100% rename from metagraph/tests/plugins/katana/test_types.py rename to metagraph/tests/plugins/katanagraph/test_types.py