from collections import defaultdict, deque
from typing import DefaultDict, Dict, List, Optional, Tuple, Union


def _unreached() -> int:
    """Return the distance sentinel (-1) used for nodes BFS has not reached."""
    return -1


class Hash:
    """Assign sequential integer IDs to int / tuple node identifiers.

    Every input is normalised to a 3-tuple key by zero-padding, so ``5``,
    ``(5, 0)`` and ``(5, 0, 0)`` all map to the same ID. IDs are handed out
    in first-seen order, starting at 0.
    """

    def __init__(self):
        # Normalised 3-tuple key -> assigned ID.
        self.hash_table: Dict[Tuple, int] = {}

    def hash(self, x: Union[int, Tuple[int, int], Tuple[int, int, int]]) -> int:
        """Return the ID for ``x``, registering it on first use.

        Args:
            x: An int, a 2-tuple or a 3-tuple.

        Returns:
            The integer ID assigned to the normalised key.

        Raises:
            ValueError: If ``x`` is a tuple whose length is not 2 or 3.
            TypeError: If ``x`` is neither an int nor a tuple.
        """
        if isinstance(x, int):
            key = (x, 0, 0)
        elif isinstance(x, tuple):
            if len(x) == 2:
                key = (x[0], x[1], 0)
            elif len(x) == 3:
                key = x
            else:
                raise ValueError("Tuple must have 2 or 3 elements")
        else:
            raise TypeError("Input must be int or tuple")

        # len() is evaluated before any insertion, so a new key receives the
        # next unused ID and an existing key keeps the one it already has.
        return self.hash_table.setdefault(key, len(self.hash_table))


class Graph:
    """Directed or undirected weighted graph stored as adjacency lists.

    Nodes are addressed by the integer IDs produced by the graph's own
    ``Hash`` instance (``self.h``). Adjacency storage grows on demand, so
    creating a graph costs nothing up front and the number of distinct nodes
    is not capped.
    """

    def __init__(self, n: int, is_directed: bool = True):
        """
        Initialize graph.

        Args:
            n: Number of nodes (stored as ``self.n``; not used for sizing)
            is_directed: True for directed graph, False for undirected
        """
        self.n = n
        self.is_directed = is_directed
        # Retained for backward compatibility with code that reads ``N``.
        # Storage is no longer pre-allocated to this size.
        self.N = 5000000
        # Node ID -> list of (neighbour ID, weight). Indexing an ID that has
        # no edges yields an empty list, as the old pre-allocated list did.
        self.adj: DefaultDict[int, List[Tuple[int, int]]] = defaultdict(list)
        self.h = Hash()

    def hash(self, u: Union[int, Tuple[int, int]],
             v: Optional[Union[int, Tuple[int, int]]] = None,
             k: Optional[int] = None) -> int:
        """Hash node identifier(s) given as one, two or three components.

        ``hash(u)`` hashes ``u`` itself, ``hash(u, v)`` hashes ``(u, v)`` and
        ``hash(u, v, k)`` hashes ``(u, v, k)``.
        """
        if k is not None:
            return self.h.hash((u, v, k))
        elif v is not None:
            return self.h.hash((u, v))
        else:
            return self.h.hash(u)

    def add_edge(self, u: Union[int, Tuple[int, int], Tuple[int, int, int]],
                 v: Union[int, Tuple[int, int], Tuple[int, int, int]],
                 weight: int = 0):
        """
        Add an edge to the graph.

        For an undirected graph the reverse edge ``v -> u`` is added as well.

        Args:
            u: Source node (int or tuple)
            v: Destination node (int or tuple)
            weight: Edge weight (default 0)
        """
        u_hash = self.h.hash(u)
        v_hash = self.h.hash(v)
        self._add_edge_internal(u_hash, v_hash, weight)

    def _add_edge_internal(self, u: int, v: int, weight: int = 0):
        """Add ``u -> v`` by node ID, plus ``v -> u`` when undirected."""
        self._add_edge_weighted_undirected(u, v, weight)
        if not self.is_directed:
            self._add_edge_weighted_undirected(v, u, weight)

    def _add_edge_weighted_undirected(self, u: int, v: int, weight: int):
        """Append the single one-way entry ``(v, weight)`` to ``u``'s list."""
        self.adj[u].append((v, weight))


class BFS:
    """Breadth-First Search over a ``Graph``, measuring distance in edges.

    State is kept per node ID in dictionaries that only hold entries for
    nodes actually reached, so ``clear()`` is cheap regardless of graph size.
    """

    def __init__(self, graph: Graph):
        """
        Initialize BFS.

        Args:
            graph: Graph instance to perform BFS on
        """
        self.g = graph
        # Node ID -> edge count from the source; unreached IDs read as -1.
        self.min_dist_from_source: DefaultDict[int, int] = defaultdict(_unreached)
        # Node ID -> True once reached; unreached IDs read as False.
        self.visited: DefaultDict[int, bool] = defaultdict(bool)
        self.clear()

    def clear(self):
        """Reset BFS state so that no node is visited and all distances are -1."""
        self.min_dist_from_source = defaultdict(_unreached)
        self.visited = defaultdict(bool)

    def run(self, source: Union[int, Tuple[int, int], Tuple[int, int, int]]):
        """
        Run BFS from source node.

        Existing state is not reset first; call ``clear()`` before re-running
        from a different source.

        Args:
            source: Source node (int or tuple)
        """
        source_hash = self.g.h.hash(source)
        self._run_internal(source_hash)

    def min_dist(self, target: Union[int, Tuple[int, int], Tuple[int, int, int]]) -> int:
        """
        Get minimum distance to target node.

        The target is resolved through the graph's ``Hash``, so a node that
        has never been seen is registered there and reported as unreachable.

        Args:
            target: Target node (int or tuple)

        Returns:
            Minimum distance from source to target (-1 if unreachable)
        """
        target_hash = self.g.h.hash(target)
        return self._min_dist_internal(target_hash)

    def is_visited(self, target: Union[int, Tuple[int, int], Tuple[int, int, int]]) -> bool:
        """
        Check if target node was visited.

        Args:
            target: Target node (int or tuple)

        Returns:
            True if node was visited during BFS
        """
        target_hash = self.g.h.hash(target)
        return self._is_visited_internal(target_hash)

    def _run_internal(self, source: int):
        """Traverse from the node ID ``source``, recording visits and distances."""
        visited = self.visited
        dist = self.min_dist_from_source
        adj = self.g.adj

        q = deque([source])
        visited[source] = True
        dist[source] = 0

        while q:
            cur_node = q.popleft()
            next_dist = dist[cur_node] + 1
            # .get() avoids creating empty adjacency entries for leaf nodes.
            for adj_node, _ in adj.get(cur_node, ()):
                if not visited.get(adj_node, False):
                    visited[adj_node] = True
                    dist[adj_node] = next_dist
                    q.append(adj_node)

    def _min_dist_internal(self, target: int) -> int:
        """Return the recorded distance for a node ID, or -1 if unreached."""
        return self.min_dist_from_source.get(target, -1)

    def _is_visited_internal(self, target: int) -> bool:
        """Return whether the node ID was reached by the last traversal."""
        return self.visited.get(target, False)
import unittest
from graphx import Hash, Graph, BFS


def _build_graph(n, edges, is_directed=True):
    """Create a Graph declared with ``n`` nodes and add ``edges`` in order.

    Each edge is ``(u, v)`` or ``(u, v, weight)``; two-element edges fall back
    to the default weight of ``Graph.add_edge``.
    """
    graph = Graph(n=n, is_directed=is_directed)
    for edge in edges:
        graph.add_edge(*edge)
    return graph


class TestHash(unittest.TestCase):
    """Hash hands out sequential IDs and repeats them for repeated inputs."""

    def _check_sequence(self, values_and_ids):
        # One fresh Hash per test; inputs are fed in order.
        hasher = Hash()
        for value, expected_id in values_and_ids:
            self.assertEqual(hasher.hash(value), expected_id)

    def test_hash_int(self):
        """Plain ints get IDs in first-seen order."""
        self._check_sequence([(1, 0), (2, 1), (1, 0)])

    def test_hash_tuple2(self):
        """2-tuples get IDs in first-seen order."""
        self._check_sequence([((1, 2), 0), ((3, 4), 1), ((1, 2), 0)])

    def test_hash_tuple3(self):
        """3-tuples get IDs in first-seen order."""
        self._check_sequence([((1, 2, 3), 0), ((4, 5, 6), 1), ((1, 2, 3), 0)])

    def test_hash_consistency(self):
        """Zero-padded forms of the same identifier share one ID."""
        hasher = Hash()
        self.assertEqual(hasher.hash(5), hasher.hash((5, 0, 0)))
        self.assertEqual(hasher.hash((5, 10)), hasher.hash((5, 10, 0)))


class TestGraph(unittest.TestCase):
    """Graph construction and edge insertion."""

    def test_directed_graph(self):
        """A directed graph records its flag and declared size."""
        graph = Graph(n=5, is_directed=True)
        self.assertTrue(graph.is_directed)
        self.assertEqual(graph.n, 5)

    def test_undirected_graph(self):
        """An undirected graph records its flag."""
        graph = Graph(n=5, is_directed=False)
        self.assertFalse(graph.is_directed)

    def test_add_edge_directed(self):
        """A directed edge appears only in the source's adjacency list."""
        graph = _build_graph(5, [(0, 1, 5)], is_directed=True)
        src = graph.h.hash(0)
        dst = graph.h.hash(1)

        outgoing = graph.adj[src]
        self.assertEqual(len(outgoing), 1)
        self.assertEqual(outgoing[0], (dst, 5))
        self.assertEqual(len(graph.adj[dst]), 0)

    def test_add_edge_undirected(self):
        """An undirected edge appears in both endpoints' adjacency lists."""
        graph = _build_graph(5, [(0, 1, 3)], is_directed=False)
        src = graph.h.hash(0)
        dst = graph.h.hash(1)

        self.assertEqual(len(graph.adj[src]), 1)
        self.assertEqual(len(graph.adj[dst]), 1)
        self.assertEqual(graph.adj[src][0], (dst, 3))
        self.assertEqual(graph.adj[dst][0], (src, 3))

    def test_add_edge_tuple(self):
        """Tuple identifiers work as edge endpoints."""
        graph = _build_graph(10, [((0, 0), (1, 1), 7)], is_directed=True)
        src = graph.h.hash((0, 0))
        dst = graph.h.hash((1, 1))

        self.assertEqual(len(graph.adj[src]), 1)
        self.assertEqual(graph.adj[src][0], (dst, 7))


class TestBFS(unittest.TestCase):
    """BFS distances and reachability."""

    def test_simple_path(self):
        """Distances along 0 -> 1 -> 2 -> 3; node 4 is never reached."""
        graph = _build_graph(5, [(0, 1), (1, 2), (2, 3)], is_directed=True)
        search = BFS(graph)
        search.run(0)

        for node, expected in [(0, 0), (1, 1), (2, 2), (3, 3), (4, -1)]:
            self.assertEqual(search.min_dist(node), expected)

    def test_unreachable_node(self):
        """Nodes in a separate component stay unvisited."""
        graph = _build_graph(5, [(0, 1), (2, 3)], is_directed=True)
        search = BFS(graph)
        search.run(0)

        for node in (0, 1):
            self.assertTrue(search.is_visited(node))
        for node in (2, 3):
            self.assertFalse(search.is_visited(node))

    def test_undirected_graph_bfs(self):
        """An undirected path can be walked from either end."""
        graph = _build_graph(4, [(0, 1), (1, 2), (2, 3)], is_directed=False)
        search = BFS(graph)

        search.run(0)
        self.assertEqual(search.min_dist(3), 3)

        search.clear()
        search.run(3)
        self.assertEqual(search.min_dist(0), 3)

    def test_cycle_graph(self):
        """A back edge 3 -> 1 does not change first-visit distances."""
        graph = _build_graph(
            4, [(0, 1), (1, 2), (2, 3), (3, 1)], is_directed=True
        )
        search = BFS(graph)
        search.run(0)

        for node, expected in [(1, 1), (2, 2), (3, 3)]:
            self.assertEqual(search.min_dist(node), expected)

    def test_tuple_nodes(self):
        """BFS accepts tuple identifiers for the source and targets."""
        graph = _build_graph(
            10, [((0, 0), (1, 1)), ((1, 1), (2, 2))], is_directed=True
        )
        search = BFS(graph)
        search.run((0, 0))

        for node, expected in [((0, 0), 0), ((1, 1), 1), ((2, 2), 2)]:
            self.assertEqual(search.min_dist(node), expected)

    def test_clear(self):
        """clear() forgets earlier visits so a new run starts fresh."""
        graph = _build_graph(3, [(0, 1), (1, 2)], is_directed=True)
        search = BFS(graph)

        search.run(0)
        self.assertTrue(search.is_visited(1))

        search.clear()
        self.assertFalse(search.is_visited(1))

        search.run(1)
        self.assertTrue(search.is_visited(2))
        self.assertEqual(search.min_dist(2), 1)


class TestIntegration(unittest.TestCase):
    """End-to-end check using Graph and BFS together."""

    def test_complete_workflow(self):
        """Weighted undirected graph: BFS counts edges and ignores weights."""
        weighted_edges = [
            (0, 1, 1),
            (0, 2, 2),
            (1, 3, 3),
            (2, 3, 4),
            (3, 4, 5),
        ]
        graph = _build_graph(6, weighted_edges, is_directed=False)

        search = BFS(graph)
        search.run(0)

        self.assertEqual(search.min_dist(4), 3)
        self.assertTrue(search.is_visited(3))
        self.assertFalse(search.is_visited(5))


if __name__ == '__main__':
    unittest.main()
__name__ == '__main__': + unittest.main() diff --git a/usage-python.md b/usage-python.md new file mode 100644 index 0000000..cfe0c14 --- /dev/null +++ b/usage-python.md @@ -0,0 +1,420 @@ +# GraphX Python - Usage Guide + +A Python implementation of the GraphX library for creating and analyzing graphs with support for both directed and undirected weighted graphs, featuring BFS traversal and flexible node identification. + +## Table of Contents +- [Installation](#installation) +- [Core Components](#core-components) +- [Quick Start](#quick-start) +- [Detailed Usage](#detailed-usage) + - [Hash Class](#hash-class) + - [Graph Class](#graph-class) + - [BFS Class](#bfs-class) +- [Examples](#examples) +- [API Reference](#api-reference) + +## Installation + +Simply import the module into your Python project: + +```python +from graphx import Hash, Graph, BFS +``` + +**Requirements:** Python 3.6+ (uses type hints and deque from collections) + +## Core Components + +GraphX provides three main classes: + +1. **Hash** - Maps tuples to unique integer IDs +2. **Graph** - Creates and manages directed/undirected weighted graphs +3. **BFS** - Performs Breadth-First Search traversal + +## Quick Start + +Here's a simple example to get you started: + +```python +from graphx import Graph, BFS + +# Create an undirected graph with 5 nodes +g = Graph(n=5, is_directed=False) + +# Add edges +g.add_edge(0, 1, weight=5) +g.add_edge(1, 2, weight=3) +g.add_edge(2, 3, weight=7) + +# Run BFS from node 0 +bfs = BFS(g) +bfs.run(source=0) + +# Get minimum distance to node 3 +distance = bfs.min_dist(3) +print(f"Distance from 0 to 3: {distance}") # Output: 3 + +# Check if node was visited +if bfs.is_visited(3): + print("Node 3 was reached!") +``` + +## Detailed Usage + +### Hash Class + +The `Hash` class provides a mapping from tuples to unique integer IDs. 
+ + ```python +from graphx import Hash + +h = Hash() + +# Hash integers +id1 = h.hash(5) # Returns 0 +id2 = h.hash(10) # Returns 1 +id3 = h.hash(5) # Returns 0 (same as before) + +# Hash 2-tuples +id4 = h.hash((1, 2)) # Returns 2 +id5 = h.hash((3, 4)) # Returns 3 + +# Hash 3-tuples +id6 = h.hash((1, 2, 3)) # Returns 4 +``` + +**Key Features:** +- Stable IDs: the same input always returns the same ID, and IDs are assigned sequentially in first-seen order +- Supports int, 2-tuple, and 3-tuple inputs +- Automatically normalizes inputs to 3-tuples internally by zero-padding, so `5`, `(5, 0)`, and `(5, 0, 0)` all map to the same ID + +### Graph Class + +The `Graph` class supports both directed and undirected weighted graphs. + +#### Creating a Graph + +```python +from graphx import Graph + +# Directed graph +g_directed = Graph(n=10, is_directed=True) + +# Undirected graph +g_undirected = Graph(n=10, is_directed=False) +``` + +#### Adding Edges + +```python +# Simple integer nodes +g.add_edge(0, 1, weight=5) + +# Tuple nodes (useful for grid-based graphs) +g.add_edge((0, 0), (0, 1), weight=1) +g.add_edge((0, 1), (1, 1), weight=2) + +# 3-tuple nodes +g.add_edge((0, 0, 0), (1, 1, 1), weight=10) + +# Default weight is 0 +g.add_edge(2, 3) # weight=0 +``` + +**Directed vs Undirected:** +- **Directed**: edge from u → v only +- **Undirected**: edges created in both directions (u ↔ v) + +### BFS Class + +The `BFS` class performs Breadth-First Search traversal. 
+ +#### Basic BFS + +```python +from graphx import Graph, BFS + +# Create graph +g = Graph(n=5, is_directed=True) +g.add_edge(0, 1) +g.add_edge(1, 2) +g.add_edge(2, 3) + +# Initialize BFS +bfs = BFS(g) + +# Run from source node 0 +bfs.run(source=0) + +# Query results +print(bfs.min_dist(3)) # Minimum distance to node 3 +print(bfs.is_visited(3)) # True if node 3 was reached +``` + +#### Clearing and Re-running + +```python +# Run BFS from node 0 +bfs.run(0) +print(bfs.min_dist(3)) # Distance from 0 to 3 + +# Clear state and run from different source +bfs.clear() +bfs.run(1) +print(bfs.min_dist(3)) # Distance from 1 to 3 +``` + +## Examples + +### Example 1: Simple Path Graph + +```python +from graphx import Graph, BFS + +# Create a directed path: 0 → 1 → 2 → 3 +g = Graph(n=4, is_directed=True) +g.add_edge(0, 1) +g.add_edge(1, 2) +g.add_edge(2, 3) + +bfs = BFS(g) +bfs.run(0) + +print(f"Distance to node 1: {bfs.min_dist(1)}") # 1 +print(f"Distance to node 2: {bfs.min_dist(2)}") # 2 +print(f"Distance to node 3: {bfs.min_dist(3)}") # 3 +``` + +### Example 2: Undirected Weighted Graph + +```python +from graphx import Graph, BFS + +# Create an undirected weighted graph +g = Graph(n=5, is_directed=False) +g.add_edge(0, 1, weight=10) +g.add_edge(0, 2, weight=5) +g.add_edge(1, 3, weight=7) +g.add_edge(2, 3, weight=3) + +bfs = BFS(g) +bfs.run(0) + +# BFS finds shortest path by number of edges, not weight +print(f"Distance 0 to 3: {bfs.min_dist(3)}") # 2 (via node 1 or 2) +``` + +**Note:** BFS finds the shortest path by **number of edges**, not by weight sum. For weighted shortest paths, use Dijkstra's algorithm. 
+ +### Example 3: Grid Graph with Tuple Nodes + +```python +from graphx import Graph, BFS + +# Create a 3x3 grid graph using (row, col) tuples +g = Graph(n=9, is_directed=False) + +# Add horizontal edges +for row in range(3): + for col in range(2): + g.add_edge((row, col), (row, col + 1)) + +# Add vertical edges +for col in range(3): + for row in range(2): + g.add_edge((row, col), (row + 1, col)) + +# Find distance from top-left to bottom-right +bfs = BFS(g) +bfs.run((0, 0)) +distance = bfs.min_dist((2, 2)) +print(f"Distance from (0,0) to (2,2): {distance}") # 4 +``` + +### Example 4: Detecting Connected Components + +```python +from graphx import Graph, BFS + +# Graph with two separate components +g = Graph(n=6, is_directed=False) + +# Component 1: nodes 0, 1, 2 +g.add_edge(0, 1) +g.add_edge(1, 2) + +# Component 2: nodes 3, 4, 5 +g.add_edge(3, 4) +g.add_edge(4, 5) + +# Run BFS from node 0 +bfs = BFS(g) +bfs.run(0) + +# Check which nodes are reachable +for node in range(6): + if bfs.is_visited(node): + print(f"Node {node} is in same component as 0") + else: + print(f"Node {node} is NOT reachable from 0") +``` + +### Example 5: Graph with Cycles + +```python +from graphx import Graph, BFS + +# Create a cyclic graph +g = Graph(n=4, is_directed=True) +g.add_edge(0, 1) +g.add_edge(1, 2) +g.add_edge(2, 3) +g.add_edge(3, 1) # Cycle back to node 1 + +bfs = BFS(g) +bfs.run(0) + +# BFS handles cycles correctly +print(f"Distance to 1: {bfs.min_dist(1)}") # 1 +print(f"Distance to 2: {bfs.min_dist(2)}") # 2 +print(f"Distance to 3: {bfs.min_dist(3)}") # 3 +``` + +## API Reference + +### Hash Class + +```python +h = Hash() +``` + +**Methods:** +- `hash(x)` - Hash an int or tuple to unique integer ID + - Parameters: `x` (int | tuple[int, int] | tuple[int, int, int]) + - Returns: `int` (unique ID) + +### Graph Class + +```python +g = Graph(n, is_directed=True) +``` + +**Constructor:** +- `n` (int) - Number of nodes +- `is_directed` (bool) - True for directed, False for undirected 
(default: True) + +**Methods:** +- `add_edge(u, v, weight=0)` - Add an edge to the graph + - `u` - Source node (int or tuple) + - `v` - Destination node (int or tuple) + - `weight` (int) - Edge weight (default: 0) + +**Attributes:** +- `n` - Number of nodes +- `is_directed` - Graph type +- `adj` - Adjacency list (internal use) +- `h` - Hash instance (internal use) + +### BFS Class + +```python +bfs = BFS(graph) +``` + +**Constructor:** +- `graph` (Graph) - Graph instance to traverse + +**Methods:** +- `run(source)` - Run BFS from source node + - `source` - Starting node (int or tuple) + +- `min_dist(target)` - Get minimum distance to target + - `target` - Target node (int or tuple) + - Returns: `int` (-1 if unreachable) + +- `is_visited(target)` - Check if node was visited + - `target` - Target node (int or tuple) + - Returns: `bool` + +- `clear()` - Reset BFS state for re-running + +## Testing + +Run the included test suite: + +```bash +python test_graphx.py +``` + +The test suite includes: +- Hash functionality tests +- Graph creation and edge addition tests +- BFS traversal tests +- Integration tests + +## Performance Notes + +- The graph uses a pre-allocated adjacency list of size 5,000,000 +- Suitable for most applications with millions of nodes +- BFS has O(V + E) time complexity +- Hash lookups are O(1) on average + +## Common Patterns + +### Pattern 1: Check Reachability + +```python +g = Graph(n=10, is_directed=True) +# ... add edges ... + +bfs = BFS(g) +bfs.run(source=0) + +if bfs.is_visited(target): + print(f"There is a path from 0 to {target}") +else: + print(f"No path exists from 0 to {target}") +``` + +### Pattern 2: Find Shortest Path Distance + +```python +g = Graph(n=10, is_directed=False) +# ... add edges ... 
+ +bfs = BFS(g) +bfs.run(source=start) + +distance = bfs.min_dist(end) +if distance == -1: + print("No path exists") +else: + print(f"Shortest path has {distance} edges") +``` + +### Pattern 3: Multiple BFS Runs + +```python +g = Graph(n=10, is_directed=False) +# ... add edges ... + +bfs = BFS(g) + +# Run from multiple sources +sources = [0, 5, 9] +for src in sources: + bfs.clear() + bfs.run(src) + # Process results... +``` + +## Limitations + +- BFS finds shortest path by **edge count**, not by weight +- For weighted shortest paths, implement Dijkstra's or Bellman-Ford +- Maximum node capacity is 5,000,000 (can be modified by changing `N` in Graph class) +- No built-in path reconstruction (only distance and reachability) + +## License + +This is a Python port of the C++ GraphX library.