diff --git a/pyproject.toml b/pyproject.toml
index d7b1e3f..9f5f54a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,7 +56,7 @@ line-length = 88
 
 [tool.ruff]
 line-length = 88
-exclude = ["assets/stitch_notebook_template.ipynb"]
+exclude = ["**/assets/stitch_notebook_template.ipynb", "*.ipynb"]
 
 [tool.pyright]
 typeCheckingMode = "basic"
diff --git a/src/chuck_data/__main__.py b/src/chuck_data/__main__.py
index 4a11c21..ac6099b 100644
--- a/src/chuck_data/__main__.py
+++ b/src/chuck_data/__main__.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 """Entry point for chuck-data when run as a module."""
 
-import sys
 import os
 import argparse
 
diff --git a/src/chuck_data/chuck_data/__main__.py b/src/chuck_data/chuck_data/__main__.py
index 107c9df..59b9263 100644
--- a/src/chuck_data/chuck_data/__main__.py
+++ b/src/chuck_data/chuck_data/__main__.py
@@ -11,11 +11,11 @@
 sys.path.insert(0, src_path)
 
 # Import version from this package
-from .version import __version__
+from .version import __version__  # noqa: E402
 
 # Import the TUI components
-from logger import setup_logging
-from ui.tui import ChuckTUI
+from logger import setup_logging  # noqa: E402
+from ui.tui import ChuckTUI  # noqa: E402
 
 
 def setup_arg_parser() -> argparse.ArgumentParser:
diff --git a/src/clients/databricks.py b/src/clients/databricks.py
index af43a76..b35865f 100644
--- a/src/clients/databricks.py
+++ b/src/clients/databricks.py
@@ -363,7 +363,6 @@ def list_tables(
             params["include_browse"] = "true"
         if include_manifest_capabilities:
             params["include_manifest_capabilities"] = "true"
-
         return self.get_with_params("/api/2.1/unity-catalog/tables", params)
 
     def get_table(
diff --git a/src/commands/list_tables.py b/src/commands/list_tables.py
index 1654069..7b72f53 100644
--- a/src/commands/list_tables.py
+++ b/src/commands/list_tables.py
@@ -88,9 +88,12 @@ def handle_command(
             "data_source_format": table.get("data_source_format", ""),
             "comment": table.get("comment", ""),
             "created_at": table.get("created_at"),
+            "updated_at": table.get("updated_at"),
             "created_by": table.get("created_by", ""),
             "owner": table.get("owner", ""),
-            "row_count": table.get("properties", {}).get("row_count", "Unknown"),
+            "row_count": table.get("properties", {}).get(
+                "spark.sql.statistics.numRows", "-"
+            ),
             "size_bytes": table.get("properties", {}).get("size_bytes", "Unknown"),
         }
 
diff --git a/src/ui/tui.py b/src/ui/tui.py
index 0a4a8ef..bbdcf67 100644
--- a/src/ui/tui.py
+++ b/src/ui/tui.py
@@ -828,17 +828,43 @@ def _display_tables(self, data: Dict[str, Any]) -> None:
                 table["column_count"] = 0
 
             # Format timestamps if present
-            for ts_field in ["created", "updated"]:
+            for ts_field in ["created_at", "updated_at"]:
                 if ts_field in table and table[ts_field]:
                     try:
                         # Convert timestamp to more readable format if needed
-                        # This assumes timestamps are either strings or integers
+                        # Handle Unix timestamps (integers) and ISO strings
                         timestamp = table[ts_field]
-                        if isinstance(timestamp, str) and len(timestamp) > 10:
+                        if isinstance(timestamp, int):
+                            # Convert Unix timestamp (milliseconds) to readable date
+                            from datetime import datetime
+
+                            date_obj = datetime.fromtimestamp(timestamp / 1000)
+                            table[ts_field] = date_obj.strftime("%Y-%m-%d")
+                        elif isinstance(timestamp, str) and len(timestamp) > 10:
                             table[ts_field] = timestamp.split("T")[0]
                     except Exception:
                         pass  # Keep the original format if conversion fails
+
+            # Format row count if present
+            if "row_count" in table and table["row_count"] not in ["-", "Unknown"]:
+                try:
+                    row_count = table["row_count"]
+                    if isinstance(row_count, str) and row_count.isdigit():
+                        row_count = int(row_count)
+
+                    if isinstance(row_count, int):
+                        # Format large numbers with appropriate suffixes
+                        if row_count >= 1_000_000_000:
+                            table["row_count"] = f"{row_count / 1_000_000_000:.1f}B"
+                        elif row_count >= 1_000_000:
+                            table["row_count"] = f"{row_count / 1_000_000:.1f}M"
+                        elif row_count >= 1_000:
+                            table["row_count"] = f"{row_count / 1_000:.1f}K"
+                        else:
+                            table["row_count"] = str(row_count)
+                except Exception:
+                    pass  # Keep the original format if conversion fails
 
         # Define column styling functions
         def table_type_style(type_val):
             if type_val == "VIEW" or type_val == "view":
@@ -863,8 +889,15 @@ def table_type_style(type_val):
         display_table(
             console=self.console,
             data=tables,
-            columns=["name", "table_type", "column_count", "created", "updated"],
-            headers=["Table Name", "Type", "# Cols", "Created", "Last Updated"],
+            columns=[
+                "name",
+                "table_type",
+                "column_count",
+                "row_count",
+                "created_at",
+                "updated_at",
+            ],
+            headers=["Table Name", "Type", "# Cols", "Rows", "Created", "Last Updated"],
             title=title,
             style_map=style_map,
             title_style=TABLE_TITLE_STYLE,
diff --git a/tests/test_agent_tool_display_routing.py b/tests/test_agent_tool_display_routing.py
index 353aee6..1ab8f39 100644
--- a/tests/test_agent_tool_display_routing.py
+++ b/tests/test_agent_tool_display_routing.py
@@ -168,7 +168,6 @@ def test_command_name_mapping_prevents_regression(self):
         This test specifically prevents the regression where agent tool names with
         hyphens (like 'list-schemas') weren't being mapped to the correct display methods.
         """
-        from unittest.mock import MagicMock
 
         # Test cases: agent tool name -> expected display method call
         command_mappings = [
@@ -307,7 +306,7 @@ def output_callback(tool_name, tool_data):
 
         with patch("src.agent.tool_executor.jsonschema.validate"):
             with self.assertRaises(PaginationCancelled):
-                result = execute_tool(
+                execute_tool(
                     mock_client,
                     "list-schemas",
                     {"catalog_name": "test_catalog"},
diff --git a/tests/test_tui_display.py b/tests/test_tui_display.py
index 59c4377..cc88502 100644
--- a/tests/test_tui_display.py
+++ b/tests/test_tui_display.py
@@ -209,3 +209,219 @@ def test_display_status_truncates_long_values(self):
         self.assertIn(
             "https://very-long-workspace-url-that-exceeds-the-displa…", output
         )
+
+    def test_table_display_field_mapping(self):
+        """Test that table display columns match actual data fields from API responses."""
+        # This test would have caught the created_at/updated_at field mapping issue
+
+        # Mock realistic table data structure (matching actual API response)
+        table_data = {
+            "tables": [
+                {
+                    "name": "ecommerce_profiles",
+                    "table_type": "MANAGED",
+                    "created_at": 1748473407547,  # Unix timestamp in milliseconds
+                    "updated_at": 1748473408383,  # Unix timestamp in milliseconds
+                    "row_count": 4387229,  # Large row count for formatting test
+                    "columns": [
+                        {"name": "system_id", "type_text": "string"},
+                        {"name": "last_updated", "type_text": "timestamp"},
+                    ],
+                },
+                {
+                    "name": "loyalty_member",
+                    "table_type": "MANAGED",
+                    "created_at": 1748473412513,
+                    "updated_at": 1748473413145,
+                    "row_count": 919746,  # Medium row count for formatting test
+                    "columns": [{"name": "customer_id", "type_text": "string"}],
+                },
+            ],
+            "catalog_name": "john_test",
+            "schema_name": "bronze",
+            "total_count": 2,
+        }
+
+        with patch("src.ui.table_formatter.display_table") as mock_display_table:
+            # _display_tables raises PaginationCancelled by design
+            from src.exceptions import PaginationCancelled
+
+            with self.assertRaises(PaginationCancelled):
+                self.tui._display_tables(table_data)
+
+            # Verify display_table was called
+            mock_display_table.assert_called_once()
+            kwargs = mock_display_table.call_args.kwargs
+
+            # Verify column names match data fields
+            columns = kwargs["columns"]
+            data = kwargs["data"]
+
+            # This test would have caught the field name mismatch
+            for column in columns:
+                if column in ["name", "table_type"]:  # These should always exist
+                    continue
+                # Verify that display columns exist in the actual data
+                self.assertTrue(
+                    any(column in row for row in data),
+                    f"Display column '{column}' not found in any data row. Available keys: {list(data[0].keys()) if data else 'No data'}",
+                )
+
+            # Verify expected columns are present (including new row_count)
+            expected_columns = [
+                "name",
+                "table_type",
+                "column_count",
+                "row_count",
+                "created_at",
+                "updated_at",
+            ]
+            self.assertEqual(columns, expected_columns)
+
+            # Verify data was processed correctly
+            self.assertEqual(len(data), 2)
+            self.assertEqual(data[0]["name"], "ecommerce_profiles")
+            self.assertEqual(data[0]["table_type"], "MANAGED")
+
+            # Verify timestamp fields are present and formatted
+            self.assertIn("created_at", data[0])
+            self.assertIn("updated_at", data[0])
+
+            # Verify row count fields are present and formatted
+            self.assertIn("row_count", data[0])
+            self.assertIn("row_count", data[1])
+
+            # Verify row count formatting (4387229 -> 4.4M, 919746 -> 919.7K)
+            self.assertEqual(data[0]["row_count"], "4.4M")  # 4387229 formatted
+            self.assertEqual(data[1]["row_count"], "919.7K")  # 919746 formatted
+
+    def test_table_timestamp_formatting(self):
+        """Test that Unix timestamps are properly converted to readable dates."""
+
+        table_data = {
+            "tables": [
+                {
+                    "name": "test_table",
+                    "table_type": "MANAGED",
+                    "created_at": 1748473407547,  # Unix timestamp in milliseconds
+                    "updated_at": 1748473408383,
+                    "columns": [],
+                }
+            ],
+            "catalog_name": "test_catalog",
+            "schema_name": "test_schema",
+            "total_count": 1,
+        }
+
+        with patch("src.ui.table_formatter.display_table") as mock_display_table:
+            # _display_tables raises PaginationCancelled by design
+            from src.exceptions import PaginationCancelled
+
+            with self.assertRaises(PaginationCancelled):
+                self.tui._display_tables(table_data)
+
+            kwargs = mock_display_table.call_args.kwargs
+            data = kwargs["data"]
+
+            # Verify timestamps were converted to readable format (YYYY-MM-DD)
+            created_date = data[0]["created_at"]
+            updated_date = data[0]["updated_at"]
+
+            # Should be formatted as YYYY-MM-DD
+            self.assertRegex(
+                created_date,
+                r"^\d{4}-\d{2}-\d{2}$",
+                f"created_at should be formatted as YYYY-MM-DD, got: {created_date}",
+            )
+            self.assertRegex(
+                updated_date,
+                r"^\d{4}-\d{2}-\d{2}$",
+                f"updated_at should be formatted as YYYY-MM-DD, got: {updated_date}",
+            )
+
+            # Verify the actual date conversion (1748473407547 ms = 2025-05-28)
+            self.assertEqual(created_date, "2025-05-28")
+            self.assertEqual(updated_date, "2025-05-28")
+
+    def test_table_display_with_missing_timestamps(self):
+        """Test table display handles missing timestamp fields gracefully."""
+
+        table_data = {
+            "tables": [
+                {
+                    "name": "table_no_timestamps",
+                    "table_type": "VIEW",
+                    # No created_at or updated_at fields
+                    "columns": [],
+                }
+            ],
+            "catalog_name": "test_catalog",
+            "schema_name": "test_schema",
+            "total_count": 1,
+        }
+
+        with patch("src.ui.table_formatter.display_table") as mock_display_table:
+            # _display_tables raises PaginationCancelled by design, not an error
+            from src.exceptions import PaginationCancelled
+
+            with self.assertRaises(PaginationCancelled):
+                self.tui._display_tables(table_data)
+
+            kwargs = mock_display_table.call_args.kwargs
+            data = kwargs["data"]
+
+            # Verify the table was processed even without timestamps
+            self.assertEqual(len(data), 1)
+            self.assertEqual(data[0]["name"], "table_no_timestamps")
+
+            # Timestamp fields should be None or empty
+            self.assertIsNone(data[0].get("created_at"))
+            self.assertIsNone(data[0].get("updated_at"))
+
+    def test_row_count_formatting(self):
+        """Test that row counts are properly formatted with K/M/B suffixes."""
+
+        test_cases = [
+            {"row_count": 123, "expected": "123"},  # Small numbers stay as-is
+            {"row_count": 1234, "expected": "1.2K"},  # Thousands
+            {"row_count": 50000, "expected": "50.0K"},  # Tens of thousands
+            {"row_count": 1234567, "expected": "1.2M"},  # Millions
+            {"row_count": 4387229, "expected": "4.4M"},  # Real example from API
+            {"row_count": 1234567890, "expected": "1.2B"},  # Billions
+            {"row_count": "-", "expected": "-"},  # Dash for unknown values
+        ]
+
+        for i, case in enumerate(test_cases):
+            with self.subTest(case=case):
+                table_data = {
+                    "tables": [
+                        {
+                            "name": f"test_table_{i}",
+                            "table_type": "MANAGED",
+                            "row_count": case["row_count"],
+                            "columns": [],
+                        }
+                    ],
+                    "catalog_name": "test_catalog",
+                    "schema_name": "test_schema",
+                    "total_count": 1,
+                }
+
+                with patch(
+                    "src.ui.table_formatter.display_table"
+                ) as mock_display_table:
+                    from src.exceptions import PaginationCancelled
+
+                    with self.assertRaises(PaginationCancelled):
+                        self.tui._display_tables(table_data)
+
+                    kwargs = mock_display_table.call_args.kwargs
+                    data = kwargs["data"]
+
+                    # Verify row count was formatted correctly
+                    actual_row_count = data[0]["row_count"]
+                    self.assertEqual(
+                        actual_row_count,
+                        case["expected"],
+                        f"Row count {case['row_count']} should format to {case['expected']}, got {actual_row_count}",
+                    )