Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ line-length = 88

[tool.ruff]
line-length = 88
exclude = ["assets/stitch_notebook_template.ipynb"]
exclude = ["**/assets/stitch_notebook_template.ipynb", "*.ipynb"]

[tool.pyright]
typeCheckingMode = "basic"
1 change: 0 additions & 1 deletion src/chuck_data/__main__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
"""Entry point for chuck-data when run as a module."""

import sys
import os
import argparse

Expand Down
6 changes: 3 additions & 3 deletions src/chuck_data/chuck_data/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
sys.path.insert(0, src_path)

# Import version from this package
from .version import __version__
from .version import __version__ # noqa: E402

# Import the TUI components
from logger import setup_logging
from ui.tui import ChuckTUI
from logger import setup_logging # noqa: E402
from ui.tui import ChuckTUI # noqa: E402


def setup_arg_parser() -> argparse.ArgumentParser:
Expand Down
1 change: 0 additions & 1 deletion src/clients/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,6 @@ def list_tables(
params["include_browse"] = "true"
if include_manifest_capabilities:
params["include_manifest_capabilities"] = "true"

return self.get_with_params("/api/2.1/unity-catalog/tables", params)

def get_table(
Expand Down
5 changes: 4 additions & 1 deletion src/commands/list_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,12 @@ def handle_command(
"data_source_format": table.get("data_source_format", ""),
"comment": table.get("comment", ""),
"created_at": table.get("created_at"),
"updated_at": table.get("updated_at"),
"created_by": table.get("created_by", ""),
"owner": table.get("owner", ""),
"row_count": table.get("properties", {}).get("row_count", "Unknown"),
"row_count": table.get("properties", {}).get(
"spark.sql.statistics.numRows", "-"
),
"size_bytes": table.get("properties", {}).get("size_bytes", "Unknown"),
}

Expand Down
43 changes: 38 additions & 5 deletions src/ui/tui.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,17 +828,43 @@ def _display_tables(self, data: Dict[str, Any]) -> None:
table["column_count"] = 0

# Format timestamps if present
for ts_field in ["created", "updated"]:
for ts_field in ["created_at", "updated_at"]:
if ts_field in table and table[ts_field]:
try:
# Convert timestamp to more readable format if needed
# This assumes timestamps are either strings or integers
# Handle Unix timestamps (integers) and ISO strings
timestamp = table[ts_field]
if isinstance(timestamp, str) and len(timestamp) > 10:
if isinstance(timestamp, int):
# Convert Unix timestamp (milliseconds) to readable date
from datetime import datetime

date_obj = datetime.fromtimestamp(timestamp / 1000)
table[ts_field] = date_obj.strftime("%Y-%m-%d")
elif isinstance(timestamp, str) and len(timestamp) > 10:
table[ts_field] = timestamp.split("T")[0]
except Exception:
pass # Keep the original format if conversion fails

# Format row count if present
if "row_count" in table and table["row_count"] not in ["-", "Unknown"]:
try:
row_count = table["row_count"]
if isinstance(row_count, str) and row_count.isdigit():
row_count = int(row_count)

if isinstance(row_count, int):
# Format large numbers with appropriate suffixes
if row_count >= 1_000_000_000:
table["row_count"] = f"{row_count / 1_000_000_000:.1f}B"
elif row_count >= 1_000_000:
table["row_count"] = f"{row_count / 1_000_000:.1f}M"
elif row_count >= 1_000:
table["row_count"] = f"{row_count / 1_000:.1f}K"
else:
table["row_count"] = str(row_count)
except Exception:
pass # Keep the original format if conversion fails

# Define column styling functions
def table_type_style(type_val):
if type_val == "VIEW" or type_val == "view":
Expand All @@ -863,8 +889,15 @@ def table_type_style(type_val):
display_table(
console=self.console,
data=tables,
columns=["name", "table_type", "column_count", "created", "updated"],
headers=["Table Name", "Type", "# Cols", "Created", "Last Updated"],
columns=[
"name",
"table_type",
"column_count",
"row_count",
"created_at",
"updated_at",
],
headers=["Table Name", "Type", "# Cols", "Rows", "Created", "Last Updated"],
title=title,
style_map=style_map,
title_style=TABLE_TITLE_STYLE,
Expand Down
3 changes: 1 addition & 2 deletions tests/test_agent_tool_display_routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ def test_command_name_mapping_prevents_regression(self):
This test specifically prevents the regression where agent tool names with hyphens
(like 'list-schemas') weren't being mapped to the correct display methods.
"""
from unittest.mock import MagicMock

# Test cases: agent tool name -> expected display method call
command_mappings = [
Expand Down Expand Up @@ -307,7 +306,7 @@ def output_callback(tool_name, tool_data):

with patch("src.agent.tool_executor.jsonschema.validate"):
with self.assertRaises(PaginationCancelled):
result = execute_tool(
execute_tool(
mock_client,
"list-schemas",
{"catalog_name": "test_catalog"},
Expand Down
216 changes: 216 additions & 0 deletions tests/test_tui_display.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,219 @@ def test_display_status_truncates_long_values(self):
self.assertIn(
"https://very-long-workspace-url-that-exceeds-the-displa…", output
)

def test_table_display_field_mapping(self):
    """Every column passed to display_table must exist as a key in the rows.

    Regression guard for a mismatch between the column names requested by
    the display layer and the keys carried by the table rows (the
    created_at/updated_at field mapping issue).
    """
    from src.exceptions import PaginationCancelled

    # Rows shaped like the API payload: epoch-millisecond timestamps plus an
    # integer row count large enough to exercise the M / K suffixes.
    profiles_row = {
        "name": "ecommerce_profiles",
        "table_type": "MANAGED",
        "created_at": 1748473407547,
        "updated_at": 1748473408383,
        "row_count": 4387229,
        "columns": [
            {"name": "system_id", "type_text": "string"},
            {"name": "last_updated", "type_text": "timestamp"},
        ],
    }
    loyalty_row = {
        "name": "loyalty_member",
        "table_type": "MANAGED",
        "created_at": 1748473412513,
        "updated_at": 1748473413145,
        "row_count": 919746,
        "columns": [{"name": "customer_id", "type_text": "string"}],
    }
    table_data = {
        "tables": [profiles_row, loyalty_row],
        "catalog_name": "john_test",
        "schema_name": "bronze",
        "total_count": 2,
    }

    # _display_tables is expected to finish by raising PaginationCancelled.
    target = "src.ui.table_formatter.display_table"
    with patch(target) as fake_display, self.assertRaises(PaginationCancelled):
        self.tui._display_tables(table_data)

    fake_display.assert_called_once()
    call_kwargs = fake_display.call_args.kwargs
    columns = call_kwargs["columns"]
    data = call_kwargs["data"]

    # "name" and "table_type" are taken for granted; every other display
    # column has to show up in at least one row.
    for column in columns:
        if column not in ("name", "table_type"):
            self.assertTrue(
                any(column in row for row in data),
                f"Display column '{column}' not found in any data row. Available keys: {list(data[0].keys()) if data else 'No data'}",
            )

    # The column list itself, including the row_count column.
    self.assertEqual(
        columns,
        [
            "name",
            "table_type",
            "column_count",
            "row_count",
            "created_at",
            "updated_at",
        ],
    )

    # Both rows made it through, in order.
    self.assertEqual(len(data), 2)
    first, second = data
    self.assertEqual(first["name"], "ecommerce_profiles")
    self.assertEqual(first["table_type"], "MANAGED")

    # Timestamp keys survive processing.
    for ts_key in ("created_at", "updated_at"):
        self.assertIn(ts_key, first)

    # Row counts are present and carry the expected suffix formatting.
    self.assertIn("row_count", first)
    self.assertIn("row_count", second)
    self.assertEqual(first["row_count"], "4.4M")  # 4387229
    self.assertEqual(second["row_count"], "919.7K")  # 919746

def test_table_timestamp_formatting(self):
    """Test that Unix millisecond timestamps are rendered as YYYY-MM-DD dates.

    The expected date is derived with the same local-time conversion the
    display code uses (naive ``datetime.fromtimestamp``) instead of being
    hard-coded. The sample timestamp is 2025-05-28 23:03:27 UTC, so a
    hard-coded "2025-05-28" fails on machines whose local timezone is
    UTC+1 or further east, where the local date is already 2025-05-29.
    """
    from datetime import datetime

    from src.exceptions import PaginationCancelled

    created_ms = 1748473407547  # Unix timestamp in milliseconds
    updated_ms = 1748473408383

    table_data = {
        "tables": [
            {
                "name": "test_table",
                "table_type": "MANAGED",
                "created_at": created_ms,
                "updated_at": updated_ms,
                "columns": [],
            }
        ],
        "catalog_name": "test_catalog",
        "schema_name": "test_schema",
        "total_count": 1,
    }

    with patch("src.ui.table_formatter.display_table") as mock_display_table:
        # _display_tables raises PaginationCancelled by design
        with self.assertRaises(PaginationCancelled):
            self.tui._display_tables(table_data)

    kwargs = mock_display_table.call_args.kwargs
    data = kwargs["data"]

    created_date = data[0]["created_at"]
    updated_date = data[0]["updated_at"]

    # Should be formatted as YYYY-MM-DD
    self.assertRegex(
        created_date,
        r"^\d{4}-\d{2}-\d{2}$",
        f"created_at should be formatted as YYYY-MM-DD, got: {created_date}",
    )
    self.assertRegex(
        updated_date,
        r"^\d{4}-\d{2}-\d{2}$",
        f"updated_at should be formatted as YYYY-MM-DD, got: {updated_date}",
    )

    # Verify the actual date conversion in the machine's local timezone so
    # the assertion holds regardless of where the test suite runs.
    expected_created = datetime.fromtimestamp(created_ms / 1000).strftime(
        "%Y-%m-%d"
    )
    expected_updated = datetime.fromtimestamp(updated_ms / 1000).strftime(
        "%Y-%m-%d"
    )
    self.assertEqual(created_date, expected_created)
    self.assertEqual(updated_date, expected_updated)

def test_table_display_with_missing_timestamps(self):
    """Rows without created_at/updated_at must still be passed to display_table."""
    from src.exceptions import PaginationCancelled

    # A view that deliberately carries no timestamp keys at all.
    bare_view = {
        "name": "table_no_timestamps",
        "table_type": "VIEW",
        "columns": [],
    }
    table_data = {
        "tables": [bare_view],
        "catalog_name": "test_catalog",
        "schema_name": "test_schema",
        "total_count": 1,
    }

    # PaginationCancelled is the expected way for _display_tables to finish,
    # not an error.
    target = "src.ui.table_formatter.display_table"
    with patch(target) as fake_display, self.assertRaises(PaginationCancelled):
        self.tui._display_tables(table_data)

    data = fake_display.call_args.kwargs["data"]

    # The row was processed even though it has no timestamps.
    self.assertEqual(len(data), 1)
    self.assertEqual(data[0]["name"], "table_no_timestamps")

    # Neither timestamp key resolves to a value.
    for ts_key in ("created_at", "updated_at"):
        self.assertIsNone(data[0].get(ts_key))

def test_row_count_formatting(self):
    """Row counts are rendered with K/M/B suffixes; "-" is left untouched."""
    from src.exceptions import PaginationCancelled

    target = "src.ui.table_formatter.display_table"

    test_cases = [
        {"row_count": 123, "expected": "123"},  # below 1K: plain number
        {"row_count": 1234, "expected": "1.2K"},  # thousands
        {"row_count": 50000, "expected": "50.0K"},  # tens of thousands
        {"row_count": 1234567, "expected": "1.2M"},  # millions
        {"row_count": 4387229, "expected": "4.4M"},  # real example from API
        {"row_count": 1234567890, "expected": "1.2B"},  # billions
        {"row_count": "-", "expected": "-"},  # dash marks an unknown count
    ]

    for index, case in enumerate(test_cases):
        with self.subTest(case=case):
            raw_count = case["row_count"]
            expected = case["expected"]

            single_row = {
                "name": f"test_table_{index}",
                "table_type": "MANAGED",
                "row_count": raw_count,
                "columns": [],
            }
            table_data = {
                "tables": [single_row],
                "catalog_name": "test_catalog",
                "schema_name": "test_schema",
                "total_count": 1,
            }

            with patch(target) as fake_display:
                with self.assertRaises(PaginationCancelled):
                    self.tui._display_tables(table_data)

            # Compare what display_table received against the expectation.
            actual_row_count = fake_display.call_args.kwargs["data"][0]["row_count"]
            self.assertEqual(
                actual_row_count,
                expected,
                f"Row count {raw_count} should format to {expected}, got {actual_row_count}",
            )