Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions pyathena/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,9 @@ def _parse_map_native(inner: str) -> Optional[Dict[str, Any]]:


def _parse_named_struct(inner: str) -> Optional[Dict[str, Any]]:
"""Parse named struct format: a=1, b=2.
"""Parse named struct format: key1=value1, key2=value2.

Supports nested structs: outer={inner_key=inner_value}, field=value.

Args:
inner: Interior content of struct without braces.
Expand All @@ -346,8 +348,8 @@ def _parse_named_struct(inner: str) -> Optional[Dict[str, Any]]:
"""
result = {}

# Simple split by comma for basic cases
pairs = [pair.strip() for pair in inner.split(",")]
# Use smart split to handle nested structures
pairs = _split_array_items(inner)

for pair in pairs:
if "=" not in pair:
Expand All @@ -357,10 +359,18 @@ def _parse_named_struct(inner: str) -> Optional[Dict[str, Any]]:
key = key.strip()
value = value.strip()

# Skip pairs with special characters (safety check)
if any(char in key for char in '{}="') or any(char in value for char in '{}="'):
# Skip if key contains special characters (safety check)
if any(char in key for char in '{}="'):
continue

# Handle nested struct values
if value.startswith("{") and value.endswith("}"):
# Try to parse as nested struct
nested_struct = _to_struct(value)
if nested_struct is not None:
result[key] = nested_struct
continue

# Convert value to appropriate type
result[key] = _convert_value(value)

Expand Down
112 changes: 112 additions & 0 deletions tests/pyathena/sqlalchemy/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,118 @@ def test_json_type_with_cast(self, engine):
).fetchone()
assert result.json_types == {"str": "value", "num": 42, "bool": True, "nil": None}

def test_select_nested_struct_query(self, engine):
    """Check that nested STRUCT (ROW) columns are returned as nested dicts (Issue #627).

    Runs three queries against the connection supplied by the ``engine``
    fixture: one level of nesting, two levels of nesting, and several
    nested fields side by side.
    """
    _, conn = engine

    # NOTE: the SQL bodies are kept flush-left so the literals carry no
    # extra leading whitespace.

    # Scenario 1: one nested ROW next to scalar fields (the Issue #627 shape).
    stmt = sqlalchemy.text(
        """
SELECT
CAST(ROW(
ROW('2024-01-01', 123),
CAST(4.736 AS DOUBLE),
CAST(0.583 AS DOUBLE)
) AS ROW(header ROW(stamp VARCHAR, seq INTEGER), x DOUBLE, y DOUBLE)) as positions
"""
    )
    row = conn.execute(stmt).fetchone()
    assert row is not None
    positions = row.positions
    assert positions is not None
    assert isinstance(positions, dict)
    assert "header" in positions
    header = positions["header"]
    assert isinstance(header, dict)
    assert header["stamp"] == "2024-01-01"
    assert header["seq"] == 123
    assert positions["x"] == 4.736
    assert positions["y"] == 0.583

    # Scenario 2: a ROW nested two levels deep.
    stmt = sqlalchemy.text(
        """
SELECT
CAST(ROW(
ROW(ROW('value')),
123
) AS ROW(level1 ROW(level2 ROW(level3 VARCHAR)), field INTEGER)) as data
"""
    )
    row = conn.execute(stmt).fetchone()
    assert row is not None
    assert row.data["level1"]["level2"]["level3"] == "value"
    assert row.data["field"] == 123

    # Scenario 3: two sibling nested ROWs followed by a scalar field.
    stmt = sqlalchemy.text(
        """
SELECT
CAST(ROW(
ROW(1, 2),
ROW(CAST(0.5 AS DOUBLE), CAST(0.3 AS DOUBLE)),
12345
) AS ROW(
pos ROW(x INTEGER, y INTEGER),
vel ROW(x DOUBLE, y DOUBLE),
timestamp INTEGER
)) as data
"""
    )
    row = conn.execute(stmt).fetchone()
    assert row is not None
    assert row.data["pos"]["x"] == 1
    assert row.data["pos"]["y"] == 2
    assert row.data["vel"]["x"] == 0.5
    assert row.data["vel"]["y"] == 0.3
    assert row.data["timestamp"] == 12345

def test_select_array_with_nested_struct(self, engine):
    """Check that an ARRAY of nested STRUCTs is returned as a list of nested dicts (Issue #627).

    Runs two queries against the connection supplied by the ``engine``
    fixture: a one-element array, then a two-element array whose elements
    each hold two nested ROWs.
    """
    _, conn = engine

    # NOTE: the SQL bodies are kept flush-left so the literals carry no
    # extra leading whitespace.

    # Scenario 1: single array element holding a nested ROW (the Issue #627 shape).
    stmt = sqlalchemy.text(
        """
SELECT
CAST(ARRAY[
ROW(
ROW('2024-01-01', 123),
CAST(4.736 AS DOUBLE)
)
] AS ARRAY<ROW(header ROW(stamp VARCHAR, seq INTEGER), x DOUBLE)>) as positions
"""
    )
    row = conn.execute(stmt).fetchone()
    assert row is not None
    positions = row.positions
    assert positions is not None
    assert isinstance(positions, list)
    assert len(positions) == 1
    first = positions[0]
    assert isinstance(first, dict)
    assert "header" in first
    header = first["header"]
    assert isinstance(header, dict)
    assert header["stamp"] == "2024-01-01"
    assert header["seq"] == 123
    assert first["x"] == 4.736

    # Scenario 2: several array elements, each with two nested ROWs.
    stmt = sqlalchemy.text(
        """
SELECT
CAST(ARRAY[
ROW(ROW(1, 2), ROW(CAST(0.5 AS DOUBLE))),
ROW(ROW(3, 4), ROW(CAST(1.5 AS DOUBLE)))
] AS ARRAY<ROW(pos ROW(x INTEGER, y INTEGER), vel ROW(x DOUBLE))>) as data
"""
    )
    row = conn.execute(stmt).fetchone()
    assert row is not None
    assert len(row.data) == 2
    assert row.data[0]["pos"]["x"] == 1
    assert row.data[0]["pos"]["y"] == 2
    assert row.data[0]["vel"]["x"] == 0.5
    assert row.data[1]["pos"]["x"] == 3
    assert row.data[1]["pos"]["y"] == 4
    assert row.data[1]["vel"]["x"] == 1.5

def test_reflect_no_such_table(self, engine):
engine, conn = engine
pytest.raises(
Expand Down
75 changes: 75 additions & 0 deletions tests/pyathena/test_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,52 @@ def test_to_struct_athena_native_formats(input_value, expected):
assert result == expected


@pytest.mark.parametrize(
    ("input_value", "expected"),
    [
        # Single level nesting (Issue #627)
        pytest.param(
            "{header={stamp=2024-01-01, seq=123}, x=4.736, y=0.583}",
            {"header": {"stamp": "2024-01-01", "seq": 123}, "x": 4.736, "y": 0.583},
        ),
        # Double nesting
        pytest.param(
            "{outer={middle={inner=value}}, field=123}",
            {"outer": {"middle": {"inner": "value"}}, "field": 123},
        ),
        # Multiple nested fields
        pytest.param(
            "{pos={x=1, y=2}, vel={x=0.5, y=0.3}, timestamp=12345}",
            {"pos": {"x": 1, "y": 2}, "vel": {"x": 0.5, "y": 0.3}, "timestamp": 12345},
        ),
        # Triple nesting
        pytest.param(
            "{level1={level2={level3={value=deep}}}}",
            {"level1": {"level2": {"level3": {"value": "deep"}}}},
        ),
        # Mixed types in nested struct
        pytest.param(
            "{metadata={id=123, active=true, name=test}, count=5}",
            {"metadata": {"id": 123, "active": True, "name": "test"}, "count": 5},
        ),
        # Nested struct with null value
        pytest.param(
            "{data={value=null, status=ok}, flag=true}",
            {"data": {"value": None, "status": "ok"}, "flag": True},
        ),
        # Complex nesting with multiple levels and fields
        pytest.param(
            "{a={b={c=1, d=2}, e=3}, f=4, g={h=5}}",
            {"a": {"b": {"c": 1, "d": 2}, "e": 3}, "f": 4, "g": {"h": 5}},
        ),
    ],
)
def test_to_struct_athena_nested_formats(input_value, expected):
    """Each nested struct string must convert to the expected nested dict (Issue #627)."""
    assert _to_struct(input_value) == expected


@pytest.mark.parametrize(
"input_value",
[
Expand Down Expand Up @@ -106,6 +152,35 @@ def test_to_array_athena_unnamed_struct_elements():
assert result == expected


@pytest.mark.parametrize(
    ("input_value", "expected"),
    [
        # Array with nested structs (Issue #627)
        pytest.param(
            "[{header={stamp=2024-01-01, seq=123}, x=4.736}]",
            [{"header": {"stamp": "2024-01-01", "seq": 123}, "x": 4.736}],
        ),
        # Multiple elements with nested structs
        pytest.param(
            "[{pos={x=1, y=2}, vel={x=0.5}}, {pos={x=3, y=4}, vel={x=1.5}}]",
            [
                {"pos": {"x": 1, "y": 2}, "vel": {"x": 0.5}},
                {"pos": {"x": 3, "y": 4}, "vel": {"x": 1.5}},
            ],
        ),
        # Array with deeply nested structs
        pytest.param(
            "[{data={meta={id=1, active=true}}}]",
            [{"data": {"meta": {"id": 1, "active": True}}}],
        ),
    ],
)
def test_to_array_athena_nested_struct_elements(input_value, expected):
    """Each array-of-nested-structs string must convert to the expected list of dicts (Issue #627)."""
    assert _to_array(input_value) == expected


@pytest.mark.parametrize(
"input_value",
[
Expand Down