Skip to content

Commit ee229cd

Browse files
committed
odata_to_staging: fix parse + upload
1 parent b17d6b3 commit ee229cd

File tree

3 files changed

+61
-14
lines changed

3 files changed

+61
-14
lines changed

odata_to_staging/functions/odata_v4_client.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,13 @@ def _extract_type_name(full_type: str) -> str:
9696
full_type: Fully-qualified type string (e.g., "Namespace.Type")
9797
9898
Returns:
99-
Just the type name portion
99+
Just the type name portion (without brackets)
100100
"""
101101
if not full_type:
102102
return ""
103103

104+
result: str
105+
104106
# Handle bracket-quoted identifiers: "[Schema].[Type]" or "Namespace.[Type]"
105107
# Split on dots that are NOT inside brackets
106108
# Strategy: find the last segment, which may be bracket-quoted
@@ -110,7 +112,11 @@ def _extract_type_name(full_type: str) -> str:
110112
if last_bracket > 0:
111113
# Check if there's a dot before the bracket
112114
if full_type[last_bracket - 1] == ".":
113-
return full_type[last_bracket:]
115+
result = full_type[last_bracket:]
116+
# Strip surrounding brackets if present
117+
if result.startswith("[") and result.endswith("]"):
118+
return result[1:-1]
119+
return result
114120
# If the whole thing starts with '[', it might be the full type name
115121
if full_type.startswith("["):
116122
# Look for pattern like "[Schema].[Type]" - return last bracketed segment
@@ -133,7 +139,11 @@ def _extract_type_name(full_type: str) -> str:
133139
if current:
134140
parts.append(current)
135141
if parts:
136-
return parts[-1]
142+
result = parts[-1]
143+
# Strip surrounding brackets if present
144+
if result.startswith("[") and result.endswith("]"):
145+
return result[1:-1]
146+
return result
137147

138148
# Standard case: "Namespace.TypeName" -> "TypeName"
139149
return full_type.split(".")[-1]
@@ -437,6 +447,9 @@ def get_entity_properties(
437447
def _find_entity_type(self, type_name: str) -> Optional[Dict[str, Any]]:
438448
"""Find EntityType by name with case-insensitive fallback.
439449
450+
Also handles bracket-quoted names where the type might be stored with
451+
a full name like '[Schema].[Type]' but we're searching for 'Type'.
452+
440453
Args:
441454
type_name: Name of the EntityType to find
442455
@@ -457,7 +470,19 @@ def _find_entity_type(self, type_name: str) -> Optional[Dict[str, Any]]:
457470
),
458471
None,
459472
)
460-
return entity_type
473+
if entity_type:
474+
return entity_type
475+
476+
# Try matching against extracted type names (for bracket-quoted full names)
477+
# e.g., type_name='METADATA' should match stored name='[APICUST].[METADATA]'
478+
for name, et in self.schema["entity_types"].items():
479+
extracted_name = self._extract_type_name(name)
480+
if extracted_name == type_name:
481+
return et
482+
if extracted_name.lower() == type_name.lower():
483+
return et
484+
485+
return None
461486

462487
def get_navigation_properties(self, entity_set_name: str) -> List[str]:
463488
"""Return list of navigation property names for an EntitySet.

odata_to_staging/tests/test_odata_v4_client.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -349,18 +349,18 @@ def test_empty_string(self):
349349
assert ODataV4Client._extract_type_name("") == ""
350350

351351
def test_bracket_quoted_schema_and_type(self):
352-
"""Bracket-quoted identifiers like [Schema].[Type]."""
353-
assert ODataV4Client._extract_type_name("[APICUST].[METADATA]") == "[METADATA]"
354-
assert ODataV4Client._extract_type_name("[Schema].[MyTable]") == "[MyTable]"
352+
"""Bracket-quoted identifiers like [Schema].[Type] - brackets are stripped."""
353+
assert ODataV4Client._extract_type_name("[APICUST].[METADATA]") == "METADATA"
354+
assert ODataV4Client._extract_type_name("[Schema].[MyTable]") == "MyTable"
355355

356356
def test_namespace_with_bracket_type(self):
357-
"""Namespace.with.[BracketType] format."""
358-
assert ODataV4Client._extract_type_name("Namespace.[METADATA]") == "[METADATA]"
359-
assert ODataV4Client._extract_type_name("My.Namespace.[Type]") == "[Type]"
357+
"""Namespace.with.[BracketType] format - brackets are stripped."""
358+
assert ODataV4Client._extract_type_name("Namespace.[METADATA]") == "METADATA"
359+
assert ODataV4Client._extract_type_name("My.Namespace.[Type]") == "Type"
360360

361361
def test_single_bracket_quoted_type(self):
362-
"""Single bracket-quoted type name."""
363-
assert ODataV4Client._extract_type_name("[METADATA]") == "[METADATA]"
362+
"""Single bracket-quoted type name - brackets are stripped."""
363+
assert ODataV4Client._extract_type_name("[METADATA]") == "METADATA"
364364

365365

366366
class TestCaseInsensitiveEntitySetLookup:

utils/parquet/upload_parquet.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,33 @@
3434
logger = logging.getLogger("utils.parquet.upload_parquet")
3535

3636

37+
def _sanitize_table_name(name: str) -> str:
38+
"""Sanitize a table name by removing brackets and replacing dots with underscores.
39+
40+
This handles OData entity set names like '[APICUST].[METADATA]' which would
41+
otherwise be misinterpreted as schema-qualified table names.
42+
43+
Args:
44+
name: Raw table name that may contain brackets and dots
45+
46+
Returns:
47+
Sanitized table name safe for use as a SQL identifier
48+
"""
49+
# Remove square brackets
50+
sanitized = name.replace("[", "").replace("]", "")
51+
# Replace dots with underscores to avoid schema.table interpretation
52+
sanitized = sanitized.replace(".", "_")
53+
return sanitized
54+
55+
3756
def _parse_parquet_base_name(filename: str) -> str:
3857
"""Derive the logical table base name from a parquet filename."""
3958

4059
stem = Path(filename).stem
4160
m = re.match(r"^(?P<base>.+)_part\d+$", stem)
42-
return m.group("base") if m else stem
61+
base = m.group("base") if m else stem
62+
# Sanitize to handle bracket-quoted OData names
63+
return _sanitize_table_name(base)
4364

4465

4566
def group_parquet_files(
@@ -228,7 +249,8 @@ def upload_parquet(
228249
for idx, fname in enumerate(files):
229250
path = os.path.join(input_dir, fname)
230251
logger.info("🔹 Processing %s", path)
231-
df = pl.read_parquet(path)
252+
# Use glob=False to prevent brackets in filenames being treated as glob patterns
253+
df = pl.read_parquet(path, glob=False)
232254
df = df.rename({col: col.lower() for col in df.columns})
233255

234256
if dialect == "postgresql":

0 commit comments

Comments
 (0)