odata_to_staging: fix parse + upload

lukakoning · lukakoning · commit ee229cdbe78b · 2026-01-15T15:44:25.000+01:00
diff --git a/odata_to_staging/functions/odata_v4_client.py b/odata_to_staging/functions/odata_v4_client.py
@@ -96,11 +96,13 @@ def _extract_type_name(full_type: str) -> str:
             full_type: Fully-qualified type string (e.g., "Namespace.Type")
 
         Returns:
-            Just the type name portion
+            Just the type name portion (without brackets)
         """
         if not full_type:
             return ""
 
+        result: str
+
         # Handle bracket-quoted identifiers: "[Schema].[Type]" or "Namespace.[Type]"
         # Split on dots that are NOT inside brackets
         # Strategy: find the last segment, which may be bracket-quoted
@@ -110,7 +112,11 @@ def _extract_type_name(full_type: str) -> str:
             if last_bracket > 0:
                 # Check if there's a dot before the bracket
                 if full_type[last_bracket - 1] == ".":
-                    return full_type[last_bracket:]
+                    result = full_type[last_bracket:]
+                    # Strip surrounding brackets if present
+                    if result.startswith("[") and result.endswith("]"):
+                        return result[1:-1]
+                    return result
             # If the whole thing starts with '[', it might be the full type name
             if full_type.startswith("["):
                 # Look for pattern like "[Schema].[Type]" - return last bracketed segment
@@ -133,7 +139,11 @@ def _extract_type_name(full_type: str) -> str:
                 if current:
                     parts.append(current)
                 if parts:
-                    return parts[-1]
+                    result = parts[-1]
+                    # Strip surrounding brackets if present
+                    if result.startswith("[") and result.endswith("]"):
+                        return result[1:-1]
+                    return result
 
         # Standard case: "Namespace.TypeName" -> "TypeName"
         return full_type.split(".")[-1]
@@ -437,6 +447,9 @@ def get_entity_properties(
     def _find_entity_type(self, type_name: str) -> Optional[Dict[str, Any]]:
         """Find EntityType by name with case-insensitive fallback.
 
+        Also handles bracket-quoted names where the type might be stored with
+        a full name like '[Schema].[Type]' but we're searching for 'Type'.
+
         Args:
             type_name: Name of the EntityType to find
 
@@ -457,7 +470,19 @@ def _find_entity_type(self, type_name: str) -> Optional[Dict[str, Any]]:
             ),
             None,
         )
-        return entity_type
+        if entity_type:
+            return entity_type
+
+        # Try matching against extracted type names (for bracket-quoted full names)
+        # e.g., type_name='METADATA' should match stored name='[APICUST].[METADATA]'
+        for name, et in self.schema["entity_types"].items():
+            extracted_name = self._extract_type_name(name)
+            if extracted_name == type_name:
+                return et
+            if extracted_name.lower() == type_name.lower():
+                return et
+
+        return None
 
     def get_navigation_properties(self, entity_set_name: str) -> List[str]:
         """Return list of navigation property names for an EntitySet.
diff --git a/odata_to_staging/tests/test_odata_v4_client.py b/odata_to_staging/tests/test_odata_v4_client.py
@@ -349,18 +349,18 @@ def test_empty_string(self):
         assert ODataV4Client._extract_type_name("") == ""
 
     def test_bracket_quoted_schema_and_type(self):
-        """Bracket-quoted identifiers like [Schema].[Type]."""
-        assert ODataV4Client._extract_type_name("[APICUST].[METADATA]") == "[METADATA]"
-        assert ODataV4Client._extract_type_name("[Schema].[MyTable]") == "[MyTable]"
+        """Bracket-quoted identifiers like [Schema].[Type] - brackets are stripped."""
+        assert ODataV4Client._extract_type_name("[APICUST].[METADATA]") == "METADATA"
+        assert ODataV4Client._extract_type_name("[Schema].[MyTable]") == "MyTable"
 
     def test_namespace_with_bracket_type(self):
-        """Namespace.with.[BracketType] format."""
-        assert ODataV4Client._extract_type_name("Namespace.[METADATA]") == "[METADATA]"
-        assert ODataV4Client._extract_type_name("My.Namespace.[Type]") == "[Type]"
+        """Namespace.with.[BracketType] format - brackets are stripped."""
+        assert ODataV4Client._extract_type_name("Namespace.[METADATA]") == "METADATA"
+        assert ODataV4Client._extract_type_name("My.Namespace.[Type]") == "Type"
 
     def test_single_bracket_quoted_type(self):
-        """Single bracket-quoted type name."""
-        assert ODataV4Client._extract_type_name("[METADATA]") == "[METADATA]"
+        """Single bracket-quoted type name - brackets are stripped."""
+        assert ODataV4Client._extract_type_name("[METADATA]") == "METADATA"
 
 
 class TestCaseInsensitiveEntitySetLookup:
diff --git a/utils/parquet/upload_parquet.py b/utils/parquet/upload_parquet.py
@@ -34,12 +34,33 @@
 logger = logging.getLogger("utils.parquet.upload_parquet")
 
 
+def _sanitize_table_name(name: str) -> str:
+    """Sanitize a table name by removing brackets and replacing dots with underscores.
+
+    This handles OData entity set names like '[APICUST].[METADATA]' which would
+    otherwise be misinterpreted as schema-qualified table names.
+
+    Args:
+        name: Raw table name that may contain brackets and dots
+
+    Returns:
+        Table name with brackets removed and dots replaced by underscores
+    """
+    # Remove square brackets
+    sanitized = name.replace("[", "").replace("]", "")
+    # Replace dots with underscores to avoid schema.table interpretation
+    sanitized = sanitized.replace(".", "_")
+    return sanitized
+
+
 def _parse_parquet_base_name(filename: str) -> str:
     """Derive the logical table base name from a parquet filename."""
 
     stem = Path(filename).stem
     m = re.match(r"^(?P<base>.+)_part\d+$", stem)
-    return m.group("base") if m else stem
+    base = m.group("base") if m else stem
+    # Sanitize to handle bracket-quoted OData names
+    return _sanitize_table_name(base)
 
 
 def group_parquet_files(
@@ -228,7 +249,8 @@ def upload_parquet(
             for idx, fname in enumerate(files):
                 path = os.path.join(input_dir, fname)
                 logger.info("🔹 Processing %s", path)
-                df = pl.read_parquet(path)
+                # Use glob=False to prevent brackets in filenames being treated as glob patterns
+                df = pl.read_parquet(path, glob=False)
                 df = df.rename({col: col.lower() for col in df.columns})
 
                 if dialect == "postgresql":