@@ -246,7 +246,8 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat
246246
247247ParquetColumnSchema ColumnWriter::FillParquetSchema (vector<duckdb_parquet::SchemaElement> &schemas,
248248 const LogicalType &type, const string &name,
249- optional_ptr<const ChildFieldIDs> field_ids, idx_t max_repeat,
249+ optional_ptr<const ChildFieldIDs> field_ids,
250+ optional_ptr<const ShreddingType> shredding_types, idx_t max_repeat,
250251 idx_t max_define, bool can_have_nulls) {
251252 auto null_type = can_have_nulls ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED;
252253 if (!can_have_nulls) {
@@ -263,6 +264,10 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
263264 child_field_ids = &field_id->child_field_ids ;
264265 }
265266 }
267+ optional_ptr<const ShreddingType> shredding_type;
268+ if (shredding_types) {
269+ shredding_type = shredding_types->GetChild (name);
270+ }
266271
267272 if (type.id () == LogicalTypeId::STRUCT && type.GetAlias () == " PARQUET_VARIANT" ) {
268273 // variant type
@@ -273,32 +278,53 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
273278 // [<typed_value>]
274279 // }
275280
276- const bool is_shredded = false ;
281+ const bool is_shredded = shredding_type != nullptr ;
282+
283+ child_list_t <LogicalType> child_types;
284+ child_types.emplace_back (" metadata" , LogicalType::BLOB);
285+ child_types.emplace_back (" value" , LogicalType::BLOB);
286+ if (is_shredded) {
287+ auto &typed_value_type = shredding_type->type ;
288+ if (typed_value_type.id () != LogicalTypeId::ANY) {
289+ child_types.emplace_back (" typed_value" ,
290+ VariantColumnWriter::TransformTypedValueRecursive (typed_value_type));
291+ }
292+ }
277293
278294 // variant group
279295 duckdb_parquet::SchemaElement top_element;
280296 top_element.repetition_type = null_type;
281- top_element.num_children = is_shredded ? 3 : 2 ;
297+ top_element.num_children = child_types. size () ;
282298 top_element.logicalType .__isset .VARIANT = true ;
283299 top_element.logicalType .VARIANT .__isset .specification_version = true ;
284300 top_element.logicalType .VARIANT .specification_version = 1 ;
285301 top_element.__isset .logicalType = true ;
286302 top_element.__isset .num_children = true ;
287303 top_element.__isset .repetition_type = true ;
304+ top_element.name = name;
288305 schemas.push_back (std::move (top_element));
289306
290- child_list_t <LogicalType> child_types;
291- child_types.emplace_back (" metadata" , LogicalType::BLOB);
292- child_types.emplace_back (" value" , LogicalType::BLOB);
293- if (is_shredded) {
294- throw NotImplementedException (" Writing shredded VARIANT isn't supported for Parquet yet" );
295- }
296-
297307 ParquetColumnSchema variant_column (name, type, max_define, max_repeat, schema_idx, 0 );
298308 variant_column.children .reserve (child_types.size ());
299309 for (auto &child_type : child_types) {
310+ auto &child_name = child_type.first ;
311+ bool is_optional;
312+ if (child_name == " metadata" ) {
313+ is_optional = false ;
314+ } else if (child_name == " value" ) {
315+ if (is_shredded) {
316+ // ! When shredding the variant, the 'value' becomes optional
317+ is_optional = true ;
318+ } else {
319+ is_optional = false ;
320+ }
321+ } else {
322+ D_ASSERT (child_name == " typed_value" );
323+ is_optional = true ;
324+ }
300325 variant_column.children .emplace_back (FillParquetSchema (schemas, child_type.second , child_type.first ,
301- child_field_ids, max_repeat, max_define + 1 , false ));
326+ child_field_ids, shredding_type, max_repeat,
327+ max_define + 1 , is_optional));
302328 }
303329 return variant_column;
304330 }
@@ -324,7 +350,8 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
324350 struct_column.children .reserve (child_types.size ());
325351 for (auto &child_type : child_types) {
326352 struct_column.children .emplace_back (FillParquetSchema (schemas, child_type.second , child_type.first ,
327- child_field_ids, max_repeat, max_define + 1 ));
353+ child_field_ids, shredding_type, max_repeat,
354+ max_define + 1 , true ));
328355 }
329356 return struct_column;
330357 }
@@ -360,8 +387,8 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
360387 schemas.push_back (std::move (repeated_element));
361388
362389 ParquetColumnSchema list_column (name, type, max_define, max_repeat, schema_idx, 0 );
363- list_column.children .push_back (
364- FillParquetSchema (schemas, child_type, " element " , child_field_ids , max_repeat + 1 , max_define + 2 ));
390+ list_column.children .push_back (FillParquetSchema (schemas, child_type, " element " , child_field_ids,
391+ shredding_type , max_repeat + 1 , max_define + 2 , true ));
365392 return list_column;
366393 }
367394 if (type.id () == LogicalTypeId::MAP) {
@@ -408,8 +435,8 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
408435 for (idx_t i = 0 ; i < 2 ; i++) {
409436 // key needs to be marked as REQUIRED
410437 bool is_key = i == 0 ;
411- auto child_schema = FillParquetSchema (schemas, kv_types[i], kv_names[i], child_field_ids, max_repeat + 1 ,
412- max_define + 2 , !is_key);
438+ auto child_schema = FillParquetSchema (schemas, kv_types[i], kv_names[i], child_field_ids, shredding_type ,
439+ max_repeat + 1 , max_define + 2 , !is_key);
413440
414441 map_column.children .push_back (std::move (child_schema));
415442 }
@@ -441,8 +468,6 @@ ColumnWriter::CreateWriterRecursive(ClientContext &context, ParquetWriter &write
441468 path_in_schema.push_back (schema.name );
442469
443470 if (type.id () == LogicalTypeId::STRUCT && type.GetAlias () == " PARQUET_VARIANT" ) {
444- D_ASSERT (schema.children .size () == 2 ); // ! NOTE: shredded variants not supported yet
445-
446471 vector<unique_ptr<ColumnWriter>> child_writers;
447472 child_writers.reserve (schema.children .size ());
448473 for (idx_t i = 0 ; i < schema.children .size (); i++) {