Commit abab89c

Author: Björn Bamberg
Commit message: Merge branch 'main' into using-key-aggregate
2 parents dcc8cb3 + b83205f, commit abab89c

177 files changed: 3235 additions & 7700 deletions


.github/regression/micro.csv

Lines changed: 2 additions & 1 deletion
@@ -32,4 +32,5 @@ benchmark/micro/logger/logging_overhead/parquet_q1_with_default_logging.benchmark
 benchmark/micro/logger/logging_overhead/duckdb_persistent_q1_with_default_logging.benchmark
 benchmark/micro/logger/storage/file/log_message_size/huge_string.benchmark
 benchmark/micro/logger/storage/file/log_message_size/small_string.benchmark
-benchmark/micro/filter/choose_correct_filter_function.benchmark
+benchmark/micro/filter/choose_correct_filter_function.benchmark
+benchmark/micro/optimizer/topn_window_elimination.benchmark
benchmark/micro/optimizer/topn_window_elimination.benchmark

Lines changed: 23 additions & 0 deletions

@@ -0,0 +1,23 @@
+# name: benchmark/micro/optimizer/topn_window_elimination.benchmark
+# description: Benchmark of top n window elimination
+# group: [optimizer]
+
+name TopN Window Elimination
+group micro
+subgroup optimizer
+
+load
+CREATE TABLE metrics AS (
+    SELECT k, '2001-01-01 00:00:00'::TIMESTAMP + INTERVAL (v) MINUTE AS tm, v AS v1, v % 1000 AS v2, v % 100 as v3
+    FROM range(0,100000) vals(v), range(0,100) keys(k)
+);
+CREATE TABLE tags AS (
+    SELECT k, CAST(hash(k+1) AS VARCHAR) t1, CAST(hash(k+2) AS VARCHAR) t2, CAST(hash(k+3) AS VARCHAR) t3,
+    FROM range(0,100) keys(k)
+);
+
+run
+SELECT * FROM tags t INNER JOIN LATERAL (SELECT * FROM metrics m WHERE m.k = t.k ORDER BY tm DESC LIMIT 1) AS b ON true ORDER BY t.k, b.tm DESC;
+SELECT * FROM tags t INNER JOIN LATERAL (SELECT * FROM metrics m WHERE m.k = t.k ORDER BY tm DESC LIMIT 3) AS b ON true ORDER BY t.k, b.tm DESC;
+SELECT * FROM tags t, (SELECT *, row_number() OVER (PARTITION BY m.k ORDER BY m.tm DESC) rn FROM metrics m QUALIFY rn <= 1) m WHERE t.k = m.k;
+SELECT * FROM tags t, (SELECT *, row_number() OVER (PARTITION BY m.k ORDER BY m.tm DESC) rn FROM metrics m QUALIFY rn <= 3) m WHERE t.k = m.k;
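
The four queries in the run section come in equivalent pairs: each LATERAL top-1/top-3 query returns the same per-key rows as the corresponding row_number() OVER (PARTITION BY ... ORDER BY ... DESC) ... QUALIFY rn <= N query, so the benchmark measures both the hand-written nested-loop form and the window form that the TopN window elimination optimization is meant to rewrite.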

extension/parquet/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -21,9 +21,11 @@ set(PARQUET_EXTENSION_FILES
     parquet_multi_file_info.cpp
     parquet_metadata.cpp
     parquet_reader.cpp
+    parquet_field_id.cpp
     parquet_statistics.cpp
     parquet_timestamp.cpp
     parquet_writer.cpp
+    parquet_shredding.cpp
     serialize_parquet.cpp
     zstd_file_system.cpp
     geo_parquet.cpp)

extension/parquet/column_writer.cpp

Lines changed: 43 additions & 18 deletions
@@ -246,7 +246,8 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat
 
 ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::SchemaElement> &schemas,
                                                     const LogicalType &type, const string &name,
-                                                    optional_ptr<const ChildFieldIDs> field_ids, idx_t max_repeat,
+                                                    optional_ptr<const ChildFieldIDs> field_ids,
+                                                    optional_ptr<const ShreddingType> shredding_types, idx_t max_repeat,
                                                     idx_t max_define, bool can_have_nulls) {
     auto null_type = can_have_nulls ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED;
     if (!can_have_nulls) {
@@ -263,6 +264,10 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
             child_field_ids = &field_id->child_field_ids;
         }
     }
+    optional_ptr<const ShreddingType> shredding_type;
+    if (shredding_types) {
+        shredding_type = shredding_types->GetChild(name);
+    }
 
     if (type.id() == LogicalTypeId::STRUCT && type.GetAlias() == "PARQUET_VARIANT") {
         // variant type
@@ -273,32 +278,53 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
         // [<typed_value>]
         // }
 
-        const bool is_shredded = false;
+        const bool is_shredded = shredding_type != nullptr;
+
+        child_list_t<LogicalType> child_types;
+        child_types.emplace_back("metadata", LogicalType::BLOB);
+        child_types.emplace_back("value", LogicalType::BLOB);
+        if (is_shredded) {
+            auto &typed_value_type = shredding_type->type;
+            if (typed_value_type.id() != LogicalTypeId::ANY) {
+                child_types.emplace_back("typed_value",
+                                         VariantColumnWriter::TransformTypedValueRecursive(typed_value_type));
+            }
+        }
 
         // variant group
         duckdb_parquet::SchemaElement top_element;
         top_element.repetition_type = null_type;
-        top_element.num_children = is_shredded ? 3 : 2;
+        top_element.num_children = child_types.size();
         top_element.logicalType.__isset.VARIANT = true;
         top_element.logicalType.VARIANT.__isset.specification_version = true;
         top_element.logicalType.VARIANT.specification_version = 1;
         top_element.__isset.logicalType = true;
         top_element.__isset.num_children = true;
         top_element.__isset.repetition_type = true;
+        top_element.name = name;
         schemas.push_back(std::move(top_element));
 
-        child_list_t<LogicalType> child_types;
-        child_types.emplace_back("metadata", LogicalType::BLOB);
-        child_types.emplace_back("value", LogicalType::BLOB);
-        if (is_shredded) {
-            throw NotImplementedException("Writing shredded VARIANT isn't supported for Parquet yet");
-        }
-
         ParquetColumnSchema variant_column(name, type, max_define, max_repeat, schema_idx, 0);
         variant_column.children.reserve(child_types.size());
         for (auto &child_type : child_types) {
+            auto &child_name = child_type.first;
+            bool is_optional;
+            if (child_name == "metadata") {
+                is_optional = false;
+            } else if (child_name == "value") {
+                if (is_shredded) {
+                    //! When shredding the variant, the 'value' becomes optional
+                    is_optional = true;
+                } else {
+                    is_optional = false;
+                }
+            } else {
+                D_ASSERT(child_name == "typed_value");
+                is_optional = true;
+            }
             variant_column.children.emplace_back(FillParquetSchema(schemas, child_type.second, child_type.first,
-                                                                   child_field_ids, max_repeat, max_define + 1, false));
+                                                                   child_field_ids, shredding_type, max_repeat,
+                                                                   max_define + 1, is_optional));
         }
         return variant_column;
     }
@@ -324,7 +350,8 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
         struct_column.children.reserve(child_types.size());
         for (auto &child_type : child_types) {
             struct_column.children.emplace_back(FillParquetSchema(schemas, child_type.second, child_type.first,
-                                                                  child_field_ids, max_repeat, max_define + 1));
+                                                                  child_field_ids, shredding_type, max_repeat,
+                                                                  max_define + 1, true));
         }
         return struct_column;
     }
@@ -360,8 +387,8 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
         schemas.push_back(std::move(repeated_element));
 
         ParquetColumnSchema list_column(name, type, max_define, max_repeat, schema_idx, 0);
-        list_column.children.push_back(
-            FillParquetSchema(schemas, child_type, "element", child_field_ids, max_repeat + 1, max_define + 2));
+        list_column.children.push_back(FillParquetSchema(schemas, child_type, "element", child_field_ids,
+                                                         shredding_type, max_repeat + 1, max_define + 2, true));
         return list_column;
     }
     if (type.id() == LogicalTypeId::MAP) {
@@ -408,8 +435,8 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
         for (idx_t i = 0; i < 2; i++) {
             // key needs to be marked as REQUIRED
             bool is_key = i == 0;
-            auto child_schema = FillParquetSchema(schemas, kv_types[i], kv_names[i], child_field_ids, max_repeat + 1,
-                                                  max_define + 2, !is_key);
+            auto child_schema = FillParquetSchema(schemas, kv_types[i], kv_names[i], child_field_ids, shredding_type,
+                                                  max_repeat + 1, max_define + 2, !is_key);
 
             map_column.children.push_back(std::move(child_schema));
         }
@@ -441,8 +468,6 @@ ColumnWriter::CreateWriterRecursive(ClientContext &context, ParquetWriter &write
     path_in_schema.push_back(schema.name);
 
     if (type.id() == LogicalTypeId::STRUCT && type.GetAlias() == "PARQUET_VARIANT") {
-        D_ASSERT(schema.children.size() == 2); //! NOTE: shredded variants not supported yet
-
         vector<unique_ptr<ColumnWriter>> child_writers;
         child_writers.reserve(schema.children.size());
         for (idx_t i = 0; i < schema.children.size(); i++) {
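
The substance of this change is that FillParquetSchema now threads an optional shredding spec through its recursion: each level looks up the current column name via shredding_types->GetChild(name), and the VARIANT branch emits a typed_value child only when a spec was found (a null optional_ptr doubles as "not shredded", which is why the old hard-coded is_shredded = false drops out cleanly). A minimal, self-contained sketch of that lookup pattern, using mock stand-in types rather than DuckDB's actual classes:

// Mock sketch of the per-column shredding lookup (stand-in types only;
// not DuckDB's real classes or headers).
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct MockShreddingType {
    std::string type; // stands in for duckdb::LogicalType
};

struct MockShreddingSpec {
    std::map<std::string, MockShreddingType> children;

    // Mirrors ShreddingType::GetChild: a miss means "write unshredded".
    const MockShreddingType *GetChild(const std::string &name) const {
        auto it = children.find(name);
        return it == children.end() ? nullptr : &it->second;
    }
};

// Mirrors the VARIANT branch above: metadata and value are always present;
// typed_value is added only for shredded columns whose type is not ANY.
std::vector<std::string> VariantChildren(const MockShreddingType *shredding) {
    std::vector<std::string> names {"metadata", "value"};
    if (shredding && shredding->type != "ANY") {
        names.push_back("typed_value");
    }
    return names;
}

int main() {
    MockShreddingSpec spec;
    spec.children["v"] = MockShreddingType {"BIGINT"};
    for (const char *col : {"v", "plain"}) {
        std::cout << col << ":";
        for (const auto &name : VariantChildren(spec.GetChild(col))) {
            std::cout << ' ' << name;
        }
        std::cout << '\n'; // v: metadata value typed_value / plain: metadata value
    }
    return 0;
}

Note how the unshredded path needs no special casing: a missing entry in the spec simply yields the two-child layout the writer produced before.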

extension/parquet/include/column_writer.hpp

Lines changed: 5 additions & 4 deletions
@@ -18,6 +18,7 @@ class ParquetWriter;
 class ColumnWriterPageState;
 class PrimitiveColumnWriterState;
 struct ChildFieldIDs;
+struct ShreddingType;
 class ResizeableBuffer;
 class ParquetBloomFilter;
 
@@ -88,10 +89,10 @@ class ColumnWriter {
         return column_schema.max_repeat;
     }
 
-    static ParquetColumnSchema FillParquetSchema(vector<duckdb_parquet::SchemaElement> &schemas,
-                                                 const LogicalType &type, const string &name,
-                                                 optional_ptr<const ChildFieldIDs> field_ids, idx_t max_repeat = 0,
-                                                 idx_t max_define = 1, bool can_have_nulls = true);
+    static ParquetColumnSchema
+    FillParquetSchema(vector<duckdb_parquet::SchemaElement> &schemas, const LogicalType &type, const string &name,
+                      optional_ptr<const ChildFieldIDs> field_ids, optional_ptr<const ShreddingType> shredding_types,
+                      idx_t max_repeat = 0, idx_t max_define = 1, bool can_have_nulls = true);
     //! Create the column writer for a specific type recursively
     static unique_ptr<ColumnWriter> CreateWriterRecursive(ClientContext &context, ParquetWriter &writer,
                                                           const vector<duckdb_parquet::SchemaElement> &parquet_schemas,
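
Since the trailing parameters keep their defaults, existing call sites only have to supply the new shredding argument. A hypothetical call site (variable names assumed, not taken from this commit):

auto root = ColumnWriter::FillParquetSchema(schemas, sql_type, column_name, field_ids, shredding_types);

Passing a null optional_ptr for shredding_types preserves the previous, unshredded behaviour.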

extension/parquet/include/parquet.json

Lines changed: 42 additions & 2 deletions
@@ -116,7 +116,7 @@
     {
         "class": "FieldID",
         "includes": [
-            "parquet_writer.hpp"
+            "parquet_field_id.hpp"
         ],
         "members": [
             {
@@ -140,7 +140,7 @@
     {
         "class": "ChildFieldIDs",
         "includes": [
-            "parquet_writer.hpp"
+            "parquet_field_id.hpp"
         ],
         "members": [
             {
@@ -152,5 +152,45 @@
             }
         ],
         "pointer_type": "none"
+    },
+    {
+        "class": "ShreddingType",
+        "includes": [
+            "parquet_shredding.hpp"
+        ],
+        "members": [
+            {
+                "id": 100,
+                "name": "set",
+                "type": "bool"
+            },
+            {
+                "id": 101,
+                "name": "type",
+                "type": "LogicalType"
+            },
+            {
+                "id": 102,
+                "name": "children",
+                "type": "ChildShreddingTypes"
+            }
+        ],
+        "pointer_type": "none"
+    },
+    {
+        "class": "ChildShreddingTypes",
+        "includes": [
+            "parquet_shredding.hpp"
+        ],
+        "members": [
+            {
+                "id": 100,
+                "name": "types",
+                "type": "case_insensitive_map_t<ShreddingType>",
+                "serialize_property": "types.operator*()",
+                "deserialize_property": "types.operator*()"
+            }
+        ],
+        "pointer_type": "none"
     }
 ]
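
parquet.json feeds DuckDB's serialization code generator: each member's id becomes the stable on-disk field id, and the serialize_property/deserialize_property overrides on ChildShreddingTypes make the generated code read and write through the dereferenced unique_ptr that holds the map. As a rough, hand-written approximation of what the generator emits into serialize_parquet.cpp for ShreddingType (a sketch only; the real output may use different helpers and defaults):

void ShreddingType::Serialize(Serializer &serializer) const {
    serializer.WriteProperty(100, "set", set);
    serializer.WriteProperty(101, "type", type);
    serializer.WriteProperty(102, "children", children);
}

ShreddingType ShreddingType::Deserialize(Deserializer &deserializer) {
    ShreddingType result;
    result.set = deserializer.ReadProperty<bool>(100, "set");
    result.type = deserializer.ReadProperty<LogicalType>(101, "type");
    result.children = deserializer.ReadProperty<ChildShreddingTypes>(102, "children");
    return result;
}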
extension/parquet/include/parquet_field_id.hpp

Lines changed: 39 additions & 0 deletions

@@ -0,0 +1,39 @@
+#pragma once
+
+#include "duckdb/common/serializer/buffered_file_writer.hpp"
+#include "duckdb/common/case_insensitive_map.hpp"
+
+namespace duckdb {
+
+struct FieldID;
+struct ChildFieldIDs {
+    ChildFieldIDs();
+    ChildFieldIDs Copy() const;
+    unique_ptr<case_insensitive_map_t<FieldID>> ids;
+
+    void Serialize(Serializer &serializer) const;
+    static ChildFieldIDs Deserialize(Deserializer &source);
+};
+
+struct FieldID {
+public:
+    static constexpr const auto DUCKDB_FIELD_ID = "__duckdb_field_id";
+    FieldID();
+    explicit FieldID(int32_t field_id);
+    FieldID Copy() const;
+    bool set;
+    int32_t field_id;
+    ChildFieldIDs child_field_ids;
+
+    void Serialize(Serializer &serializer) const;
+    static FieldID Deserialize(Deserializer &source);
+
+public:
+    static void GenerateFieldIDs(ChildFieldIDs &field_ids, idx_t &field_id, const vector<string> &names,
+                                 const vector<LogicalType> &sql_types);
+    static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
+                            unordered_set<uint32_t> &unique_field_ids,
+                            const case_insensitive_map_t<LogicalType> &name_to_type_map);
+};
+
+} // namespace duckdb
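
This header appears to carve the existing FieldID/ChildFieldIDs declarations out of parquet_writer.hpp into their own file, which matches the parquet.json includes changing from parquet_writer.hpp to parquet_field_id.hpp and the new parquet_field_id.cpp entry in the CMakeLists.txt above.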
extension/parquet/include/parquet_shredding.hpp

Lines changed: 49 additions & 0 deletions

@@ -0,0 +1,49 @@
+#pragma once
+
+#include "duckdb/common/serializer/buffered_file_writer.hpp"
+#include "duckdb/common/case_insensitive_map.hpp"
+#include "duckdb/common/types/variant.hpp"
+
+namespace duckdb {
+
+struct ShreddingType;
+
+struct ChildShreddingTypes {
+public:
+    ChildShreddingTypes();
+
+public:
+    ChildShreddingTypes Copy() const;
+
+public:
+    void Serialize(Serializer &serializer) const;
+    static ChildShreddingTypes Deserialize(Deserializer &source);
+
+public:
+    unique_ptr<case_insensitive_map_t<ShreddingType>> types;
+};
+
+struct ShreddingType {
+public:
+    ShreddingType();
+    explicit ShreddingType(const LogicalType &type);
+
+public:
+    ShreddingType Copy() const;
+
+public:
+    void Serialize(Serializer &serializer) const;
+    static ShreddingType Deserialize(Deserializer &source);
+
+public:
+    static ShreddingType GetShreddingTypes(const Value &val);
+    void AddChild(const string &name, ShreddingType &&child);
+    optional_ptr<const ShreddingType> GetChild(const string &name) const;
+
+public:
+    bool set = false;
+    LogicalType type;
+    ChildShreddingTypes children;
+};
+
+} // namespace duckdb
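
A small usage sketch based only on the declarations above; the Value layout accepted by GetShreddingTypes is not shown in this commit, so it is left out, and IsShredded/BuildExampleSpec are hypothetical helpers, not part of the change:

#include "parquet_shredding.hpp"

namespace duckdb {

// Hypothetical helper: would a given top-level column be shredded?
static bool IsShredded(const ShreddingType &root, const string &column) {
    // GetChild performs a case-insensitive lookup; a miss yields a null
    // optional_ptr, which FillParquetSchema treats as "not shredded".
    return root.GetChild(column) != nullptr;
}

// Hypothetical helper: shred column "measurement" as BIGINT.
static ShreddingType BuildExampleSpec() {
    ShreddingType root;
    root.AddChild("measurement", ShreddingType(LogicalType::BIGINT));
    return root;
}

} // namespace duckdb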
