Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .asf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ github:
main:
required_pull_request_reviews:
required_approving_review_count: 1
required_status_checks:
contexts:
- "Check License Header"
- "Use prettier to check formatting of documents"
- "Validate required_status_checks in .asf.yaml"
- "Spell Check with Typos"
# needs to be updated as part of the release process
# .asf.yaml doesn't support wildcard branch protection rules, only exact branch names
# https://github.com/apache/infrastructure-asfyaml?tab=readme-ov-file#branch-protection
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,15 @@
# if you encounter error, see instructions inside the script
run: ci/scripts/doc_prettier_check.sh

asf-yaml-check:
name: Validate required_status_checks in .asf.yaml
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- run: pip install pyyaml
- run: python3 ci/scripts/check_asf_yaml_status_checks.py

typos:

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {contents: read}
name: Spell Check with Typos
runs-on: ubuntu-latest
steps:
Expand Down
14 changes: 8 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ liblzma = { version = "0.4.6", features = ["static"] }
log = "^0.4"
memchr = "2.8.0"
num-traits = { version = "0.2" }
object_store = { version = "0.13.1", default-features = false }
object_store = { version = "0.13.2", default-features = false }
parking_lot = "0.12"
parquet = { version = "58.1.0", default-features = false, features = [
"arrow",
Expand Down
145 changes: 145 additions & 0 deletions ci/scripts/check_asf_yaml_status_checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Validate that every entry in .asf.yaml required_status_checks
matches an actual GitHub Actions job name, and that the workflow
is not filtered by paths/paths-ignore (which would prevent the
check from running on some PRs, blocking merges).

A typo or stale entry in required_status_checks will block all
merges for the project, so this check catches that early.
"""

import glob
import os
import sys

import yaml


def get_required_checks(asf_yaml_path):
    """Extract all required_status_checks contexts from .asf.yaml.

    Parameters
    ----------
    asf_yaml_path : str
        Path to the repository's .asf.yaml file.

    Returns
    -------
    dict
        Mapping of status-check context name -> list of branch names
        that require it.
    """
    with open(asf_yaml_path) as f:
        config = yaml.safe_load(f)

    checks = {}  # context -> list of branches requiring it
    # An empty file or missing keys parse as None; treat that as "no checks"
    # rather than crashing on a .get() chain.
    branches = ((config or {}).get("github") or {}).get("protected_branches") or {}
    for branch, settings in branches.items():
        # A branch entry with no body (a bare "main:" key) parses as None.
        rsc = (settings or {}).get("required_status_checks") or {}
        for ctx in rsc.get("contexts") or []:
            checks.setdefault(ctx, []).append(branch)

    return checks


def get_workflow_jobs(workflows_dir):
    """Collect all jobs with their metadata from GitHub Actions workflow files.

    Parameters
    ----------
    workflows_dir : str
        Path to the .github/workflows directory.

    Returns
    -------
    dict
        Mapping of job identifier (display name and/or job key) to a list of
        (workflow_file_basename, has_path_filters) tuples.
    """
    jobs = {}  # identifier -> [(workflow_file, has_path_filters)]
    # GitHub Actions accepts both .yml and .yaml workflow file extensions,
    # so scan for both; otherwise jobs in .yaml files would be reported as
    # missing even though they exist.
    workflow_files = sorted(
        glob.glob(os.path.join(workflows_dir, "*.yml"))
        + glob.glob(os.path.join(workflows_dir, "*.yaml"))
    )
    for workflow_file in workflow_files:
        with open(workflow_file) as f:
            workflow = yaml.safe_load(f)

        if not workflow or "jobs" not in workflow:
            continue

        # Check if the pull_request trigger has path filters.
        # PyYAML parses the bare key `on:` as boolean True, so look up both.
        on = workflow.get(True, workflow.get("on", {}))
        pr_trigger = on.get("pull_request", {}) if isinstance(on, dict) else {}
        has_path_filters = bool(
            isinstance(pr_trigger, dict)
            and (pr_trigger.get("paths") or pr_trigger.get("paths-ignore"))
        )

        basename = os.path.basename(workflow_file)
        for job_key, job_config in workflow.get("jobs", {}).items():
            if not isinstance(job_config, dict):
                continue
            # GitHub reports a job by its display `name` if set, else by its
            # key; register under both so either form in .asf.yaml matches.
            job_name = job_config.get("name", job_key)
            info = (basename, has_path_filters)
            jobs.setdefault(job_name, []).append(info)
            if job_key != job_name:
                jobs.setdefault(job_key, []).append(info)

    return jobs


def main():
    """Cross-check .asf.yaml required_status_checks against workflow jobs.

    Exits with status 1 (after printing a diagnostic) when a required check
    has no matching GitHub Actions job, or when every workflow providing it
    is path-filtered on pull_request.
    """
    script_path = os.path.abspath(__file__)
    repo_root = os.path.dirname(os.path.dirname(os.path.dirname(script_path)))
    asf_yaml = os.path.join(repo_root, ".asf.yaml")
    workflows_dir = os.path.join(repo_root, ".github", "workflows")

    required_checks = get_required_checks(asf_yaml)
    if not required_checks:
        print("No required_status_checks found in .asf.yaml — nothing to validate.")
        return

    jobs = get_workflow_jobs(workflows_dir)

    errors = []
    for ctx in sorted(required_checks):
        branches = ", ".join(sorted(required_checks[ctx]))
        providers = jobs.get(ctx)
        if providers is None:
            errors.append(
                f' - "{ctx}" (branch: {branches}): '
                f"not found in any GitHub Actions workflow"
            )
            continue

        # A check is safe as long as at least one workflow providing it runs
        # unconditionally (i.e. without paths/paths-ignore on pull_request).
        filtered = [wf for wf, flagged in providers if flagged]
        unfiltered = [wf for wf, flagged in providers if not flagged]
        if filtered and not unfiltered:
            wf_list = ", ".join(filtered)
            errors.append(
                f' - "{ctx}" (branch: {branches}): '
                f"workflow {wf_list} uses paths/paths-ignore filters on "
                f"pull_request, so this check won't run for some PRs "
                f"and will block merging"
            )

    if errors:
        print("ERROR: Problems found with required_status_checks in .asf.yaml:\n")
        print("\n".join(errors))
        print()
        print("Available job names across all workflows:")
        for name in sorted(jobs):
            print(f" - {name}")
        sys.exit(1)

    print(
        f"OK: All {len(required_checks)} required_status_checks "
        "match existing GitHub Actions jobs."
    )


# Entry-point guard: allows importing this module (e.g. for testing the
# helper functions) without triggering the validation side effects.
if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/physical_plan/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1704,7 +1704,7 @@ mod tests {
let state = session_ctx.state();
let location = Path::from_filesystem_path(".")
.unwrap()
.child("invalid.parquet");
.join("invalid.parquet");

let partitioned_file = PartitionedFile::new_from_meta(ObjectMeta {
location,
Expand Down
9 changes: 8 additions & 1 deletion datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,14 @@ impl DefaultPhysicalPlanner {
let new_sort = SortExec::new(ordering, physical_input).with_fetch(*fetch);
Arc::new(new_sort)
}
LogicalPlan::Subquery(_) => todo!(),
// The optimizer's decorrelation passes remove Subquery nodes
// for supported patterns. This error is hit for correlated
// patterns that the optimizer cannot (yet) decorrelate.
LogicalPlan::Subquery(_) => {
return not_impl_err!(
"Physical plan does not support undecorrelated Subquery"
);
}
LogicalPlan::SubqueryAlias(_) => children.one()?,
LogicalPlan::Limit(limit) => {
let input = children.one()?;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/sql/path_partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ impl ObjectStore for MirroringObjectStore {
};

if parts.next().is_some() {
common_prefixes.insert(prefix.child(common_prefix));
common_prefixes.insert(prefix.clone().join(common_prefix));
} else {
let object = ObjectMeta {
location: k.clone(),
Expand Down
2 changes: 1 addition & 1 deletion datafusion/datasource/src/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ mod tests {
let root = root.to_string_lossy();

let url = ListingTableUrl::parse(root).unwrap();
let child = url.prefix.child("partition").child("file");
let child = url.prefix.clone().join("partition").join("file");

let prefix: Vec<_> = url.strip_prefix(&child).unwrap().collect();
assert_eq!(prefix, vec!["partition", "file"]);
Expand Down
7 changes: 4 additions & 3 deletions datafusion/datasource/src/write/demux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,8 @@ fn generate_file_path(
if !single_file_output {
base_output_path
.prefix()
.child(format!("{write_id}_{part_idx}.{file_extension}"))
.clone()
.join(format!("{write_id}_{part_idx}.{file_extension}"))
} else {
base_output_path.prefix().to_owned()
}
Expand Down Expand Up @@ -588,8 +589,8 @@ fn compute_hive_style_file_path(
) -> Path {
let mut file_path = base_output_path.prefix().clone();
for j in 0..part_key.len() {
file_path = file_path.child(format!("{}={}", partition_by[j].0, part_key[j]));
file_path = file_path.join(format!("{}={}", partition_by[j].0, part_key[j]));
}

file_path.child(format!("{write_id}.{file_extension}"))
file_path.join(format!("{write_id}.{file_extension}"))
}
19 changes: 18 additions & 1 deletion datafusion/functions/src/datetime/current_date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,24 @@ The `current_date()` return value is determined at query time and will return th
"#,
syntax_example = r#"current_date()
(optional) SET datafusion.execution.time_zone = '+00:00';
SELECT current_date();"#
SELECT current_date();"#,
sql_example = r#"```sql
> SELECT current_date();
+----------------+
| current_date() |
+----------------+
| 2024-12-23 |
+----------------+

-- The current date is based on the session time zone (UTC by default)
> SET datafusion.execution.time_zone = 'Asia/Tokyo';
> SELECT current_date();
+----------------+
| current_date() |
+----------------+
| 2024-12-24 |
+----------------+
```"#
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct CurrentDateFunc {
Expand Down
19 changes: 18 additions & 1 deletion datafusion/functions/src/datetime/current_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,24 @@ The session time zone can be set using the statement 'SET datafusion.execution.t
"#,
syntax_example = r#"current_time()
(optional) SET datafusion.execution.time_zone = '+00:00';
SELECT current_time();"#
SELECT current_time();"#,
sql_example = r#"```sql
> SELECT current_time();
+--------------------+
| current_time() |
+--------------------+
| 06:30:00.123456789 |
+--------------------+

-- The current time is based on the session time zone (UTC by default)
> SET datafusion.execution.time_zone = 'Asia/Tokyo';
> SELECT current_time();
+--------------------+
| current_time() |
+--------------------+
| 15:30:00.123456789 |
+--------------------+
```"#
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct CurrentTimeFunc {
Expand Down
16 changes: 15 additions & 1 deletion datafusion/functions/src/datetime/date_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,21 @@ use datafusion_macros::user_doc;
argument(
name = "expression",
description = "Time expression to operate on. Can be a constant, column, or function."
)
),
sql_example = r#"```sql
> SELECT date_part('year', '2024-05-01T00:00:00');
+-----------------------------------------------------+
| date_part(Utf8("year"),Utf8("2024-05-01T00:00:00")) |
+-----------------------------------------------------+
| 2024 |
+-----------------------------------------------------+
> SELECT extract(day FROM timestamp '2024-05-01T00:00:00');
+----------------------------------------------------+
| date_part(Utf8("DAY"),Utf8("2024-05-01T00:00:00")) |
+----------------------------------------------------+
| 1 |
+----------------------------------------------------+
```"#
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct DatePartFunc {
Expand Down
16 changes: 15 additions & 1 deletion datafusion/functions/src/datetime/date_trunc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,21 @@ impl DateTruncGranularity {
argument(
name = "expression",
description = "Timestamp or time expression to operate on. Can be a constant, column, or function."
)
),
sql_example = r#"```sql
> SELECT date_trunc('month', '2024-05-15T10:30:00');
+-----------------------------------------------+
| date_trunc(Utf8("month"),Utf8("2024-05-15T10:30:00")) |
+-----------------------------------------------+
| 2024-05-01T00:00:00 |
+-----------------------------------------------+
> SELECT date_trunc('hour', '2024-05-15T10:30:00');
+----------------------------------------------+
| date_trunc(Utf8("hour"),Utf8("2024-05-15T10:30:00")) |
+----------------------------------------------+
| 2024-05-15T10:00:00 |
+----------------------------------------------+
```"#
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct DateTruncFunc {
Expand Down
Loading
Loading