@@ -40,9 +40,9 @@ use datafusion_common::cast::{
4040} ;
4141use datafusion_common:: { exec_datafusion_err, internal_datafusion_err, not_impl_err} ;
4242use datafusion_common_runtime:: SpawnedTask ;
43- use datafusion_execution:: TaskContext ;
4443
4544use chrono:: NaiveDate ;
45+ use datafusion_execution:: TaskContext ;
4646use futures:: StreamExt ;
4747use object_store:: path:: Path ;
4848use rand:: distr:: SampleString ;
@@ -68,6 +68,11 @@ pub type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>;
6868/// be written with the extension from the path. Otherwise the default extension
6969/// will be used and the output will be split into multiple files.
7070///
71+ /// Output file guarantees:
72+ /// - Partitioned files: Files are created only for non-empty partitions.
73+ /// - Single-file output: 1 file is always written, even when the stream is empty.
74+ /// - Multi-file output: Depending on the number of record batches, 0 or more files are written.
75+ ///
7176/// Examples of `base_output_path`
7277/// * `tmp/dataset/` -> is a folder since it ends in `/`
7378/// * `tmp/dataset` -> is still a folder since it does not end in `/` but has no valid file extension
@@ -171,10 +176,8 @@ async fn row_count_demuxer(
171176 max_rows_per_file
172177 } ;
173178
174- // Single-file output requires creating at least one file stream in advance.
175- // If no record batches are present in the input stream,
176- // the file stream must still be created to produce a valid output file.
177179 if single_file_output {
180+ // ensure we have one file open, even when the input stream is empty
178181 open_file_streams. push ( create_new_file_stream (
179182 & base_output_path,
180183 & write_id,
0 commit comments