You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
use datafusion_common::{exec_datafusion_err, internal_datafusion_err, not_impl_err};
42
43
use datafusion_common_runtime::SpawnedTask;
43
-
use datafusion_execution::TaskContext;
44
44
45
45
use chrono::NaiveDate;
46
+
use datafusion_execution::TaskContext;
46
47
use futures::StreamExt;
47
48
use object_store::path::Path;
48
49
use rand::distr::SampleString;
@@ -68,6 +69,11 @@ pub type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>;
68
69
/// be written with the extension from the path. Otherwise the default extension
69
70
/// will be used and the output will be split into multiple files.
70
71
///
72
+
/// Output file guarantees:
73
+
/// - Partitioned files: Files are created only for non-empty partitions.
74
+
/// - Single-file output: At least one file is always written, even when the stream is empty.
75
+
/// - Multi-file output: At least the minimum number of parallel files specified in [`ExecutionOptions`] are always written, even when the stream is empty.
76
+
///
71
77
/// Examples of `base_output_path`
72
78
/// * `tmp/dataset/` -> is a folder since it ends in `/`
73
79
/// * `tmp/dataset` -> is still a folder: it does not end in `/`, but it has no valid file extension
@@ -171,10 +177,9 @@ async fn row_count_demuxer(
171
177
max_rows_per_file
172
178
};
173
179
174
-
// Single-file output requires creating at least one file stream in advance.
175
-
// If no record batches are present in the input stream,
176
-
// the file stream must still be created to produce a valid output file.
177
-
if single_file_output {
180
+
// ensure we have at least minimum_parallel_files open, even when
0 commit comments