You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: datafusion/datasource/src/write/demux.rs
+11-19Lines changed: 11 additions & 19 deletions
Original file line number
Diff line number
Diff line change
@@ -40,9 +40,9 @@ use datafusion_common::cast::{
40
40
};
41
41
use datafusion_common::{exec_datafusion_err, internal_datafusion_err, not_impl_err};
42
42
use datafusion_common_runtime::SpawnedTask;
43
-
use datafusion_execution::TaskContext;
44
43
45
44
use chrono::NaiveDate;
45
+
use datafusion_execution::TaskContext;
46
46
use futures::StreamExt;
47
47
use object_store::path::Path;
48
48
use rand::distr::SampleString;
@@ -68,6 +68,11 @@ pub type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>;
68
68
/// be written with the extension from the path. Otherwise the default extension
69
69
/// will be used and the output will be split into multiple files.
70
70
///
71
+
/// Output file guarantees:
72
+
/// - Partitioned files: Files are created only for non-empty partitions.
73
+
/// - Single-file output: At least one file is always written, even when the stream is empty.
74
+
/// - Multi-file output: At least the minimum number of parallel files specified in [`datafusion_common::config::ExecutionOptions`] are always written, even when the stream is empty.
75
+
///
71
76
/// Examples of `base_output_path`
72
77
/// * `tmp/dataset/` -> is a folder since it ends in `/`
73
78
/// * `tmp/dataset` -> is still a folder: although it does not end in `/`, it has no valid file extension
@@ -171,10 +176,9 @@ async fn row_count_demuxer(
171
176
max_rows_per_file
172
177
};
173
178
174
-
// Single-file output requires creating at least one file stream in advance.
175
-
// If no record batches are present in the input stream,
176
-
// the file stream must still be created to produce a valid output file.
177
-
if single_file_output {
179
+
// ensure we have at least minimum_parallel_files open, even when
0 commit comments