Skip to content

Commit b21ac51

Browse files
author
Bert Vermeiren
committed
Fix COPY TO does not produce an output file for the empty set (comments added)
1 parent 18fbd63 commit b21ac51

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

datafusion/datasource/src/write/demux.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ use datafusion_common::cast::{
4040
};
4141
use datafusion_common::{exec_datafusion_err, internal_datafusion_err, not_impl_err};
4242
use datafusion_common_runtime::SpawnedTask;
43-
use datafusion_execution::TaskContext;
4443

4544
use chrono::NaiveDate;
45+
use datafusion_execution::TaskContext;
4646
use futures::StreamExt;
4747
use object_store::path::Path;
4848
use rand::distr::SampleString;
@@ -68,6 +68,11 @@ pub type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>;
6868
/// be written with the extension from the path. Otherwise the default extension
6969
/// will be used and the output will be split into multiple files.
7070
///
71+
/// Output file guarantees:
72+
/// - Partitioned files: Files are created only for non-empty partitions.
73+
/// - Single-file output: 1 file is always written, even when the stream is empty.
74+
/// - Multi-file output: Depending on the number of record batches, 0 or more files are written.
75+
///
7176
/// Examples of `base_output_path`
7277
/// * `tmp/dataset/` -> is a folder since it ends in `/`
7378
/// * `tmp/dataset` -> is still a folder since it does not end in `/` but has no valid file extension
@@ -171,10 +176,8 @@ async fn row_count_demuxer(
171176
max_rows_per_file
172177
};
173178

174-
// Single-file output requires creating at least one file stream in advance.
175-
// If no record batches are present in the input stream,
176-
// the file stream must still be created to produce a valid output file.
177179
if single_file_output {
180+
// ensure we have one file open, even when the input stream is empty
178181
open_file_streams.push(create_new_file_stream(
179182
&base_output_path,
180183
&write_id,

0 commit comments

Comments
 (0)