From 093810efd350c88be884b384424e73c01f535880 Mon Sep 17 00:00:00 2001 From: svranesevic Date: Thu, 26 Mar 2026 12:17:04 +0300 Subject: [PATCH 1/3] Expose option to set line terminator for arrow-csv --- arrow-csv/src/writer.rs | 109 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 102 insertions(+), 7 deletions(-) diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs index c38d1cdec337..d243f28f2d5d 100644 --- a/arrow-csv/src/writer.rs +++ b/arrow-csv/src/writer.rs @@ -358,6 +358,8 @@ pub struct WriterBuilder { quote: u8, /// Optional escape character. Defaults to `b'\\'` escape: u8, + /// Optional line terminator. Defaults to `LF` (`\n`) + terminator: Terminator, /// Enable double quote escapes. Defaults to `true` double_quote: bool, /// Optional date format for date arrays @@ -380,6 +382,15 @@ pub struct WriterBuilder { quote_style: QuoteStyle, } +/// The line terminator to use when writing CSV files. +#[derive(Clone, Debug)] +pub enum Terminator { + /// Use CRLF (`\r\n`) as the line terminator + CRLF, + /// Use the specified byte character as the line terminator + Any(u8), +} + impl Default for WriterBuilder { fn default() -> Self { WriterBuilder { @@ -387,6 +398,7 @@ impl Default for WriterBuilder { has_header: true, quote: b'"', escape: b'\\', + terminator: Terminator::Any(b'\n'), double_quote: true, date_format: None, datetime_format: None, @@ -604,20 +616,33 @@ impl WriterBuilder { self } - /// Get the configured quoting style - pub fn quote_style(&self) -> QuoteStyle { - self.quote_style + /// Set the CSV file's line terminator + pub fn with_line_terminator(mut self, terminator: Terminator) -> Self { + self.terminator = terminator; + self + } + + /// Get the CSV file's line terminator, defaults to `LF` (`\n`) + pub fn line_terminator(&self) -> &Terminator { + &self.terminator } /// Create a new `Writer` pub fn build<W: Write>(self, writer: W) -> Writer<W> { let mut builder = csv::WriterBuilder::new(); + + let terminator = match self.terminator { 
Terminator::CRLF => csv::Terminator::CRLF, + Terminator::Any(byte) => csv::Terminator::Any(byte), + }; + let writer = builder .delimiter(self.delimiter) .quote(self.quote) .quote_style(self.quote_style) .double_quote(self.double_quote) .escape(self.escape) + .terminator(terminator) .from_writer(writer); Writer { writer, @@ -1027,10 +1052,80 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo let mut buffer: Vec = vec![]; file.read_to_end(&mut buffer).unwrap(); - assert_eq!( - "c1,c2\n00:02,46:17\n00:02,\n", - String::from_utf8(buffer).unwrap() - ); + let output = String::from_utf8(buffer).unwrap(); + assert_eq!(output, "c1,c2\n00:02,46:17\n00:02,\n"); + } + + #[test] + fn test_write_csv_with_lf_terminator() { + let schema = Schema::new(vec![ + Field::new("c1", DataType::Utf8, false), + Field::new("c2", DataType::UInt32, false), + ]); + + let c1 = StringArray::from(vec!["hello", "world"]); + let c2 = PrimitiveArray::::from(vec![1, 2]); + + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap(); + + let mut buf = Vec::new(); + let mut writer = WriterBuilder::new() + .with_line_terminator(Terminator::Any(b'\n')) + .build(&mut buf); + writer.write(&batch).unwrap(); + drop(writer); + + let output = String::from_utf8(buf).unwrap(); + assert_eq!(output, "c1,c2\nhello,1\nworld,2\n"); + } + + #[test] + fn test_write_csv_with_crlf_terminator() { + let schema = Schema::new(vec![ + Field::new("c1", DataType::Utf8, false), + Field::new("c2", DataType::UInt32, false), + ]); + + let c1 = StringArray::from(vec!["hello", "world"]); + let c2 = PrimitiveArray::::from(vec![1, 2]); + + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap(); + + let mut buf = Vec::new(); + let mut writer = WriterBuilder::new() + .with_line_terminator(Terminator::CRLF) + .build(&mut buf); + writer.write(&batch).unwrap(); + drop(writer); + + let output = String::from_utf8(buf).unwrap(); + 
assert_eq!(output, "c1,c2\r\nhello,1\r\nworld,2\r\n"); + } + + #[test] + fn test_write_csv_with_any_terminator() { + let schema = Schema::new(vec![ + Field::new("c1", DataType::Utf8, false), + Field::new("c2", DataType::UInt32, false), + ]); + + let c1 = StringArray::from(vec!["hello", "world"]); + let c2 = PrimitiveArray::::from(vec![1, 2]); + + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap(); + + let mut buf = Vec::new(); + let mut writer = WriterBuilder::new() + .with_line_terminator(Terminator::Any(b'|')) + .build(&mut buf); + writer.write(&batch).unwrap(); + drop(writer); + + let output = String::from_utf8(buf).unwrap(); + assert_eq!(output, "c1,c2|hello,1|world,2|"); } #[test] From a16169f4a6e5ed6aa53fadb1e097dce23466ac3b Mon Sep 17 00:00:00 2001 From: svranesevic Date: Thu, 26 Mar 2026 12:23:29 +0300 Subject: [PATCH 2/3] Revert accidental deletion --- arrow-csv/src/writer.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs index d243f28f2d5d..e92cb2514712 100644 --- a/arrow-csv/src/writer.rs +++ b/arrow-csv/src/writer.rs @@ -616,6 +616,11 @@ impl WriterBuilder { self } + /// Get the configured quoting style + pub fn quote_style(&self) -> QuoteStyle { + self.quote_style + } + /// Set the CSV file's line terminator pub fn with_line_terminator(mut self, terminator: Terminator) -> Self { self.terminator = terminator; From 25a78cb3de3ad504e01ee8df1b0ecd0fb41950e4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 1 Apr 2026 08:42:22 -0400 Subject: [PATCH 3/3] Reduce test replication --- arrow-csv/src/writer.rs | 50 ++++++++--------------------------------- 1 file changed, 9 insertions(+), 41 deletions(-) diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs index e92cb2514712..72dcb640e1c9 100644 --- a/arrow-csv/src/writer.rs +++ b/arrow-csv/src/writer.rs @@ -1063,54 +1063,23 @@ sed do eiusmod 
tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo #[test] fn test_write_csv_with_lf_terminator() { - let schema = Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::UInt32, false), - ]); - - let c1 = StringArray::from(vec!["hello", "world"]); - let c2 = PrimitiveArray::::from(vec![1, 2]); - - let batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap(); - - let mut buf = Vec::new(); - let mut writer = WriterBuilder::new() - .with_line_terminator(Terminator::Any(b'\n')) - .build(&mut buf); - writer.write(&batch).unwrap(); - drop(writer); - - let output = String::from_utf8(buf).unwrap(); + let output = write_batch_with_terminator(Terminator::Any(b'\n')); assert_eq!(output, "c1,c2\nhello,1\nworld,2\n"); } #[test] fn test_write_csv_with_crlf_terminator() { - let schema = Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::UInt32, false), - ]); - - let c1 = StringArray::from(vec!["hello", "world"]); - let c2 = PrimitiveArray::::from(vec![1, 2]); - - let batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap(); - - let mut buf = Vec::new(); - let mut writer = WriterBuilder::new() - .with_line_terminator(Terminator::CRLF) - .build(&mut buf); - writer.write(&batch).unwrap(); - drop(writer); - - let output = String::from_utf8(buf).unwrap(); + let output = write_batch_with_terminator(Terminator::CRLF); assert_eq!(output, "c1,c2\r\nhello,1\r\nworld,2\r\n"); } #[test] fn test_write_csv_with_any_terminator() { + let output = write_batch_with_terminator(Terminator::Any(b'|')); + assert_eq!(output, "c1,c2|hello,1|world,2|"); + } + + fn write_batch_with_terminator(terminator: Terminator) -> String { let schema = Schema::new(vec![ Field::new("c1", DataType::Utf8, false), Field::new("c2", DataType::UInt32, false), @@ -1124,13 +1093,12 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo let mut buf = 
Vec::new(); let mut writer = WriterBuilder::new() - .with_line_terminator(Terminator::Any(b'|')) + .with_line_terminator(terminator) .build(&mut buf); writer.write(&batch).unwrap(); drop(writer); - let output = String::from_utf8(buf).unwrap(); - assert_eq!(output, "c1,c2|hello,1|world,2|"); + String::from_utf8(buf).unwrap() } #[test]