141141//! "name,comment\nAlice ,Great job! \nBob,Well done\nCharlie,Excellent \n"
142142//! );
143143//! ```
144+ //!
145+ //! # Quoting Styles
146+ //!
147+ //! The writer supports different quoting styles for fields, compatible with Apache Spark's
148+ //! CSV options like `quoteAll`. You can control when fields are quoted using the
149+ //! [`QuoteStyle`] enum.
150+ //!
151+ //! ## Example
152+ //!
153+ //! ```
154+ //! # use arrow_array::*;
155+ //! # use arrow_csv::{WriterBuilder, QuoteStyle};
156+ //! # use arrow_schema::*;
157+ //! # use std::sync::Arc;
158+ //!
159+ //! let schema = Schema::new(vec![
160+ //! Field::new("product", DataType::Utf8, false),
161+ //! Field::new("price", DataType::Float64, false),
162+ //! ]);
163+ //!
164+ //! let product = StringArray::from(vec!["apple", "banana,organic", "cherry"]);
165+ //! let price = Float64Array::from(vec![1.50, 2.25, 3.00]);
166+ //!
167+ //! let batch = RecordBatch::try_new(
168+ //! Arc::new(schema),
169+ //! vec![Arc::new(product), Arc::new(price)],
170+ //! )
171+ //! .unwrap();
172+ //!
173+ //! // Default behavior (QuoteStyle::Necessary)
174+ //! let mut output = Vec::new();
175+ //! WriterBuilder::new()
176+ //! .build(&mut output)
177+ //! .write(&batch)
178+ //! .unwrap();
179+ //! assert_eq!(
180+ //! String::from_utf8(output).unwrap(),
181+ //! "product,price\napple,1.5\n\"banana,organic\",2.25\ncherry,3.0\n"
182+ //! );
183+ //!
184+ //! // Quote all fields (Spark's quoteAll=true)
185+ //! let mut output = Vec::new();
186+ //! WriterBuilder::new()
187+ //! .with_quote_style(QuoteStyle::Always)
188+ //! .build(&mut output)
189+ //! .write(&batch)
190+ //! .unwrap();
191+ //! assert_eq!(
192+ //! String::from_utf8(output).unwrap(),
193+ //! "\"product\",\"price\"\n\"apple\",\"1.5\"\n\"banana,organic\",\"2.25\"\n\"cherry\",\"3.0\"\n"
194+ //! );
195+ //! ```
144196
145197use arrow_array:: * ;
146198use arrow_cast:: display:: * ;
@@ -151,6 +203,22 @@ use std::io::Write;
151203use crate :: map_csv_error;
152204const DEFAULT_NULL_VALUE : & str = "" ;
153205
206+ /// The quoting style to use when writing CSV files.
207+ ///
208+ /// This type is re-exported from the `csv` crate and supports different
209+ /// strategies for quoting fields. It is compatible with Apache Spark's
210+ /// CSV options like `quoteAll`.
211+ ///
212+ /// # Example
213+ ///
214+ /// ```
215+ /// use arrow_csv::{WriterBuilder, QuoteStyle};
216+ ///
217+ /// let builder = WriterBuilder::new()
218+ /// .with_quote_style(QuoteStyle::Always); // Equivalent to Spark's quoteAll=true
219+ /// ```
220+ pub use csv:: QuoteStyle ;
221+
154222/// A CSV writer
155223#[ derive( Debug ) ]
156224pub struct Writer < W : Write > {
@@ -324,6 +392,8 @@ pub struct WriterBuilder {
324392 ignore_leading_whitespace : bool ,
325393 /// Whether to ignore trailing whitespace in string values. Defaults to `false`
326394 ignore_trailing_whitespace : bool ,
395+ /// The quoting style to use. Defaults to `QuoteStyle::Necessary`
396+ quote_style : QuoteStyle ,
327397}
328398
329399impl Default for WriterBuilder {
@@ -342,6 +412,7 @@ impl Default for WriterBuilder {
342412 null_value : None ,
343413 ignore_leading_whitespace : false ,
344414 ignore_trailing_whitespace : false ,
415+ quote_style : QuoteStyle :: default ( ) ,
345416 }
346417 }
347418}
@@ -528,12 +599,38 @@ impl WriterBuilder {
528599 self . ignore_trailing_whitespace
529600 }
530601
602+ /// Set the quoting style for writing CSV files
603+ ///
604+ /// # Example
605+ ///
606+ /// ```
607+ /// use arrow_csv::{WriterBuilder, QuoteStyle};
608+ ///
609+ /// // Quote all fields (equivalent to Spark's quoteAll=true)
610+ /// let builder = WriterBuilder::new()
611+ /// .with_quote_style(QuoteStyle::Always);
612+ ///
613+ /// // Only quote when necessary (default)
614+ /// let builder = WriterBuilder::new()
615+ /// .with_quote_style(QuoteStyle::Necessary);
616+ /// ```
617+ pub fn with_quote_style ( mut self , quote_style : QuoteStyle ) -> Self {
618+ self . quote_style = quote_style;
619+ self
620+ }
621+
622+ /// Get the configured quoting style
623+ pub fn quote_style ( & self ) -> QuoteStyle {
624+ self . quote_style
625+ }
626+
531627 /// Create a new `Writer`
532628 pub fn build < W : Write > ( self , writer : W ) -> Writer < W > {
533629 let mut builder = csv:: WriterBuilder :: new ( ) ;
534630 let writer = builder
535631 . delimiter ( self . delimiter )
536632 . quote ( self . quote )
633+ . quote_style ( self . quote_style )
537634 . double_quote ( self . double_quote )
538635 . escape ( self . escape )
539636 . from_writer ( writer) ;
@@ -1181,4 +1278,99 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
11811278 String :: from_utf8( buf) . unwrap( )
11821279 ) ;
11831280 }
1281+
1282+ fn write_quote_style ( batch : & RecordBatch , quote_style : QuoteStyle ) -> String {
1283+ let mut buf = Vec :: new ( ) ;
1284+ let mut writer = WriterBuilder :: new ( )
1285+ . with_quote_style ( quote_style)
1286+ . build ( & mut buf) ;
1287+ writer. write ( batch) . unwrap ( ) ;
1288+ drop ( writer) ;
1289+ String :: from_utf8 ( buf) . unwrap ( )
1290+ }
1291+
1292+ fn write_quote_style_with_null (
1293+ batch : & RecordBatch ,
1294+ quote_style : QuoteStyle ,
1295+ null_value : & str ,
1296+ ) -> String {
1297+ let mut buf = Vec :: new ( ) ;
1298+ let mut writer = WriterBuilder :: new ( )
1299+ . with_quote_style ( quote_style)
1300+ . with_null ( null_value. to_string ( ) )
1301+ . build ( & mut buf) ;
1302+ writer. write ( batch) . unwrap ( ) ;
1303+ drop ( writer) ;
1304+ String :: from_utf8 ( buf) . unwrap ( )
1305+ }
1306+
/// Checks the CSV produced for one batch under each `QuoteStyle` variant
/// exercised here: `Necessary`, `Always`, `NonNumeric` and `Never`.
///
/// The batch contains a plain value, a value with an embedded delimiter and a
/// value with an embedded quote, so every style's quoting and escaping rules
/// are visible in the expected output.
#[test]
fn test_write_csv_quote_style() {
    let schema = Schema::new(vec![
        Field::new("text", DataType::Utf8, false),
        Field::new("number", DataType::Int32, false),
        Field::new("float", DataType::Float64, false),
    ]);

    let text = StringArray::from(vec!["hello", "world", "comma,value", "quote\"test"]);
    let number = Int32Array::from(vec![1, 2, 3, 4]);
    let float = Float64Array::from(vec![1.1, 2.2, 3.3, 4.4]);

    let batch = RecordBatch::try_new(
        Arc::new(schema),
        vec![Arc::new(text), Arc::new(number), Arc::new(float)],
    )
    .unwrap();

    // Expected strings must contain no padding after `\n` or `\"`: the writer
    // emits fields back to back, separated only by the delimiter.

    // Test with QuoteStyle::Necessary (default): only fields containing the
    // delimiter or a quote are quoted, and embedded quotes are doubled.
    assert_eq!(
        "text,number,float\nhello,1,1.1\nworld,2,2.2\n\"comma,value\",3,3.3\n\"quote\"\"test\",4,4.4\n",
        write_quote_style(&batch, QuoteStyle::Necessary)
    );

    // Test with QuoteStyle::Always (equivalent to Spark's quoteAll=true)
    assert_eq!(
        "\"text\",\"number\",\"float\"\n\"hello\",\"1\",\"1.1\"\n\"world\",\"2\",\"2.2\"\n\"comma,value\",\"3\",\"3.3\"\n\"quote\"\"test\",\"4\",\"4.4\"\n",
        write_quote_style(&batch, QuoteStyle::Always)
    );

    // Test with QuoteStyle::NonNumeric: header and text fields are quoted,
    // numeric fields are left bare.
    assert_eq!(
        "\"text\",\"number\",\"float\"\n\"hello\",1,1.1\n\"world\",2,2.2\n\"comma,value\",3,3.3\n\"quote\"\"test\",4,4.4\n",
        write_quote_style(&batch, QuoteStyle::NonNumeric)
    );

    // Test with QuoteStyle::Never (warning: can produce invalid CSV)
    // Note: This produces invalid CSV for fields with commas or quotes
    assert_eq!(
        "text,number,float\nhello,1,1.1\nworld,2,2.2\ncomma,value,3,3.3\nquote\"test,4,4.4\n",
        write_quote_style(&batch, QuoteStyle::Never)
    );
}
1350+
/// Checks how null values are written under `QuoteStyle::Always`, both with
/// the default null representation and with a custom `"NULL"` marker.
#[test]
fn test_write_csv_quote_style_with_nulls() {
    let schema = Schema::new(vec![
        Field::new("text", DataType::Utf8, true),
        Field::new("number", DataType::Int32, true),
    ]);

    let text = StringArray::from(vec![Some("hello"), None, Some("world")]);
    let number = Int32Array::from(vec![Some(1), Some(2), None]);

    let batch =
        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(text), Arc::new(number)]).unwrap();

    // Expected strings must contain no padding after `\n` or `\"`: the writer
    // emits fields back to back, separated only by the delimiter.

    // Test with QuoteStyle::Always: nulls are written as quoted empty fields.
    assert_eq!(
        "\"text\",\"number\"\n\"hello\",\"1\"\n\"\",\"2\"\n\"world\",\"\"\n",
        write_quote_style(&batch, QuoteStyle::Always)
    );

    // Test with QuoteStyle::Always and custom null value: the null marker is
    // quoted like any other field.
    assert_eq!(
        "\"text\",\"number\"\n\"hello\",\"1\"\n\"NULL\",\"2\"\n\"world\",\"NULL\"\n",
        write_quote_style_with_null(&batch, QuoteStyle::Always, "NULL")
    );
}
11841376}
0 commit comments