Skip to content

Commit 2fcd584

Browse files
committed
comments
1 parent e625631 commit 2fcd584

File tree

1 file changed

+34
-47
lines changed

1 file changed

+34
-47
lines changed

arrow-csv/src/writer.rs

Lines changed: 34 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -148,15 +148,7 @@
148148
//! CSV options like `quoteAll`. You can control when fields are quoted using the
149149
//! [`QuoteStyle`] enum.
150150
//!
151-
//! ## Available Quoting Styles
152-
//!
153-
//! - `QuoteStyle::Necessary` (default): Only quotes fields when necessary (e.g., when they
154-
//! contain delimiters, quotes, or newlines)
155-
//! - `QuoteStyle::Always`: Quotes all fields (equivalent to Spark's `quoteAll=true`)
156-
//! - `QuoteStyle::NonNumeric`: Quotes only non-numeric fields
157-
//! - `QuoteStyle::Never`: Never quotes fields (warning: can produce invalid CSV)
158-
//!
159-
//! ## Example with quoting styles
151+
//! ## Example
160152
//!
161153
//! ```
162154
//! # use arrow_array::*;
@@ -1287,6 +1279,33 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
12871279
);
12881280
}
12891281

1282+
/// Helper function to write a batch with a specific quote style
1283+
fn write_quote_style(batch: &RecordBatch, quote_style: QuoteStyle) -> String {
1284+
let mut buf = Vec::new();
1285+
let mut writer = WriterBuilder::new()
1286+
.with_quote_style(quote_style)
1287+
.build(&mut buf);
1288+
writer.write(batch).unwrap();
1289+
drop(writer);
1290+
String::from_utf8(buf).unwrap()
1291+
}
1292+
1293+
/// Helper function to write a batch with a specific quote style and null value
1294+
fn write_quote_style_with_null(
1295+
batch: &RecordBatch,
1296+
quote_style: QuoteStyle,
1297+
null_value: &str,
1298+
) -> String {
1299+
let mut buf = Vec::new();
1300+
let mut writer = WriterBuilder::new()
1301+
.with_quote_style(quote_style)
1302+
.with_null(null_value.to_string())
1303+
.build(&mut buf);
1304+
writer.write(batch).unwrap();
1305+
drop(writer);
1306+
String::from_utf8(buf).unwrap()
1307+
}
1308+
12901309
#[test]
12911310
fn test_write_csv_quote_style() {
12921311
let schema = Schema::new(vec![
@@ -1306,48 +1325,28 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
13061325
.unwrap();
13071326

13081327
// Test with QuoteStyle::Necessary (default)
1309-
let mut buf = Vec::new();
1310-
let builder = WriterBuilder::new().with_quote_style(QuoteStyle::Necessary);
1311-
let mut writer = builder.build(&mut buf);
1312-
writer.write(&batch).unwrap();
1313-
drop(writer);
13141328
assert_eq!(
13151329
"text,number,float\nhello,1,1.1\nworld,2,2.2\n\"comma,value\",3,3.3\n\"quote\"\"test\",4,4.4\n",
1316-
String::from_utf8(buf).unwrap()
1330+
write_quote_style(&batch, QuoteStyle::Necessary)
13171331
);
13181332

13191333
// Test with QuoteStyle::Always (equivalent to Spark's quoteAll=true)
1320-
let mut buf = Vec::new();
1321-
let builder = WriterBuilder::new().with_quote_style(QuoteStyle::Always);
1322-
let mut writer = builder.build(&mut buf);
1323-
writer.write(&batch).unwrap();
1324-
drop(writer);
13251334
assert_eq!(
13261335
"\"text\",\"number\",\"float\"\n\"hello\",\"1\",\"1.1\"\n\"world\",\"2\",\"2.2\"\n\"comma,value\",\"3\",\"3.3\"\n\"quote\"\"test\",\"4\",\"4.4\"\n",
1327-
String::from_utf8(buf).unwrap()
1336+
write_quote_style(&batch, QuoteStyle::Always)
13281337
);
13291338

13301339
// Test with QuoteStyle::NonNumeric
1331-
let mut buf = Vec::new();
1332-
let builder = WriterBuilder::new().with_quote_style(QuoteStyle::NonNumeric);
1333-
let mut writer = builder.build(&mut buf);
1334-
writer.write(&batch).unwrap();
1335-
drop(writer);
13361340
assert_eq!(
13371341
"\"text\",\"number\",\"float\"\n\"hello\",1,1.1\n\"world\",2,2.2\n\"comma,value\",3,3.3\n\"quote\"\"test\",4,4.4\n",
1338-
String::from_utf8(buf).unwrap()
1342+
write_quote_style(&batch, QuoteStyle::NonNumeric)
13391343
);
13401344

13411345
// Test with QuoteStyle::Never (warning: can produce invalid CSV)
1342-
let mut buf = Vec::new();
1343-
let builder = WriterBuilder::new().with_quote_style(QuoteStyle::Never);
1344-
let mut writer = builder.build(&mut buf);
1345-
writer.write(&batch).unwrap();
1346-
drop(writer);
13471346
// Note: This produces invalid CSV for fields with commas or quotes
13481347
assert_eq!(
13491348
"text,number,float\nhello,1,1.1\nworld,2,2.2\ncomma,value,3,3.3\nquote\"test,4,4.4\n",
1350-
String::from_utf8(buf).unwrap()
1349+
write_quote_style(&batch, QuoteStyle::Never)
13511350
);
13521351
}
13531352

@@ -1365,27 +1364,15 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
13651364
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(text), Arc::new(number)]).unwrap();
13661365

13671366
// Test with QuoteStyle::Always
1368-
let mut buf = Vec::new();
1369-
let builder = WriterBuilder::new().with_quote_style(QuoteStyle::Always);
1370-
let mut writer = builder.build(&mut buf);
1371-
writer.write(&batch).unwrap();
1372-
drop(writer);
13731367
assert_eq!(
13741368
"\"text\",\"number\"\n\"hello\",\"1\"\n\"\",\"2\"\n\"world\",\"\"\n",
1375-
String::from_utf8(buf).unwrap()
1369+
write_quote_style(&batch, QuoteStyle::Always)
13761370
);
13771371

13781372
// Test with QuoteStyle::Always and custom null value
1379-
let mut buf = Vec::new();
1380-
let builder = WriterBuilder::new()
1381-
.with_quote_style(QuoteStyle::Always)
1382-
.with_null("NULL".to_string());
1383-
let mut writer = builder.build(&mut buf);
1384-
writer.write(&batch).unwrap();
1385-
drop(writer);
13861373
assert_eq!(
13871374
"\"text\",\"number\"\n\"hello\",\"1\"\n\"NULL\",\"2\"\n\"world\",\"NULL\"\n",
1388-
String::from_utf8(buf).unwrap()
1375+
write_quote_style_with_null(&batch, QuoteStyle::Always, "NULL")
13891376
);
13901377
}
13911378
}

0 commit comments

Comments
 (0)