Skip to content

Commit 240cbf4

Browse files
authored
Move examples in arrow-csv to docstrings, polish up docs (#9001)
# Which issue does this PR close? # Rationale for this change while reviewing @xanderbailey's PR in #8960, I found that there are examples for arrow-csv and they are hard to find. Also each example adds extra binaries and thus slows down CI and tests. For example the `whitespace_handling` example makes a new 2.9MB binary: ```shell cargo run -p arrow-csv --example whitespace_handling ... du -s -h target/debug/examples/whitespace_handling 2.9M target/debug/examples/whitespace_handling ``` Let's consolidate the examples to make them easier to find # What changes are included in this PR? 1. Consolidate the examples 2. Improve other csv docs # Are these changes tested? We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 3. Serve as another way to document the expected behavior of the code # Are there any user-facing changes? Docs only, no functional changes
1 parent 3b097a5 commit 240cbf4

File tree

6 files changed

+71
-201
lines changed

6 files changed

+71
-201
lines changed

arrow-csv/examples/README.md

Lines changed: 0 additions & 21 deletions
This file was deleted.

arrow-csv/examples/csv_calculation.rs

Lines changed: 0 additions & 56 deletions
This file was deleted.

arrow-csv/examples/whitespace_handling.rs

Lines changed: 0 additions & 86 deletions
This file was deleted.

arrow-csv/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Transfer data between the Arrow memory format and CSV (comma-separated values).
18+
//! Transfer data between the [Apache Arrow] memory format and CSV (comma-separated values).
19+
//!
20+
//! [Apache Arrow]: https://arrow.apache.org/
1921
2022
#![doc(
2123
html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",

arrow-csv/src/reader/mod.rs

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! CSV Reader
18+
//! CSV Reading: [`Reader`] and [`ReaderBuilder`]
1919
//!
2020
//! # Basic Usage
2121
//!
@@ -42,6 +42,46 @@
4242
//! let batch = csv.next().unwrap().unwrap();
4343
//! ```
4444
//!
45+
//! # Example: Numeric calculations on CSV
46+
//! This code finds the maximum value in column 0 of a CSV file containing:
47+
//! ```csv
48+
//! c1,c2,c3,c4
49+
//! 1,1.1,"hong kong",true
50+
//! 3,323.12,"XiAn",false
51+
//! 10,131323.12,"cheng du",false
52+
//! ```
53+
//!
54+
//! ```
55+
//! # use arrow_array::cast::AsArray;
56+
//! # use arrow_array::types::Int16Type;
57+
//! # use arrow_csv::ReaderBuilder;
58+
//! # use arrow_schema::{DataType, Field, Schema};
59+
//! # use std::fs::File;
60+
//! # use std::sync::Arc;
61+
//! // Open the example file
62+
//! let file = File::open("test/data/example.csv").unwrap();
63+
//! let csv_schema = Schema::new(vec![
64+
//! Field::new("c1", DataType::Int16, true),
65+
//! Field::new("c2", DataType::Float32, true),
66+
//! Field::new("c3", DataType::Utf8, true),
67+
//! Field::new("c4", DataType::Boolean, true),
68+
//! ]);
69+
//! let mut reader = ReaderBuilder::new(Arc::new(csv_schema))
70+
//! .with_header(true)
71+
//! .build(file)
72+
//! .unwrap();
73+
//! // find the maximum value in column 0 across all batches
74+
//! let mut max_c0 = 0;
75+
//! while let Some(r) = reader.next() {
76+
//! let r = r.unwrap(); // handle error
77+
//! // get the max value in column(0) for this batch
78+
//! let col = r.column(0).as_primitive::<Int16Type>();
79+
//! let batch_max = col.iter().max().flatten().unwrap_or_default();
80+
//! max_c0 = max_c0.max(batch_max);
81+
//! }
82+
//! assert_eq!(max_c0, 10);
83+
//! ```
84+
//!
4585
//! # Async Usage
4686
//!
4787
//! The lower-level [`Decoder`] can be integrated with various forms of async data streams,
@@ -441,13 +481,18 @@ pub fn infer_schema_from_files(
441481
type Bounds = Option<(usize, usize)>;
442482

443483
/// CSV file reader using [`std::io::BufReader`]
484+
///
485+
/// See [`ReaderBuilder`] to construct a CSV reader with options and the
486+
/// [module-level documentation](crate::reader) for more details and examples
444487
pub type Reader<R> = BufReader<StdBufReader<R>>;
445488

446-
/// CSV file reader
489+
/// CSV file reader implementation. See [`Reader`] for usage
490+
///
491+
/// Despite having the same name as [`std::io::BufReader`], this structure does
492+
/// not buffer reads itself
447493
pub struct BufReader<R> {
448494
/// File reader
449495
reader: R,
450-
451496
/// The decoder
452497
decoder: Decoder,
453498
}
@@ -1053,7 +1098,7 @@ fn build_boolean_array(
10531098
.map(|e| Arc::new(e) as ArrayRef)
10541099
}
10551100

1056-
/// CSV file reader builder
1101+
/// Builder for CSV [`Reader`]s
10571102
#[derive(Debug)]
10581103
pub struct ReaderBuilder {
10591104
/// Schema of the CSV file
@@ -1071,9 +1116,10 @@ pub struct ReaderBuilder {
10711116
}
10721117

10731118
impl ReaderBuilder {
1074-
/// Create a new builder for configuring CSV parsing options.
1119+
/// Create a new builder for configuring [`Reader`] CSV parsing options.
10751120
///
1076-
/// To convert a builder into a reader, call `ReaderBuilder::build`
1121+
/// To convert a builder into a reader, call [`ReaderBuilder::build`]. See
1122+
/// the [module-level documentation](crate::reader) for more details and examples.
10771123
///
10781124
/// # Example
10791125
///

arrow-csv/src/writer.rs

Lines changed: 16 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,12 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! CSV Writer
18+
//! CSV Writing: [`Writer`] and [`WriterBuilder`]
1919
//!
2020
//! This CSV writer allows Arrow data (in record batches) to be written as CSV files.
2121
//! The writer does not support writing `ListArray` and `StructArray`.
2222
//!
2323
//! # Example
24-
//!
2524
//! ```
2625
//! # use arrow_array::*;
2726
//! # use arrow_array::types::*;
@@ -75,14 +74,13 @@
7574
//! - `DataType::LargeUtf8`
7675
//! - `DataType::Utf8View`
7776
//!
78-
//! ## Example with whitespace handling
77+
//! ## Example: Use [`WriterBuilder`] to control whitespace handling
7978
//!
8079
//! ```
8180
//! # use arrow_array::*;
8281
//! # use arrow_csv::WriterBuilder;
8382
//! # use arrow_schema::*;
8483
//! # use std::sync::Arc;
85-
//!
8684
//! let schema = Schema::new(vec![
8785
//! Field::new("name", DataType::Utf8, false),
8886
//! Field::new("comment", DataType::Utf8, false),
@@ -105,17 +103,6 @@
105103
//! )
106104
//! .unwrap();
107105
//!
108-
//! // Default behavior (no trimming)
109-
//! let mut output = Vec::new();
110-
//! WriterBuilder::new()
111-
//! .build(&mut output)
112-
//! .write(&batch)
113-
//! .unwrap();
114-
//! assert_eq!(
115-
//! String::from_utf8(output).unwrap(),
116-
//! "name,comment\n Alice , Great job! \nBob,Well done\n Charlie,Excellent \n"
117-
//! );
118-
//!
119106
//! // Trim both leading and trailing whitespace
120107
//! let mut output = Vec::new();
121108
//! WriterBuilder::new()
@@ -126,19 +113,11 @@
126113
//! .unwrap();
127114
//! assert_eq!(
128115
//! String::from_utf8(output).unwrap(),
129-
//! "name,comment\nAlice,Great job!\nBob,Well done\nCharlie,Excellent\n"
130-
//! );
131-
//!
132-
//! // Trim only leading whitespace
133-
//! let mut output = Vec::new();
134-
//! WriterBuilder::new()
135-
//! .with_ignore_leading_whitespace(true)
136-
//! .build(&mut output)
137-
//! .write(&batch)
138-
//! .unwrap();
139-
//! assert_eq!(
140-
//! String::from_utf8(output).unwrap(),
141-
//! "name,comment\nAlice ,Great job! \nBob,Well done\nCharlie,Excellent \n"
116+
//! "\
117+
//! name,comment\n\
118+
//! Alice,Great job!\n\
119+
//! Bob,Well done\n\
120+
//! Charlie,Excellent\n"
142121
//! );
143122
//! ```
144123
//!
@@ -220,6 +199,8 @@ const DEFAULT_NULL_VALUE: &str = "";
220199
pub use csv::QuoteStyle;
221200

222201
/// A CSV writer
202+
///
203+
/// See the [module documentation](crate::writer) for examples.
223204
#[derive(Debug)]
224205
pub struct Writer<W: Write> {
225206
/// The object to write to
@@ -248,12 +229,15 @@ pub struct Writer<W: Write> {
248229

249230
impl<W: Write> Writer<W> {
250231
/// Create a new CsvWriter from a writable object, with default options
232+
///
233+
/// See [`WriterBuilder`] for configuration options, and the [module
234+
/// documentation](crate::writer) for examples.
251235
pub fn new(writer: W) -> Self {
252236
let delimiter = b',';
253237
WriterBuilder::new().with_delimiter(delimiter).build(writer)
254238
}
255239

256-
/// Write a RecordBatch to a writable object
240+
/// Write a RecordBatch to the underlying writer
257241
pub fn write(&mut self, batch: &RecordBatch) -> Result<(), ArrowError> {
258242
let num_columns = batch.num_columns();
259243
if self.beginning {
@@ -418,9 +402,10 @@ impl Default for WriterBuilder {
418402
}
419403

420404
impl WriterBuilder {
421-
/// Create a new builder for configuring CSV writing options.
405+
/// Create a new builder for configuring CSV [`Writer`] options.
422406
///
423-
/// To convert a builder into a writer, call `WriterBuilder::build`
407+
/// To convert a builder into a writer, call [`WriterBuilder::build`]. See
408+
/// the [module documentation](crate::writer) for more examples.
424409
///
425410
/// # Example
426411
///

0 commit comments

Comments
 (0)