Skip to content

Commit 2ee0fa0

Browse files
authored
docs: update examples in ArrowReaderOptions to use in-memory buffers (#9163)
# Which issue does this PR close?

Closes #9161

# Rationale for this change

This PR applies the feedback from #9116 to make the parquet reader documentation examples more concise and easier to follow.

# What changes are included in this PR?

Updated 3 documentation examples in `parquet/src/arrow/arrow_reader/mod.rs`:

1. **`with_schema` example 1** - Schema mapping with timestamp
2. **`with_schema` example 2** - Dictionary encoding preservation
3. **`with_virtual_columns` example** - Virtual columns for row numbers

Changes in each example:

- Replaced `tempfile::tempfile()` with `Vec::new()` for an in-memory buffer
- Added `use bytes::Bytes;` import
- Changed `ArrowWriter::try_new(file.try_clone()?, ...)` to `ArrowWriter::try_new(&mut file, ...)`
- Added `let file = Bytes::from(file);` to convert the buffer for reading
- Added `#` prefixes to hide setup/imports in rendered docs

The async example in `async_reader/mod.rs` was intentionally left unchanged since it demonstrates `tokio::fs::File` usage.

# Are there any user-facing changes?

No functional changes, only documentation improvements to make the examples smaller and cleaner in rendered docs.
1 parent 9b633fb commit 2ee0fa0

File tree

1 file changed

+25
-25
lines changed
  • parquet/src/arrow/arrow_reader

1 file changed

+25
-25
lines changed

parquet/src/arrow/arrow_reader/mod.rs

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -506,22 +506,21 @@ impl ArrowReaderOptions {
506506
///
507507
/// # Example
508508
/// ```
509-
/// use std::io::Bytes;
510-
/// use std::sync::Arc;
511-
/// use tempfile::tempfile;
512-
/// use arrow_array::{ArrayRef, Int32Array, RecordBatch};
513-
/// use arrow_schema::{DataType, Field, Schema, TimeUnit};
514-
/// use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
515-
/// use parquet::arrow::ArrowWriter;
516-
///
509+
/// # use std::sync::Arc;
510+
/// # use bytes::Bytes;
511+
/// # use arrow_array::{ArrayRef, Int32Array, RecordBatch};
512+
/// # use arrow_schema::{DataType, Field, Schema, TimeUnit};
513+
/// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
514+
/// # use parquet::arrow::ArrowWriter;
517515
/// // Write data - schema is inferred from the data to be Int32
518-
/// let file = tempfile().unwrap();
516+
/// let mut file = Vec::new();
519517
/// let batch = RecordBatch::try_from_iter(vec![
520518
/// ("col_1", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef),
521519
/// ]).unwrap();
522-
/// let mut writer = ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), None).unwrap();
520+
/// let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
523521
/// writer.write(&batch).unwrap();
524522
/// writer.close().unwrap();
523+
/// let file = Bytes::from(file);
525524
///
526525
/// // Read the file back.
527526
/// // Supply a schema that interprets the Int32 column as a Timestamp.
@@ -530,7 +529,7 @@ impl ArrowReaderOptions {
530529
/// ]));
531530
/// let options = ArrowReaderOptions::new().with_schema(supplied_schema.clone());
532531
/// let mut builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
533-
/// file.try_clone().unwrap(),
532+
/// file.clone(),
534533
/// options
535534
/// ).expect("Error if the schema is not compatible with the parquet file schema.");
536535
///
@@ -546,24 +545,24 @@ impl ArrowReaderOptions {
546545
/// the dictionary encoding by specifying a `Dictionary` type in the schema hint:
547546
///
548547
/// ```
549-
/// use std::sync::Arc;
550-
/// use tempfile::tempfile;
551-
/// use arrow_array::{ArrayRef, RecordBatch, StringArray};
552-
/// use arrow_schema::{DataType, Field, Schema};
553-
/// use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
554-
/// use parquet::arrow::ArrowWriter;
555-
///
548+
/// # use std::sync::Arc;
549+
/// # use bytes::Bytes;
550+
/// # use arrow_array::{ArrayRef, RecordBatch, StringArray};
551+
/// # use arrow_schema::{DataType, Field, Schema};
552+
/// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
553+
/// # use parquet::arrow::ArrowWriter;
556554
/// // Write a Parquet file with string data
557-
/// let file = tempfile().unwrap();
555+
/// let mut file = Vec::new();
558556
/// let schema = Arc::new(Schema::new(vec![
559557
/// Field::new("city", DataType::Utf8, false)
560558
/// ]));
561559
/// let cities = StringArray::from(vec!["Berlin", "Berlin", "Paris", "Berlin", "Paris"]);
562560
/// let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(cities)]).unwrap();
563561
///
564-
/// let mut writer = ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), None).unwrap();
562+
/// let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
565563
/// writer.write(&batch).unwrap();
566564
/// writer.close().unwrap();
565+
/// let file = Bytes::from(file);
567566
///
568567
/// // Read the file back, requesting dictionary encoding preservation
569568
/// let dict_schema = Arc::new(Schema::new(vec![
@@ -574,7 +573,7 @@ impl ArrowReaderOptions {
574573
/// ]));
575574
/// let options = ArrowReaderOptions::new().with_schema(dict_schema);
576575
/// let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
577-
/// file.try_clone().unwrap(),
576+
/// file.clone(),
578577
/// options
579578
/// ).unwrap();
580579
///
@@ -703,26 +702,27 @@ impl ArrowReaderOptions {
703702
/// # Example
704703
/// ```
705704
/// # use std::sync::Arc;
705+
/// # use bytes::Bytes;
706706
/// # use arrow_array::{ArrayRef, Int64Array, RecordBatch};
707707
/// # use arrow_schema::{DataType, Field, Schema};
708708
/// # use parquet::arrow::{ArrowWriter, RowNumber};
709709
/// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
710-
/// # use tempfile::tempfile;
711710
/// #
712711
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
713712
/// // Create a simple record batch with some data
714713
/// let values = Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef;
715714
/// let batch = RecordBatch::try_from_iter(vec![("value", values)])?;
716715
///
717-
/// // Write the batch to a temporary parquet file
718-
/// let file = tempfile()?;
716+
/// // Write the batch to an in-memory buffer
717+
/// let mut file = Vec::new();
719718
/// let mut writer = ArrowWriter::try_new(
720-
/// file.try_clone()?,
719+
/// &mut file,
721720
/// batch.schema(),
722721
/// None
723722
/// )?;
724723
/// writer.write(&batch)?;
725724
/// writer.close()?;
725+
/// let file = Bytes::from(file);
726726
///
727727
/// // Create a virtual column for row numbers
728728
/// let row_number_field = Arc::new(Field::new("row_number", DataType::Int64, false)

0 commit comments

Comments
 (0)