Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 25 additions & 25 deletions parquet/src/arrow/arrow_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,22 +506,21 @@ impl ArrowReaderOptions {
///
/// # Example
/// ```
/// use std::io::Bytes;
/// use std::sync::Arc;
/// use tempfile::tempfile;
/// use arrow_array::{ArrayRef, Int32Array, RecordBatch};
/// use arrow_schema::{DataType, Field, Schema, TimeUnit};
/// use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
/// use parquet::arrow::ArrowWriter;
///
/// # use std::sync::Arc;
/// # use bytes::Bytes;
/// # use arrow_array::{ArrayRef, Int32Array, RecordBatch};
/// # use arrow_schema::{DataType, Field, Schema, TimeUnit};
/// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
/// # use parquet::arrow::ArrowWriter;
/// // Write data; the schema is inferred from the data, so the column is Int32
/// let file = tempfile().unwrap();
/// let mut file = Vec::new();
/// let batch = RecordBatch::try_from_iter(vec![
/// ("col_1", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef),
/// ]).unwrap();
/// let mut writer = ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), None).unwrap();
/// let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
/// writer.write(&batch).unwrap();
/// writer.close().unwrap();
/// let file = Bytes::from(file);
///
/// // Read the file back.
/// // Supply a schema that interprets the Int32 column as a Timestamp.
Expand All @@ -530,7 +529,7 @@ impl ArrowReaderOptions {
/// ]));
/// let options = ArrowReaderOptions::new().with_schema(supplied_schema.clone());
/// let mut builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
/// file.try_clone().unwrap(),
/// file.clone(),
/// options
/// ).expect("Error if the schema is not compatible with the parquet file schema.");
///
Expand All @@ -546,24 +545,24 @@ impl ArrowReaderOptions {
/// the dictionary encoding by specifying a `Dictionary` type in the schema hint:
///
/// ```
/// use std::sync::Arc;
/// use tempfile::tempfile;
/// use arrow_array::{ArrayRef, RecordBatch, StringArray};
/// use arrow_schema::{DataType, Field, Schema};
/// use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
/// use parquet::arrow::ArrowWriter;
///
/// # use std::sync::Arc;
/// # use bytes::Bytes;
/// # use arrow_array::{ArrayRef, RecordBatch, StringArray};
/// # use arrow_schema::{DataType, Field, Schema};
/// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
/// # use parquet::arrow::ArrowWriter;
/// // Write a Parquet file with string data
/// let file = tempfile().unwrap();
/// let mut file = Vec::new();
/// let schema = Arc::new(Schema::new(vec![
/// Field::new("city", DataType::Utf8, false)
/// ]));
/// let cities = StringArray::from(vec!["Berlin", "Berlin", "Paris", "Berlin", "Paris"]);
/// let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(cities)]).unwrap();
///
/// let mut writer = ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), None).unwrap();
/// let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
/// writer.write(&batch).unwrap();
/// writer.close().unwrap();
/// let file = Bytes::from(file);
///
/// // Read the file back, requesting dictionary encoding preservation
/// let dict_schema = Arc::new(Schema::new(vec![
Expand All @@ -574,7 +573,7 @@ impl ArrowReaderOptions {
/// ]));
/// let options = ArrowReaderOptions::new().with_schema(dict_schema);
/// let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
/// file.try_clone().unwrap(),
/// file.clone(),
/// options
/// ).unwrap();
///
Expand Down Expand Up @@ -703,26 +702,27 @@ impl ArrowReaderOptions {
/// # Example
/// ```
/// # use std::sync::Arc;
/// # use bytes::Bytes;
/// # use arrow_array::{ArrayRef, Int64Array, RecordBatch};
/// # use arrow_schema::{DataType, Field, Schema};
/// # use parquet::arrow::{ArrowWriter, RowNumber};
/// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
/// # use tempfile::tempfile;
/// #
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// // Create a simple record batch with some data
/// let values = Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef;
/// let batch = RecordBatch::try_from_iter(vec![("value", values)])?;
///
/// // Write the batch to a temporary parquet file
/// let file = tempfile()?;
/// // Write the batch to an in-memory buffer
/// let mut file = Vec::new();
/// let mut writer = ArrowWriter::try_new(
/// file.try_clone()?,
/// &mut file,
/// batch.schema(),
/// None
/// )?;
/// writer.write(&batch)?;
/// writer.close()?;
/// let file = Bytes::from(file);
///
/// // Create a virtual column for row numbers
/// let row_number_field = Arc::new(Field::new("row_number", DataType::Int64, false)
Expand Down
Loading