From 4ff1f3f8301aeee54b89b516b048fe2391cbaab3 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 12 Nov 2025 23:22:44 +0100 Subject: [PATCH 1/8] Draft for implementing custom ArrayFormatters --- arrow-cast/src/display.rs | 42 ++++--- arrow-cast/src/pretty.rs | 257 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 270 insertions(+), 29 deletions(-) diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index caa9804507d8..4584bb17e2a6 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -57,23 +57,23 @@ pub enum DurationFormat { pub struct FormatOptions<'a> { /// If set to `true` any formatting errors will be written to the output /// instead of being converted into a [`std::fmt::Error`] - safe: bool, + pub safe: bool, /// Format string for nulls - null: &'a str, + pub null: &'a str, /// Date format for date arrays - date_format: TimeFormat<'a>, + pub date_format: TimeFormat<'a>, /// Format for DateTime arrays - datetime_format: TimeFormat<'a>, + pub datetime_format: TimeFormat<'a>, /// Timestamp format for timestamp arrays - timestamp_format: TimeFormat<'a>, + pub timestamp_format: TimeFormat<'a>, /// Timestamp format for timestamp with timezone arrays - timestamp_tz_format: TimeFormat<'a>, + pub timestamp_tz_format: TimeFormat<'a>, /// Time format for time arrays - time_format: TimeFormat<'a>, + pub time_format: TimeFormat<'a>, /// Duration format - duration_format: DurationFormat, + pub duration_format: DurationFormat, /// Show types in visual representation batches - types_info: bool, + pub types_info: bool, } impl Default for FormatOptions<'_> { @@ -170,6 +170,10 @@ impl<'a> FormatOptions<'a> { } /// Returns true if type info should be included in visual representation of batches + #[deprecated( + since = "58.0.0", + note = "Directly access the `types_info` field instead.`" + )] pub const fn types_info(&self) -> bool { self.types_info } @@ -272,14 +276,16 @@ pub struct ArrayFormatter<'a> { } impl<'a> ArrayFormatter<'a> { + /// Returns an [`ArrayFormatter`] using the provided formatter. + pub fn new(format: Box, safe: bool) -> Self { + Self { format, safe } + } + /// Returns an [`ArrayFormatter`] that can be used to format `array` /// /// This returns an error if an array of the given data type cannot be formatted pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result { - Ok(Self { - format: make_formatter(array, options)?, - safe: options.safe, - }) + Ok(Self::new(make_formatter(array, options)?, options.safe)) } /// Returns a [`ValueFormatter`] that implements [`Display`] for @@ -332,12 +338,15 @@ fn make_formatter<'a>( } /// Either an [`ArrowError`] or [`std::fmt::Error`] -enum FormatError { +pub enum FormatError { + /// An error occurred while formatting the array Format(std::fmt::Error), + /// An Arrow error occurred while formatting the array. Arrow(ArrowError), } -type FormatResult = Result<(), FormatError>; +/// The result of formatting an array element via [`DisplayIndex::write`]. +pub type FormatResult = Result<(), FormatError>; impl From for FormatError { fn from(value: std::fmt::Error) -> Self { @@ -352,7 +361,8 @@ impl From for FormatError { } /// [`Display`] but accepting an index -trait DisplayIndex { +pub trait DisplayIndex { + /// Write the value of the underlying array at `idx` to `f`. fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult; } diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index 49fb972684f3..4da1ad90b553 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -22,14 +22,99 @@ //! [`RecordBatch`]: arrow_array::RecordBatch //! [`Array`]: arrow_array::Array -use std::fmt::Display; - use comfy_table::{Cell, Table}; +use std::fmt::{Display, Write}; +use std::sync::Arc; -use arrow_array::{Array, ArrayRef, RecordBatch}; -use arrow_schema::{ArrowError, SchemaRef}; +use arrow_array::cast::AsArray; +use arrow_array::{Array, ArrayRef, Int32Array, RecordBatch, array}; +use arrow_schema::{ArrowError, Field, SchemaRef}; -use crate::display::{ArrayFormatter, FormatOptions}; +use crate::display::{ArrayFormatter, DisplayIndex, FormatError, FormatOptions}; + +/// Allows creating a new [`ArrayFormatter`] for a given [`Array`] and an optional [`Field`]. +/// +/// # Example +/// +/// The example below shows how to create a custom formatter for a custom type `my_money`. +/// +/// ```rust +/// use std::fmt::Write; +/// use arrow_array::{Array, Int32Array, cast::AsArray}; +/// use arrow_cast::display::{ArrayFormatter, DisplayIndex, FormatOptions, FormatResult}; +/// use arrow_cast::pretty::{pretty_format_batches_with_options_and_formatters, ArrayFormatterFactory}; +/// use arrow_schema::{ArrowError, Field}; +/// +/// /// A custom formatter factory that can create a formatter for the special type `my_money`. +/// /// +/// /// This struct could have access to some kind of extension type registry that can lookup the +/// /// correct formatter for an extension type on-demand. +/// struct MyFormatters {} +/// +/// impl ArrayFormatterFactory for MyFormatters { +/// fn create_display_index<'formatter>( +/// &self, +/// array: &'formatter dyn Array, +/// options: &'formatter FormatOptions<'formatter>, +/// field: Option<&'formatter Field>, +/// ) -> Result>, ArrowError> { +/// // check if this is the money type +/// if field +/// .map(|f| f.extension_type_name() == Some("my_money")) +/// .unwrap_or(false) +/// { +/// // We assume that my_money always is an Int32. +/// let array = array.as_primitive(); +/// let display_index = Box::new(MyMoneyFormatter { array, options }); +/// return Ok(Some(ArrayFormatter::new(display_index, options.safe))); +/// } +/// +/// Ok(None) // None indicates that the default formatter should be used. +/// } +/// } +/// +/// /// A formatter for the type `my_money` that wraps a specific array and has access to the +/// /// formatting options. +/// struct MyMoneyFormatter<'a> { +/// array: &'a Int32Array, +/// options: &'a FormatOptions<'a>, +/// } +/// +/// impl<'a> DisplayIndex for MyMoneyFormatter<'a> { +/// fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { +/// match self.array.is_valid(idx) { +/// true => write!(f, "{} €", self.array.value(idx))?, +/// false => write!(f, "{}", self.options.null)?, +/// } +/// +/// Ok(()) +/// } +/// } +/// +/// // Usually, here you would provide your record batches. +/// let my_batches = vec![]; +/// +/// // Call the pretty printer with the custom formatter factory. +/// pretty_format_batches_with_options_and_formatters( +/// &my_batches, +/// &FormatOptions::new(), +/// Some(&MyFormatters {}) +/// ); +/// ``` +pub trait ArrayFormatterFactory { + /// Creates a new [`ArrayFormatter`] for the given [`Array`] and an optional [`Field`]. If the + /// default implementation should be used, return [`None`]. + /// + /// The field shall be used to look up metadata about the `array` while `options` provide + /// information on formatting, for example, dates and times which should be considered by an + /// implementor. + fn create_display_index<'formatter>( + &self, + array: &'formatter dyn Array, + options: &'formatter FormatOptions<'formatter>, + field: Option<&'formatter Field>, + ) -> Result>, ArrowError>; +} /// Create a visual representation of [`RecordBatch`]es /// @@ -94,7 +179,7 @@ pub fn pretty_format_batches_with_schema( results: &[RecordBatch], ) -> Result, ArrowError> { let options = FormatOptions::default().with_display_error(true); - create_table(Some(schema), results, &options) + create_table(Some(schema), results, &options, None) } /// Create a visual representation of [`RecordBatch`]es with formatting options. @@ -131,7 +216,28 @@ pub fn pretty_format_batches_with_options( results: &[RecordBatch], options: &FormatOptions, ) -> Result, ArrowError> { - create_table(None, results, options) + create_table(None, results, options, None) +} + +/// Create a visual representation of [`RecordBatch`]es with formatting options. +/// +/// # Arguments +/// * `results` - A slice of record batches to display +/// * `options` - [`FormatOptions`] that control the resulting display +/// * `formatters` - A slice of [`ArrayFormatter`]s that control the formatting of each column. If +/// a formatter is [`None`], the default formatter will be used. Must be exactly as long as the +/// number of fields in the record batches. +/// +/// # Example +/// +/// For an example see [`ArrayFormatterFactory`]. +/// ``` +pub fn pretty_format_batches_with_options_and_formatters( + results: &[RecordBatch], + options: &FormatOptions, + formatters: Option<&dyn ArrayFormatterFactory>, +) -> Result, ArrowError> { + create_table(None, results, options, formatters) } /// Create a visual representation of [`ArrayRef`] @@ -155,7 +261,22 @@ pub fn pretty_format_columns_with_options( results: &[ArrayRef], options: &FormatOptions, ) -> Result, ArrowError> { - create_column(col_name, results, options) + create_column(col_name, results, options, None) +} + +/// Create a visual representation of [`ArrayRef`] with formatting options and possibly custom +/// [`ArrayFormatter`]s. +/// +/// Returns an error if `formatters` has a different length as `results`. +/// +/// See [`pretty_format_batches_with_options_and_formatters`] for an example +pub fn pretty_format_columns_with_options_and_formatters( + col_name: &str, + results: &[ArrayRef], + options: &FormatOptions, + formatters: Option<&dyn ArrayFormatterFactory>, +) -> Result, ArrowError> { + create_column(col_name, results, options, formatters) } /// Prints a visual representation of record batches to stdout @@ -175,6 +296,7 @@ fn create_table( schema_opt: Option, results: &[RecordBatch], options: &FormatOptions, + formatters: Option<&dyn ArrayFormatterFactory>, ) -> Result { let mut table = Table::new(); table.load_preset("||--+-++| ++++++"); @@ -187,10 +309,10 @@ fn create_table( } }); - if let Some(schema) = schema_opt { + if let Some(schema) = &schema_opt { let mut header = Vec::new(); for field in schema.fields() { - if options.types_info() { + if options.types_info { header.push(Cell::new(format!( "{}\n{}", field.name(), @@ -208,10 +330,18 @@ fn create_table( } for batch in results { + let schema = schema_opt.as_ref().unwrap_or(batch.schema_ref()); let formatters = batch .columns() .iter() - .map(|c| ArrayFormatter::try_new(c.as_ref(), options)) + .zip(schema.fields().iter()) + .map(|(c, field)| match formatters { + None => ArrayFormatter::try_new(c.as_ref(), options), + Some(formatters) => formatters + .create_display_index(c.as_ref(), options, Some(field)) + .transpose() + .unwrap_or_else(|| ArrayFormatter::try_new(c.as_ref(), options)), + }) .collect::, ArrowError>>()?; for row in 0..batch.num_rows() { @@ -230,6 +360,7 @@ fn create_column( field: &str, columns: &[ArrayRef], options: &FormatOptions, + formatters: Option<&dyn ArrayFormatterFactory>, ) -> Result { let mut table = Table::new(); table.load_preset("||--+-++| ++++++"); @@ -242,7 +373,13 @@ fn create_column( table.set_header(header); for col in columns { - let formatter = ArrayFormatter::try_new(col.as_ref(), options)?; + let formatter = match formatters { + None => ArrayFormatter::try_new(col.as_ref(), options)?, + Some(formatters) => formatters + .create_display_index(col.as_ref(), options, None) + .transpose() + .unwrap_or_else(|| ArrayFormatter::try_new(col.as_ref(), options))?, + }; for row in 0..col.len() { let cells = vec![Cell::new(formatter.value(row))]; table.add_row(cells); @@ -254,18 +391,20 @@ fn create_column( #[cfg(test)] mod tests { + use std::collections::HashMap; use std::fmt::Write; use std::sync::Arc; use half::f16; use arrow_array::builder::*; + use arrow_array::cast::AsArray; use arrow_array::types::*; use arrow_array::*; use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer}; use arrow_schema::*; - use crate::display::{DurationFormat, array_value_to_string}; + use crate::display::{DisplayIndex, DurationFormat, FormatError, array_value_to_string}; use super::*; @@ -1283,4 +1422,96 @@ mod tests { let actual: Vec<&str> = iso.lines().collect(); assert_eq!(expected_iso, actual, "Actual result:\n{iso}"); } + + #[test] + fn test_writing_with_custom_formatters() { + /// The factory that will create the [`ArrayFormatter`]s. + struct MyFormatters {} + + impl ArrayFormatterFactory for MyFormatters { + fn create_display_index<'formatter>( + &self, + array: &'formatter dyn Array, + options: &'formatter FormatOptions<'formatter>, + field: Option<&'formatter Field>, + ) -> Result>, ArrowError> { + if field + .map(|f| f.extension_type_name() == Some("my_money")) + .unwrap_or(false) + { + // We assume that my_money always is an Int32. + let array = array.as_primitive(); + let display_index = Box::new(MyMoneyFormatter { array, options }); + return Ok(Some(ArrayFormatter::new(display_index, options.safe))); + } + + Ok(None) + } + } + + /// The actual formatter + struct MyMoneyFormatter<'a> { + array: &'a Int32Array, + options: &'a FormatOptions<'a>, + } + + impl<'a> DisplayIndex for MyMoneyFormatter<'a> { + fn write(&self, idx: usize, f: &mut dyn Write) -> crate::display::FormatResult { + match self.array.is_valid(idx) { + true => write!(f, "{} €", self.array.value(idx))?, + false => write!(f, "{}", self.options.null)?, + } + + Ok(()) + } + } + + // define a schema. + let options = FormatOptions::new().with_null(""); + let money_metadata = HashMap::from([( + extension::EXTENSION_TYPE_NAME_KEY.to_owned(), + "my_money".to_owned(), + )]); + let schema = Arc::new(Schema::new(vec![ + Field::new("income", DataType::Int32, true).with_metadata(money_metadata.clone()), + ])); + + // define data. + let batch = RecordBatch::try_new( + schema, + vec![Arc::new(array::Int32Array::from(vec![ + Some(1), + None, + Some(10), + Some(100), + ]))], + ) + .unwrap(); + + let mut buf = String::new(); + write!( + &mut buf, + "{}", + pretty_format_batches_with_options_and_formatters( + &[batch], + &options, + Some(&MyFormatters {}) + ) + .unwrap() + ) + .unwrap(); + + let s = [ + "+--------+", + "| income |", + "+--------+", + "| 1 € |", + "| |", + "| 10 € |", + "| 100 € |", + "+--------+", + ]; + let expected = s.join("\n"); + assert_eq!(expected, buf); + } } From 47f00a7fc0d1ff99d216133193b967be29f9ead1 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 12 Nov 2025 23:47:09 +0100 Subject: [PATCH 2/8] Improve custom pretty printing --- arrow-cast/src/pretty.rs | 200 +++++++++++++++++++++++++++++++-------- 1 file changed, 162 insertions(+), 38 deletions(-) diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index 4da1ad90b553..eac1fb9fd357 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -27,10 +27,10 @@ use std::fmt::{Display, Write}; use std::sync::Arc; use arrow_array::cast::AsArray; -use arrow_array::{Array, ArrayRef, Int32Array, RecordBatch, array}; +use arrow_array::{array, Array, ArrayRef, Int32Array, RecordBatch}; use arrow_schema::{ArrowError, Field, SchemaRef}; -use crate::display::{ArrayFormatter, DisplayIndex, FormatError, FormatOptions}; +use crate::display::{ArrayFormatter, DisplayIndex, FormatOptions}; /// Allows creating a new [`ArrayFormatter`] for a given [`Array`] and an optional [`Field`]. /// @@ -404,7 +404,7 @@ mod tests { use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer}; use arrow_schema::*; - use crate::display::{DisplayIndex, DurationFormat, FormatError, array_value_to_string}; + use crate::display::{array_value_to_string, DisplayIndex, DurationFormat}; use super::*; @@ -1423,49 +1423,77 @@ mod tests { assert_eq!(expected_iso, actual, "Actual result:\n{iso}"); } - #[test] - fn test_writing_with_custom_formatters() { - /// The factory that will create the [`ArrayFormatter`]s. - struct MyFormatters {} - - impl ArrayFormatterFactory for MyFormatters { - fn create_display_index<'formatter>( - &self, - array: &'formatter dyn Array, - options: &'formatter FormatOptions<'formatter>, - field: Option<&'formatter Field>, - ) -> Result>, ArrowError> { - if field - .map(|f| f.extension_type_name() == Some("my_money")) - .unwrap_or(false) - { - // We assume that my_money always is an Int32. - let array = array.as_primitive(); - let display_index = Box::new(MyMoneyFormatter { array, options }); - return Ok(Some(ArrayFormatter::new(display_index, options.safe))); - } - - Ok(None) + // + // Custom Formatting + // + + /// The factory that will create the [`ArrayFormatter`]s. + struct TestFormatters {} + + impl ArrayFormatterFactory for TestFormatters { + fn create_display_index<'formatter>( + &self, + array: &'formatter dyn Array, + options: &'formatter FormatOptions<'formatter>, + field: Option<&'formatter Field>, + ) -> Result>, ArrowError> { + if field + .map(|f| f.extension_type_name() == Some("my_money")) + .unwrap_or(false) + { + // We assume that my_money always is an Int32. + let array = array.as_primitive(); + let display_index = Box::new(MyMoneyFormatter { array, options }); + return Ok(Some(ArrayFormatter::new(display_index, options.safe))); + } + + if array.data_type() == &DataType::Int32 { + // We assume that my_money always is an Int32. + let array = array.as_primitive(); + let display_index = Box::new(MyInt32Formatter { array, options }); + return Ok(Some(ArrayFormatter::new(display_index, options.safe))); } + + Ok(None) } + } - /// The actual formatter - struct MyMoneyFormatter<'a> { - array: &'a Int32Array, - options: &'a FormatOptions<'a>, + /// The actual formatter + struct MyMoneyFormatter<'a> { + array: &'a Int32Array, + options: &'a FormatOptions<'a>, + } + + impl<'a> DisplayIndex for MyMoneyFormatter<'a> { + fn write(&self, idx: usize, f: &mut dyn Write) -> crate::display::FormatResult { + match self.array.is_valid(idx) { + true => write!(f, "{} €", self.array.value(idx))?, + false => write!(f, "{}", self.options.null)?, + } + + Ok(()) } + } - impl<'a> DisplayIndex for MyMoneyFormatter<'a> { - fn write(&self, idx: usize, f: &mut dyn Write) -> crate::display::FormatResult { - match self.array.is_valid(idx) { - true => write!(f, "{} €", self.array.value(idx))?, - false => write!(f, "{}", self.options.null)?, - } + /// The actual formatter + struct MyInt32Formatter<'a> { + array: &'a Int32Array, + options: &'a FormatOptions<'a>, + } - Ok(()) + impl<'a> DisplayIndex for MyInt32Formatter<'a> { + fn write(&self, idx: usize, f: &mut dyn Write) -> crate::display::FormatResult { + match self.array.is_valid(idx) { + true => write!(f, "{} (32-Bit)", self.array.value(idx))?, + false => write!(f, "{}", self.options.null)?, } + + Ok(()) } + } + #[test] + fn test_format_batches_with_custom_formatters() { // define a schema. let options = FormatOptions::new().with_null(""); let money_metadata = HashMap::from([( @@ -1495,7 +1523,7 @@ mod tests { pretty_format_batches_with_options_and_formatters( &[batch], &options, - Some(&MyFormatters {}) + Some(&TestFormatters {}) ) .unwrap() ) @@ -1514,4 +1542,100 @@ mod tests { let expected = s.join("\n"); assert_eq!(expected, buf); } + + #[test] + fn test_format_batches_with_custom_formatters_custom_schema_overrules_batch_schema() { + // define a schema. + let options = FormatOptions::new(); + let money_metadata = HashMap::from([( + extension::EXTENSION_TYPE_NAME_KEY.to_owned(), + "my_money".to_owned(), + )]); + let schema = Arc::new(Schema::new(vec![ + Field::new("income", DataType::Int32, true).with_metadata(money_metadata.clone()), + ])); + + // define data. + let batch = RecordBatch::try_new( + schema, + vec![Arc::new(array::Int32Array::from(vec![ + Some(1), + None, + Some(10), + Some(100), + ]))], + ) + .unwrap(); + + let mut buf = String::new(); + write!( + &mut buf, + "{}", + create_table( + // No metadata compared to test_format_batches_with_custom_formatters + Some(Arc::new(Schema::new(vec![Field::new( + "income", + DataType::Int32, + true + ),]))), + &[batch], + &options, + Some(&TestFormatters {}) + ) + .unwrap() + ) + .unwrap(); + + // No € formatting as in test_format_batches_with_custom_formatters + let s = [ + "+--------------+", + "| income |", + "+--------------+", + "| 1 (32-Bit) |", + "| |", + "| 10 (32-Bit) |", + "| 100 (32-Bit) |", + "+--------------+", + ]; + let expected = s.join("\n"); + assert_eq!(expected, buf); + } + + #[test] + fn test_format_column_with_custom_formatters() { + // define data. + let array = Arc::new(array::Int32Array::from(vec![ + Some(1), + None, + Some(10), + Some(100), + ])); + + let mut buf = String::new(); + write!( + &mut buf, + "{}", + pretty_format_columns_with_options_and_formatters( + "income", + &[array], + &FormatOptions::default(), + Some(&TestFormatters {}) + ) + .unwrap() + ) + .unwrap(); + + let s = [ + "+--------------+", + "| income |", + "+--------------+", + "| 1 (32-Bit) |", + "| |", + "| 10 (32-Bit) |", + "| 100 (32-Bit) |", + "+--------------+", + ]; + let expected = s.join("\n"); + assert_eq!(expected, buf); + } } From 37bac6f305e94f5c4f4d18034b3ac50485fd8dee Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 12 Nov 2025 23:53:59 +0100 Subject: [PATCH 3/8] Add sanity check for number of columns --- arrow-cast/src/pretty.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index eac1fb9fd357..c6036d0c2506 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -331,6 +331,12 @@ fn create_table( for batch in results { let schema = schema_opt.as_ref().unwrap_or(batch.schema_ref()); + + // Could be a custom schema that was provided. + if batch.columns().len() != schema.fields().len() { + return Err(ArrowError::InvalidArgumentError("Expected the same number of columns in a record batch as the number of fields in the schema".to_owned())); + } + let formatters = batch .columns() .iter() @@ -1638,4 +1644,31 @@ mod tests { let expected = s.join("\n"); assert_eq!(expected, buf); } + + #[test] + fn test_pretty_format_batches_with_schema_with_wrong_number_of_fields() { + let schema_a = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Utf8, true), + ])); + let schema_b = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); + + // define data. + let batch = RecordBatch::try_new( + schema_b, + vec![Arc::new(array::Int32Array::from(vec![ + Some(1), + None, + Some(10), + Some(100), + ]))], + ) + .unwrap(); + + let result = pretty_format_batches_with_schema(schema_a, &[batch]); + assert!( + matches!(result, Err(ArrowError::InvalidArgumentError(_))), + "Error expected" + ); + } } From 1e8f101eb0dd145bb2026bf49eb334f168324675 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 12 Nov 2025 23:55:20 +0100 Subject: [PATCH 4/8] Formatting --- arrow-cast/src/pretty.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index c6036d0c2506..9a99718a28e0 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -27,7 +27,7 @@ use std::fmt::{Display, Write}; use std::sync::Arc; use arrow_array::cast::AsArray; -use arrow_array::{array, Array, ArrayRef, Int32Array, RecordBatch}; +use arrow_array::{Array, ArrayRef, Int32Array, RecordBatch, array}; use arrow_schema::{ArrowError, Field, SchemaRef}; use crate::display::{ArrayFormatter, DisplayIndex, FormatOptions}; @@ -410,7 +410,7 @@ mod tests { use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer}; use arrow_schema::*; - use crate::display::{array_value_to_string, DisplayIndex, DurationFormat}; + use crate::display::{DisplayIndex, DurationFormat, array_value_to_string}; use super::*; From fcc6478c3fae99bdbe2f27df3a096eb6cdff83f9 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Thu, 13 Nov 2025 00:03:25 +0100 Subject: [PATCH 5/8] Minor fixes --- arrow-cast/src/pretty.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index 9a99718a28e0..20d78ea492cb 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -23,14 +23,12 @@ //! [`Array`]: arrow_array::Array use comfy_table::{Cell, Table}; -use std::fmt::{Display, Write}; -use std::sync::Arc; +use std::fmt::{Display}; -use arrow_array::cast::AsArray; -use arrow_array::{Array, ArrayRef, Int32Array, RecordBatch, array}; +use arrow_array::{Array, ArrayRef, RecordBatch}; use arrow_schema::{ArrowError, Field, SchemaRef}; -use crate::display::{ArrayFormatter, DisplayIndex, FormatOptions}; +use crate::display::{ArrayFormatter, FormatOptions}; /// Allows creating a new [`ArrayFormatter`] for a given [`Array`] and an optional [`Field`]. /// @@ -230,8 +228,7 @@ pub fn pretty_format_batches_with_options( /// /// # Example /// -/// For an example see [`ArrayFormatterFactory`]. -/// ``` +/// For an example, see [`ArrayFormatterFactory`]. pub fn pretty_format_batches_with_options_and_formatters( results: &[RecordBatch], options: &FormatOptions, From 0599d1722a2ff49b60c7f9f772ce3c785396b0a8 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Fri, 14 Nov 2025 08:53:34 +0100 Subject: [PATCH 6/8] Use accessors in FormatOptions --- arrow-cast/src/display.rs | 65 ++++++++++++++++++++++++++++++--------- arrow-cast/src/pretty.rs | 12 ++++---- 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index 4584bb17e2a6..34daf5cedacd 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -57,23 +57,23 @@ pub enum DurationFormat { pub struct FormatOptions<'a> { /// If set to `true` any formatting errors will be written to the output /// instead of being converted into a [`std::fmt::Error`] - pub safe: bool, + safe: bool, /// Format string for nulls - pub null: &'a str, + null: &'a str, /// Date format for date arrays - pub date_format: TimeFormat<'a>, + date_format: TimeFormat<'a>, /// Format for DateTime arrays - pub datetime_format: TimeFormat<'a>, + datetime_format: TimeFormat<'a>, /// Timestamp format for timestamp arrays - pub timestamp_format: TimeFormat<'a>, + timestamp_format: TimeFormat<'a>, /// Timestamp format for timestamp with timezone arrays - pub timestamp_tz_format: TimeFormat<'a>, + timestamp_tz_format: TimeFormat<'a>, /// Time format for time arrays - pub time_format: TimeFormat<'a>, + time_format: TimeFormat<'a>, /// Duration format - pub duration_format: DurationFormat, + duration_format: DurationFormat, /// Show types in visual representation batches - pub types_info: bool, + types_info: bool, } impl Default for FormatOptions<'_> { @@ -169,11 +169,48 @@ impl<'a> FormatOptions<'a> { Self { types_info, ..self } } - /// Returns true if type info should be included in visual representation of batches - #[deprecated( - since = "58.0.0", - note = "Directly access the `types_info` field instead.`" - )] + /// Returns whether formatting errors should be written to the output instead of being converted + /// into a [`std::fmt::Error`]. + pub const fn safe(&self) -> bool { + self.safe + } + + /// Returns the string used for displaying nulls. + pub const fn null(&self) -> &'a str { + self.null + } + + /// Returns the [`TimeFormat`] for date arrays. + pub const fn date_format(&self) -> TimeFormat<'a> { + self.date_format + } + + /// Returns the [`TimeFormat`] for datetime arrays. + pub const fn datetime_format(&self) -> TimeFormat<'a> { + self.datetime_format + } + + /// Returns the [`TimeFormat`] for timestamp arrays. + pub const fn timestamp_format(&self) -> TimeFormat<'a> { + self.timestamp_format + } + + /// Returns the [`TimeFormat`] for timezone arrays. + pub const fn timestamp_tz_format(&self) -> TimeFormat<'a> { + self.timestamp_tz_format + } + + /// Returns the [`TimeFormat`] for time arrays. + pub const fn time_format(&self) -> TimeFormat<'a> { + self.time_format + } + + /// Returns the [`DurationFormat`]. + pub const fn duration_format(&self) -> DurationFormat { + self.duration_format + } + + /// Returns true if type info should be included in a visual representation of batches. pub const fn types_info(&self) -> bool { self.types_info } diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index 20d78ea492cb..bb01fc5dea21 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -23,7 +23,7 @@ //! [`Array`]: arrow_array::Array use comfy_table::{Cell, Table}; -use std::fmt::{Display}; +use std::fmt::Display; use arrow_array::{Array, ArrayRef, RecordBatch}; use arrow_schema::{ArrowError, Field, SchemaRef}; @@ -309,7 +309,7 @@ fn create_table( if let Some(schema) = &schema_opt { let mut header = Vec::new(); for field in schema.fields() { - if options.types_info { + if options.types_info() { header.push(Cell::new(format!( "{}\n{}", field.name(), @@ -1447,14 +1447,14 @@ mod tests { // We assume that my_money always is an Int32. let array = array.as_primitive(); let display_index = Box::new(MyMoneyFormatter { array, options }); - return Ok(Some(ArrayFormatter::new(display_index, options.safe))); + return Ok(Some(ArrayFormatter::new(display_index, options.safe()))); } if array.data_type() == &DataType::Int32 { // We assume that my_money always is an Int32. let array = array.as_primitive(); let display_index = Box::new(MyInt32Formatter { array, options }); - return Ok(Some(ArrayFormatter::new(display_index, options.safe))); + return Ok(Some(ArrayFormatter::new(display_index, options.safe()))); } Ok(None) @@ -1471,7 +1471,7 @@ mod tests { fn write(&self, idx: usize, f: &mut dyn Write) -> crate::display::FormatResult { match self.array.is_valid(idx) { true => write!(f, "{} €", self.array.value(idx))?, - false => write!(f, "{}", self.options.null)?, + false => write!(f, "{}", self.options.null())?, } Ok(()) @@ -1488,7 +1488,7 @@ mod tests { fn write(&self, idx: usize, f: &mut dyn Write) -> crate::display::FormatResult { match self.array.is_valid(idx) { true => write!(f, "{} (32-Bit)", self.array.value(idx))?, - false => write!(f, "{}", self.options.null)?, + false => write!(f, "{}", self.options.null())?, } Ok(()) From 68823946a512b5125a8fa8bd088be9bb9fd6d1c8 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Fri, 14 Nov 2025 09:29:27 +0100 Subject: [PATCH 7/8] Move ArrayFormatterFactory into FormatOptions --- arrow-cast/src/display.rs | 88 +++++++++++++++++++++++++++++++++++---- arrow-cast/src/pretty.rs | 88 ++++++++++----------------------------- 2 files changed, 104 insertions(+), 72 deletions(-) diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index 34daf5cedacd..51af222dac98 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -24,8 +24,10 @@ //! //! [`pretty`]: crate::pretty use std::fmt::{Display, Formatter, Write}; +use std::hash::{Hash, Hasher}; use std::ops::Range; +use crate::pretty::ArrayFormatterFactory; use arrow_array::cast::*; use arrow_array::temporal_conversions::*; use arrow_array::timezone::Tz; @@ -53,7 +55,12 @@ pub enum DurationFormat { /// By default nulls are formatted as `""` and temporal types formatted /// according to RFC3339 /// -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +/// # Equality +/// +/// Most fields in [`FormatOptions`] are compared by value, except `formatter_factory`. As the trait +/// does not require an [`Eq`] and [`Hash`] implementation, this struct only compares the pointer of +/// the factories. +#[derive(Debug, Clone)] pub struct FormatOptions<'a> { /// If set to `true` any formatting errors will be written to the output /// instead of being converted into a [`std::fmt::Error`] @@ -74,6 +81,10 @@ pub struct FormatOptions<'a> { duration_format: DurationFormat, /// Show types in visual representation batches types_info: bool, + /// Formatter factory used to instantiate custom [`ArrayFormatter`]s. This allows users to + /// provide custom formatters. + #[cfg(feature = "prettyprint")] + formatter_factory: Option<&'a dyn ArrayFormatterFactory>, } impl Default for FormatOptions<'_> { @@ -82,6 +93,43 @@ impl Default for FormatOptions<'_> { } } +impl PartialEq for FormatOptions<'_> { + fn eq(&self, other: &Self) -> bool { + self.safe == other.safe + && self.null == other.null + && self.date_format == other.date_format + && self.datetime_format == other.datetime_format + && self.timestamp_format == other.timestamp_format + && self.timestamp_tz_format == other.timestamp_tz_format + && self.time_format == other.time_format + && self.duration_format == other.duration_format + && match (self.formatter_factory, other.formatter_factory) { + (Some(f1), Some(f2)) => std::ptr::eq(f1, f2), + (None, None) => true, + _ => false, + } + } +} + +impl Eq for FormatOptions<'_> {} + +impl Hash for FormatOptions<'_> { + fn hash(&self, state: &mut H) { + self.safe.hash(state); + self.null.hash(state); + self.date_format.hash(state); + self.datetime_format.hash(state); + self.timestamp_format.hash(state); + self.timestamp_tz_format.hash(state); + self.time_format.hash(state); + self.duration_format.hash(state); + self.types_info.hash(state); + self.formatter_factory + .map(|f| f as *const dyn ArrayFormatterFactory) + .hash(state); + } +} + impl<'a> FormatOptions<'a> { /// Creates a new set of format options pub const fn new() -> Self { @@ -95,6 +143,7 @@ impl<'a> FormatOptions<'a> { time_format: None, duration_format: DurationFormat::ISO8601, types_info: false, + formatter_factory: None, } } @@ -169,6 +218,26 @@ impl<'a> FormatOptions<'a> { Self { types_info, ..self } } + /// Overrides the [`ArrayFormatterFactory`] used to instantiate custom [`ArrayFormatter`]s. + pub const fn with_formatter_factory( + self, + formatter_factory: &'a dyn ArrayFormatterFactory, + ) -> Self { + Self { + formatter_factory: Some(formatter_factory), + ..self + } + } + + /// Removes the [`ArrayFormatterFactory`] used to instantiate custom [`ArrayFormatter`]s. This + /// will cause pretty-printers to use the default [`ArrayFormatter`]s. + pub const fn without_formatter_factory(self) -> Self { + Self { + formatter_factory: None, + ..self + } + } + /// Returns whether formatting errors should be written to the output instead of being converted /// into a [`std::fmt::Error`]. pub const fn safe(&self) -> bool { @@ -180,32 +249,32 @@ impl<'a> FormatOptions<'a> { self.null } - /// Returns the [`TimeFormat`] for date arrays. + /// Returns the format used for [`DataType::Date32`] columns. pub const fn date_format(&self) -> TimeFormat<'a> { self.date_format } - /// Returns the [`TimeFormat`] for datetime arrays. + /// Returns the format used for [`DataType::Date64`] columns. pub const fn datetime_format(&self) -> TimeFormat<'a> { self.datetime_format } - /// Returns the [`TimeFormat`] for timestamp arrays. + /// Returns the format used for [`DataType::Timestamp`] columns without a timezone. pub const fn timestamp_format(&self) -> TimeFormat<'a> { self.timestamp_format } - /// Returns the [`TimeFormat`] for timezone arrays. + /// Returns the format used for [`DataType::Timestamp`] columns with a timezone. pub const fn timestamp_tz_format(&self) -> TimeFormat<'a> { self.timestamp_tz_format } - /// Returns the [`TimeFormat`] for time arrays. + /// Returns the format used for [`DataType::Time32`] and [`DataType::Time64`] columns. pub const fn time_format(&self) -> TimeFormat<'a> { self.time_format } - /// Returns the [`DurationFormat`]. + /// Returns the [`DurationFormat`] used for duration columns. pub const fn duration_format(&self) -> DurationFormat { self.duration_format } @@ -214,6 +283,11 @@ impl<'a> FormatOptions<'a> { pub const fn types_info(&self) -> bool { self.types_info } + + /// Returns the [`ArrayFormatterFactory`] used to instantiate custom [`ArrayFormatter`]s. + pub const fn formatter_factory(&self) -> Option<&'a dyn ArrayFormatterFactory> { + self.formatter_factory + } } /// Implements [`Display`] for a specific array value diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index bb01fc5dea21..02fa917d3ee9 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -22,11 +22,10 @@ //! [`RecordBatch`]: arrow_array::RecordBatch //! [`Array`]: arrow_array::Array -use comfy_table::{Cell, Table}; -use std::fmt::Display; - use arrow_array::{Array, ArrayRef, RecordBatch}; use arrow_schema::{ArrowError, Field, SchemaRef}; +use comfy_table::{Cell, Table}; +use std::fmt::{Debug, Display}; use crate::display::{ArrayFormatter, FormatOptions}; @@ -38,15 +37,16 @@ use crate::display::{ArrayFormatter, FormatOptions}; /// /// ```rust /// use std::fmt::Write; -/// use arrow_array::{Array, Int32Array, cast::AsArray}; +/// use arrow_array::{cast::AsArray, Array, Int32Array}; /// use arrow_cast::display::{ArrayFormatter, DisplayIndex, FormatOptions, FormatResult}; -/// use arrow_cast::pretty::{pretty_format_batches_with_options_and_formatters, ArrayFormatterFactory}; +/// use arrow_cast::pretty::{pretty_format_batches_with_options, ArrayFormatterFactory}; /// use arrow_schema::{ArrowError, Field}; /// /// /// A custom formatter factory that can create a formatter for the special type `my_money`. /// /// /// /// This struct could have access to some kind of extension type registry that can lookup the /// /// correct formatter for an extension type on-demand. +/// #[derive(Debug)] /// struct MyFormatters {} /// /// impl ArrayFormatterFactory for MyFormatters { @@ -64,7 +64,7 @@ use crate::display::{ArrayFormatter, FormatOptions}; /// // We assume that my_money always is an Int32. /// let array = array.as_primitive(); /// let display_index = Box::new(MyMoneyFormatter { array, options }); -/// return Ok(Some(ArrayFormatter::new(display_index, options.safe))); +/// return Ok(Some(ArrayFormatter::new(display_index, options.safe()))); /// } /// /// Ok(None) // None indicates that the default formatter should be used. @@ -82,7 +82,7 @@ use crate::display::{ArrayFormatter, FormatOptions}; /// fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { /// match self.array.is_valid(idx) { /// true => write!(f, "{} €", self.array.value(idx))?, -/// false => write!(f, "{}", self.options.null)?, +/// false => write!(f, "{}", self.options.null())?, /// } /// /// Ok(()) @@ -93,13 +93,12 @@ use crate::display::{ArrayFormatter, FormatOptions}; /// let my_batches = vec![]; /// /// // Call the pretty printer with the custom formatter factory. -/// pretty_format_batches_with_options_and_formatters( +/// pretty_format_batches_with_options( /// &my_batches, -/// &FormatOptions::new(), -/// Some(&MyFormatters {}) +/// &FormatOptions::new().with_formatter_factory(&MyFormatters {}) /// ); /// ``` -pub trait ArrayFormatterFactory { +pub trait ArrayFormatterFactory: Debug { /// Creates a new [`ArrayFormatter`] for the given [`Array`] and an optional [`Field`]. If the /// default implementation should be used, return [`None`]. /// @@ -177,7 +176,7 @@ pub fn pretty_format_batches_with_schema( results: &[RecordBatch], ) -> Result, ArrowError> { let options = FormatOptions::default().with_display_error(true); - create_table(Some(schema), results, &options, None) + create_table(Some(schema), results, &options) } /// Create a visual representation of [`RecordBatch`]es with formatting options. @@ -214,27 +213,7 @@ pub fn pretty_format_batches_with_options( results: &[RecordBatch], options: &FormatOptions, ) -> Result, ArrowError> { - create_table(None, results, options, None) -} - -/// Create a visual representation of [`RecordBatch`]es with formatting options. -/// -/// # Arguments -/// * `results` - A slice of record batches to display -/// * `options` - [`FormatOptions`] that control the resulting display -/// * `formatters` - A slice of [`ArrayFormatter`]s that control the formatting of each column. If -/// a formatter is [`None`], the default formatter will be used. Must be exactly as long as the -/// number of fields in the record batches. -/// -/// # Example -/// -/// For an example, see [`ArrayFormatterFactory`]. -pub fn pretty_format_batches_with_options_and_formatters( - results: &[RecordBatch], - options: &FormatOptions, - formatters: Option<&dyn ArrayFormatterFactory>, -) -> Result, ArrowError> { - create_table(None, results, options, formatters) + create_table(None, results, options) } /// Create a visual representation of [`ArrayRef`] @@ -258,22 +237,7 @@ pub fn pretty_format_columns_with_options( results: &[ArrayRef], options: &FormatOptions, ) -> Result, ArrowError> { - create_column(col_name, results, options, None) -} - -/// Create a visual representation of [`ArrayRef`] with formatting options and possibly custom -/// [`ArrayFormatter`]s. -/// -/// Returns an error if `formatters` has a different length as `results`. -/// -/// See [`pretty_format_batches_with_options_and_formatters`] for an example -pub fn pretty_format_columns_with_options_and_formatters( - col_name: &str, - results: &[ArrayRef], - options: &FormatOptions, - formatters: Option<&dyn ArrayFormatterFactory>, -) -> Result, ArrowError> { - create_column(col_name, results, options, formatters) + create_column(col_name, results, options) } /// Prints a visual representation of record batches to stdout @@ -293,7 +257,6 @@ fn create_table( schema_opt: Option, results: &[RecordBatch], options: &FormatOptions, - formatters: Option<&dyn ArrayFormatterFactory>, ) -> Result { let mut table = Table::new(); table.load_preset("||--+-++| ++++++"); @@ -338,7 +301,7 @@ fn create_table( .columns() .iter() .zip(schema.fields().iter()) - .map(|(c, field)| match formatters { + .map(|(c, field)| match options.formatter_factory() { None => ArrayFormatter::try_new(c.as_ref(), options), Some(formatters) => formatters .create_display_index(c.as_ref(), options, Some(field)) @@ -363,7 +326,6 @@ fn create_column( field: &str, columns: &[ArrayRef], options: &FormatOptions, - formatters: Option<&dyn ArrayFormatterFactory>, ) -> Result { let mut table = Table::new(); table.load_preset("||--+-++| ++++++"); @@ -376,7 +338,7 @@ fn create_column( table.set_header(header); for col in columns { - let formatter = match formatters { + let formatter = match options.formatter_factory() { None => ArrayFormatter::try_new(col.as_ref(), options)?, Some(formatters) => formatters .create_display_index(col.as_ref(), options, None) @@ -1431,6 +1393,7 @@ mod tests { // /// The factory that will create the [`ArrayFormatter`]s. + #[derive(Debug)] struct TestFormatters {} impl ArrayFormatterFactory for TestFormatters { @@ -1498,7 +1461,9 @@ mod tests { #[test] fn test_format_batches_with_custom_formatters() { // define a schema. - let options = FormatOptions::new().with_null(""); + let options = FormatOptions::new() + .with_null("") + .with_formatter_factory(&TestFormatters {}); let money_metadata = HashMap::from([( extension::EXTENSION_TYPE_NAME_KEY.to_owned(), "my_money".to_owned(), @@ -1523,12 +1488,7 @@ mod tests { write!( &mut buf, "{}", - pretty_format_batches_with_options_and_formatters( - &[batch], - &options, - Some(&TestFormatters {}) - ) - .unwrap() + pretty_format_batches_with_options(&[batch], &options).unwrap() ) .unwrap(); @@ -1549,7 +1509,7 @@ mod tests { #[test] fn test_format_batches_with_custom_formatters_custom_schema_overrules_batch_schema() { // define a schema. - let options = FormatOptions::new(); + let options = FormatOptions::new().with_formatter_factory(&TestFormatters {}); let money_metadata = HashMap::from([( extension::EXTENSION_TYPE_NAME_KEY.to_owned(), "my_money".to_owned(), @@ -1583,7 +1543,6 @@ mod tests { ),]))), &[batch], &options, - Some(&TestFormatters {}) ) .unwrap() ) @@ -1618,11 +1577,10 @@ mod tests { write!( &mut buf, "{}", - pretty_format_columns_with_options_and_formatters( + pretty_format_columns_with_options( "income", &[array], - &FormatOptions::default(), - Some(&TestFormatters {}) + &FormatOptions::default().with_formatter_factory(&TestFormatters {}) ) .unwrap() ) From d18674181be812509a1643ed7e8ac0cda6fdccde Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Fri, 14 Nov 2025 09:38:13 +0100 Subject: [PATCH 8/8] Move ArrayFormatterFactory to display module to avoid issues with feature flags --- arrow-cast/src/display.rs | 89 +++++++++++++++++++++++++++++++++++-- arrow-cast/src/pretty.rs | 92 +++------------------------------------ 2 files changed, 91 insertions(+), 90 deletions(-) diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index 51af222dac98..2071f74fcf2c 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -23,11 +23,10 @@ //! record batch pretty printing. //! //! [`pretty`]: crate::pretty -use std::fmt::{Display, Formatter, Write}; +use std::fmt::{Debug, Display, Formatter, Write}; use std::hash::{Hash, Hasher}; use std::ops::Range; -use crate::pretty::ArrayFormatterFactory; use arrow_array::cast::*; use arrow_array::temporal_conversions::*; use arrow_array::timezone::Tz; @@ -83,7 +82,6 @@ pub struct FormatOptions<'a> { types_info: bool, /// Formatter factory used to instantiate custom [`ArrayFormatter`]s. This allows users to /// provide custom formatters. - #[cfg(feature = "prettyprint")] formatter_factory: Option<&'a dyn ArrayFormatterFactory>, } @@ -290,6 +288,91 @@ impl<'a> FormatOptions<'a> { } } +/// Allows creating a new [`ArrayFormatter`] for a given [`Array`] and an optional [`Field`]. +/// +/// # Example +/// +/// The example below shows how to create a custom formatter for a custom type `my_money`. Note that +/// this example requires the `prettyprint` feature. +/// +/// ```rust +/// use std::fmt::Write; +/// use arrow_array::{cast::AsArray, Array, Int32Array}; +/// use arrow_cast::display::{ArrayFormatter, ArrayFormatterFactory, DisplayIndex, FormatOptions, FormatResult}; +/// use arrow_cast::pretty::pretty_format_batches_with_options; +/// use arrow_schema::{ArrowError, Field}; +/// +/// /// A custom formatter factory that can create a formatter for the special type `my_money`. +/// /// +/// /// This struct could have access to some kind of extension type registry that can lookup the +/// /// correct formatter for an extension type on-demand. +/// #[derive(Debug)] +/// struct MyFormatters {} +/// +/// impl ArrayFormatterFactory for MyFormatters { +/// fn create_display_index<'formatter>( +/// &self, +/// array: &'formatter dyn Array, +/// options: &'formatter FormatOptions<'formatter>, +/// field: Option<&'formatter Field>, +/// ) -> Result>, ArrowError> { +/// // check if this is the money type +/// if field +/// .map(|f| f.extension_type_name() == Some("my_money")) +/// .unwrap_or(false) +/// { +/// // We assume that my_money always is an Int32. +/// let array = array.as_primitive(); +/// let display_index = Box::new(MyMoneyFormatter { array, options }); +/// return Ok(Some(ArrayFormatter::new(display_index, options.safe()))); +/// } +/// +/// Ok(None) // None indicates that the default formatter should be used. +/// } +/// } +/// +/// /// A formatter for the type `my_money` that wraps a specific array and has access to the +/// /// formatting options. +/// struct MyMoneyFormatter<'a> { +/// array: &'a Int32Array, +/// options: &'a FormatOptions<'a>, +/// } +/// +/// impl<'a> DisplayIndex for MyMoneyFormatter<'a> { +/// fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { +/// match self.array.is_valid(idx) { +/// true => write!(f, "{} €", self.array.value(idx))?, +/// false => write!(f, "{}", self.options.null())?, +/// } +/// +/// Ok(()) +/// } +/// } +/// +/// // Usually, here you would provide your record batches. +/// let my_batches = vec![]; +/// +/// // Call the pretty printer with the custom formatter factory. +/// pretty_format_batches_with_options( +/// &my_batches, +/// &FormatOptions::new().with_formatter_factory(&MyFormatters {}) +/// ); +/// ``` +pub trait ArrayFormatterFactory: Debug { + /// Creates a new [`ArrayFormatter`] for the given [`Array`] and an optional [`Field`]. If the + /// default implementation should be used, return [`None`]. + /// + /// The field shall be used to look up metadata about the `array` while `options` provide + /// information on formatting, for example, dates and times which should be considered by an + /// implementor. + fn create_display_index<'formatter>( + &self, + array: &'formatter dyn Array, + options: &'formatter FormatOptions<'formatter>, + field: Option<&'formatter Field>, + ) -> Result>, ArrowError>; +} + /// Implements [`Display`] for a specific array value pub struct ValueFormatter<'a> { idx: usize, diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index 02fa917d3ee9..847153359301 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -23,96 +23,12 @@ //! [`Array`]: arrow_array::Array use arrow_array::{Array, ArrayRef, RecordBatch}; -use arrow_schema::{ArrowError, Field, SchemaRef}; +use arrow_schema::{ArrowError, SchemaRef}; use comfy_table::{Cell, Table}; -use std::fmt::{Debug, Display}; +use std::fmt::Display; use crate::display::{ArrayFormatter, FormatOptions}; -/// Allows creating a new [`ArrayFormatter`] for a given [`Array`] and an optional [`Field`]. -/// -/// # Example -/// -/// The example below shows how to create a custom formatter for a custom type `my_money`. -/// -/// ```rust -/// use std::fmt::Write; -/// use arrow_array::{cast::AsArray, Array, Int32Array}; -/// use arrow_cast::display::{ArrayFormatter, DisplayIndex, FormatOptions, FormatResult}; -/// use arrow_cast::pretty::{pretty_format_batches_with_options, ArrayFormatterFactory}; -/// use arrow_schema::{ArrowError, Field}; -/// -/// /// A custom formatter factory that can create a formatter for the special type `my_money`. -/// /// -/// /// This struct could have access to some kind of extension type registry that can lookup the -/// /// correct formatter for an extension type on-demand. -/// #[derive(Debug)] -/// struct MyFormatters {} -/// -/// impl ArrayFormatterFactory for MyFormatters { -/// fn create_display_index<'formatter>( -/// &self, -/// array: &'formatter dyn Array, -/// options: &'formatter FormatOptions<'formatter>, -/// field: Option<&'formatter Field>, -/// ) -> Result>, ArrowError> { -/// // check if this is the money type -/// if field -/// .map(|f| f.extension_type_name() == Some("my_money")) -/// .unwrap_or(false) -/// { -/// // We assume that my_money always is an Int32. -/// let array = array.as_primitive(); -/// let display_index = Box::new(MyMoneyFormatter { array, options }); -/// return Ok(Some(ArrayFormatter::new(display_index, options.safe()))); -/// } -/// -/// Ok(None) // None indicates that the default formatter should be used. -/// } -/// } -/// -/// /// A formatter for the type `my_money` that wraps a specific array and has access to the -/// /// formatting options. -/// struct MyMoneyFormatter<'a> { -/// array: &'a Int32Array, -/// options: &'a FormatOptions<'a>, -/// } -/// -/// impl<'a> DisplayIndex for MyMoneyFormatter<'a> { -/// fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { -/// match self.array.is_valid(idx) { -/// true => write!(f, "{} €", self.array.value(idx))?, -/// false => write!(f, "{}", self.options.null())?, -/// } -/// -/// Ok(()) -/// } -/// } -/// -/// // Usually, here you would provide your record batches. -/// let my_batches = vec![]; -/// -/// // Call the pretty printer with the custom formatter factory. -/// pretty_format_batches_with_options( -/// &my_batches, -/// &FormatOptions::new().with_formatter_factory(&MyFormatters {}) -/// ); -/// ``` -pub trait ArrayFormatterFactory: Debug { - /// Creates a new [`ArrayFormatter`] for the given [`Array`] and an optional [`Field`]. If the - /// default implementation should be used, return [`None`]. - /// - /// The field shall be used to look up metadata about the `array` while `options` provide - /// information on formatting, for example, dates and times which should be considered by an - /// implementor. - fn create_display_index<'formatter>( - &self, - array: &'formatter dyn Array, - options: &'formatter FormatOptions<'formatter>, - field: Option<&'formatter Field>, - ) -> Result>, ArrowError>; -} - /// Create a visual representation of [`RecordBatch`]es /// /// Uses default values for display. See [`pretty_format_batches_with_options`] @@ -369,7 +285,9 @@ mod tests { use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer}; use arrow_schema::*; - use crate::display::{DisplayIndex, DurationFormat, array_value_to_string}; + use crate::display::{ + ArrayFormatterFactory, DisplayIndex, DurationFormat, array_value_to_string, + }; use super::*;