Skip to content

Commit 316b6d5

Browse files
committed
draft-date-part
1 parent d3cedb2 commit 316b6d5

2 files changed

Lines changed: 91 additions & 6 deletions

File tree

datafusion/functions/src/datetime/date_part.rs

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use std::sync::Arc;
2121
use arrow::array::timezone::Tz;
2222
use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, Int64Array};
2323
use arrow::compute::kernels::cast_utils::IntervalUnit;
24-
use arrow::compute::{DatePart, binary, date_part};
24+
use arrow::compute::{DatePart, binary, cast, date_part};
2525
use arrow::datatypes::DataType::{
2626
Date32, Date64, Duration, Interval, Time32, Time64, Timestamp,
2727
};
@@ -37,11 +37,12 @@ use datafusion_common::types::{NativeType, logical_date};
3737
use datafusion_common::{
3838
Result, ScalarValue,
3939
cast::{
40-
as_date32_array, as_date64_array, as_int32_array, as_interval_dt_array,
41-
as_interval_mdn_array, as_interval_ym_array, as_time32_millisecond_array,
42-
as_time32_second_array, as_time64_microsecond_array, as_time64_nanosecond_array,
43-
as_timestamp_microsecond_array, as_timestamp_millisecond_array,
44-
as_timestamp_nanosecond_array, as_timestamp_second_array,
40+
as_date32_array, as_date64_array, as_duration_nanosecond_array, as_int32_array,
41+
as_interval_dt_array, as_interval_mdn_array, as_interval_ym_array,
42+
as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array,
43+
as_time64_nanosecond_array, as_timestamp_microsecond_array,
44+
as_timestamp_millisecond_array, as_timestamp_nanosecond_array,
45+
as_timestamp_second_array,
4546
},
4647
exec_err, internal_err, not_impl_err,
4748
types::logical_string,
@@ -217,6 +218,8 @@ impl ScalarUDFImpl for DatePartFunc {
217218

218219
// using IntervalUnit here means we hand off all the work of supporting plurals (like "seconds")
219220
// and synonyms ( like "ms,msec,msecond,millisecond") to Arrow
221+
let is_duration = matches!(array.data_type(), Duration(_));
222+
220223
let arr = if let Ok(interval_unit) = IntervalUnit::from_str(part_trim) {
221224
match interval_unit {
222225
IntervalUnit::Year => date_part(array.as_ref(), DatePart::Year)?,
@@ -225,6 +228,14 @@ impl ScalarUDFImpl for DatePartFunc {
225228
IntervalUnit::Day => date_part(array.as_ref(), DatePart::Day)?,
226229
IntervalUnit::Hour => date_part(array.as_ref(), DatePart::Hour)?,
227230
IntervalUnit::Minute => date_part(array.as_ref(), DatePart::Minute)?,
231+
IntervalUnit::Second
232+
| IntervalUnit::Millisecond
233+
| IntervalUnit::Microsecond
234+
| IntervalUnit::Nanosecond
235+
if is_duration =>
236+
{
237+
duration_part(array.as_ref(), interval_unit)?
238+
}
228239
IntervalUnit::Second => seconds_as_i32(array.as_ref(), Second)?,
229240
IntervalUnit::Millisecond => seconds_as_i32(array.as_ref(), Millisecond)?,
230241
IntervalUnit::Microsecond => seconds_as_i32(array.as_ref(), Microsecond)?,
@@ -564,3 +575,34 @@ fn seconds_ns(array: &dyn Array) -> Result<ArrayRef> {
564575
Ok(Arc::new(r))
565576
}
566577
}
578+
579+
/// Extract second/millisecond/microsecond/nanosecond component from Duration arrays.
580+
///
581+
/// Arrow's `date_part` for Duration types returns total values (e.g., total seconds),
582+
/// but PostgreSQL semantics require component values (seconds within the current minute).
583+
/// This function casts to Duration(Nanosecond), then extracts the sub-minute component
584+
/// to avoid i32 overflow.
585+
fn duration_part(array: &dyn Array, unit: IntervalUnit) -> Result<ArrayRef> {
586+
const NANOS_PER_MINUTE: i64 = 60 * 1_000_000_000;
587+
588+
let nanos_array = cast(array, &Duration(Nanosecond))?;
589+
let arr = as_duration_nanosecond_array(nanos_array.as_ref())?;
590+
591+
match unit {
592+
IntervalUnit::Nanosecond => {
593+
let r: Int64Array = arr.unary(|d| d % NANOS_PER_MINUTE);
594+
Ok(Arc::new(r))
595+
}
596+
IntervalUnit::Second | IntervalUnit::Millisecond | IntervalUnit::Microsecond => {
597+
let divisor: i64 = match unit {
598+
IntervalUnit::Second => 1_000_000_000,
599+
IntervalUnit::Millisecond => 1_000_000,
600+
IntervalUnit::Microsecond => 1_000,
601+
_ => unreachable!(),
602+
};
603+
let r: Int32Array = arr.unary(|d| ((d % NANOS_PER_MINUTE) / divisor) as i32);
604+
Ok(Arc::new(r))
605+
}
606+
_ => exec_err!("duration_part does not support {unit:?}"),
607+
}
608+
}

datafusion/sqllogictest/test_files/datetime/date_part.slt

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,49 @@ SELECT extract(day from arrow_cast(NULL, 'Duration(Second)'))
11741174
----
11751175
NULL
11761176

1177+
# test duration component extraction (not total) - issue #14738
1178+
# 40 minutes = 2400 seconds, microsecond component should be 0
1179+
query I
1180+
SELECT date_part('microsecond', timestamp '1970-01-01T00:40:00' - timestamp '1970-01-01T00:00:00')
1181+
----
1182+
0
1183+
1184+
# 3661 seconds = 1 hour 1 minute 1 second, second component = 1
1185+
query I
1186+
SELECT extract(second from arrow_cast(3661, 'Duration(Second)'))
1187+
----
1188+
1
1189+
1190+
# 3661 seconds, millisecond component = 1000 (1 second * 1000)
1191+
query I
1192+
SELECT extract(millisecond from arrow_cast(3661, 'Duration(Second)'))
1193+
----
1194+
1000
1195+
1196+
# 3661 seconds, microsecond component = 1000000 (1 second * 1000000)
1197+
query I
1198+
SELECT extract(microsecond from arrow_cast(3661, 'Duration(Second)'))
1199+
----
1200+
1000000
1201+
1202+
# 86400000000 microseconds = 1 day exactly, microsecond component = 0
1203+
query I
1204+
SELECT extract(microsecond from arrow_cast(86400000000, 'Duration(Microsecond)'))
1205+
----
1206+
0
1207+
1208+
# 3661000 milliseconds = 1h1m1s, millisecond component = 1000
1209+
query I
1210+
SELECT extract(millisecond from arrow_cast(3661000, 'Duration(Millisecond)'))
1211+
----
1212+
1000
1213+
1214+
# 2400000000000 nanoseconds = 2400 seconds = 40 minutes exactly, second component = 0
1215+
query I
1216+
SELECT extract(second from arrow_cast(2400000000000, 'Duration(Nanosecond)'))
1217+
----
1218+
0
1219+
11771220
# test_extract_date_part_func
11781221

11791222
query B

0 commit comments

Comments
 (0)