diff --git a/CHANGELOG.md b/CHANGELOG.md index 72e24de..85afa75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Versioning](https://semver.org/spec/v2.0.0.html). ### Changed +- Replaced `chrono` with `jiff` for date/time handling. - Requires Rust 1.84 or newer. - Updated arrow to version 56. diff --git a/Cargo.toml b/Cargo.toml index cb8d275..d726949 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,9 +19,9 @@ codecov = { repository = "petabi/structured", service = "github" } [dependencies] arrow = "56" -chrono = { version = "0.4.31", default-features = false, features = ["serde"] } csv-core = "0.1" itertools = "0.14" +jiff = { version = "0.2.15", features = ["serde"] } num-traits = "0.2" ordered-float = { version = "5", default-features = false } percent-encoding = "2.1" diff --git a/src/csv/reader.rs b/src/csv/reader.rs index 97abd17..1c0e225 100644 --- a/src/csv/reader.rs +++ b/src/csv/reader.rs @@ -151,8 +151,8 @@ impl From for ParseError { } } -impl From for ParseError { - fn from(error: chrono::format::ParseError) -> Self { +impl From for ParseError { + fn from(error: jiff::Error) -> Self { Self { inner: Box::new(error), } @@ -261,12 +261,13 @@ where /// Parses timestamp in RFC 3339 format. fn parse_timestamp(v: &[u8]) -> Result { - Ok( - chrono::NaiveDateTime::parse_from_str(str::from_utf8(v)?, "%Y-%m-%dT%H:%M:%S%.f%:z")? - .and_utc() - .timestamp_nanos_opt() - .unwrap_or_default(), - ) + let dt = jiff::civil::DateTime::strptime("%Y-%m-%dT%H:%M:%S%.f%:z", str::from_utf8(v)?)?; + Ok(dt + .to_zoned(jiff::tz::TimeZone::UTC)? + .timestamp() + .as_nanosecond() + .try_into() + .unwrap_or_default()) } #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] @@ -476,8 +477,8 @@ mod tests { use std::net::Ipv4Addr; use arrow::array::{Array, BinaryArray, StringArray}; - use chrono::{NaiveDate, NaiveDateTime}; use itertools::izip; + use jiff::civil::{date, DateTime}; use serde_test::{assert_tokens, Token}; use super::*; @@ -497,35 +498,14 @@ mod tests { Ipv4Addr::new(127, 0, 0, 3), ]; let c3_v: Vec = vec![2.2, 3.11, 122.8, 5.3123, 7.0, 10320.811, 5.5]; - let c4_v: Vec = vec![ - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(6, 10, 11) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(6, 15, 11) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 21) - .unwrap() - .and_hms_opt(20, 10, 11) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 21) - .unwrap() - .and_hms_opt(20, 10, 11) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(6, 45, 11) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 21) - .unwrap() - .and_hms_opt(8, 10, 11) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(9, 10, 11) - .unwrap(), + let c4_v: Vec = vec![ + date(2019, 9, 22).at(6, 10, 11, 0), + date(2019, 9, 22).at(6, 15, 11, 0), + date(2019, 9, 21).at(20, 10, 11, 0), + date(2019, 9, 21).at(20, 10, 11, 0), + date(2019, 9, 22).at(6, 45, 11, 0), + date(2019, 9, 21).at(8, 10, 11, 0), + date(2019, 9, 22).at(9, 10, 11, 0), ]; let fields = ["t1", "t2", "t3"]; @@ -543,7 +523,6 @@ mod tests { ]; let mut data = vec![]; - let fmt = "%Y-%m-%d %H:%M:%S"; for (c0, c1, c2, c3, c4, c5, c6) in izip!( c0_v.iter(), c1_v.iter(), @@ -562,7 +541,7 @@ mod tests { row.extend_from_slice(b","); row.extend(c3.to_string().into_bytes()); row.extend_from_slice(b","); - row.extend(c4.format(fmt).to_string().into_bytes()); + row.extend(c4.strftime("%Y-%m-%d %H:%M:%S").to_string().into_bytes()); row.extend_from_slice(b","); row.extend((*c5).to_string().into_bytes()); row.extend_from_slice(b","); @@ -583,7 +562,12 @@ mod tests { let c3 = Column::try_from_slice::(&c3_v).unwrap(); let c4 = Column::try_from_slice::( c4_v.iter() - .map(|v| v.and_utc().timestamp()) + .map(|v| { + v.to_zoned(jiff::tz::TimeZone::UTC) + .unwrap() + .timestamp() + .as_second() + }) .collect::>() .as_slice(), ) @@ -637,9 +621,11 @@ mod tests { FieldParser::float64(), FieldParser::timestamp_with_parser(move |v| { let val: String = v.iter().map(|&c| c as char).collect(); - Ok(NaiveDateTime::parse_from_str(&val, "%Y-%m-%d %H:%M:%S")? - .and_utc() - .timestamp()) + let dt = jiff::civil::DateTime::strptime("%Y-%m-%d %H:%M:%S", &val)?; + Ok(dt + .to_zoned(jiff::tz::TimeZone::UTC)? + .timestamp() + .as_second()) }), FieldParser::Utf8, FieldParser::Binary, diff --git a/src/stats.rs b/src/stats.rs index e4479bd..01a4f6e 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -5,7 +5,7 @@ use std::iter::Iterator; use std::net::{IpAddr, Ipv4Addr}; use arrow::datatypes::{Float64Type, Int64Type, UInt32Type, UInt64Type}; -use chrono::{DateTime, NaiveDateTime}; +use jiff::civil::DateTime; use num_traits::ToPrimitive; use serde::{Deserialize, Serialize}; use statistical::{mean, population_standard_deviation}; @@ -26,7 +26,7 @@ pub enum Element { Text(String), Binary(Vec), IpAddr(IpAddr), - DateTime(NaiveDateTime), + DateTime(DateTime), } #[derive(Debug, PartialEq, Clone, Serialize, Deserialize, Eq, Hash)] @@ -36,7 +36,7 @@ pub enum GroupElement { Enum(String), Text(String), IpAddr(IpAddr), - DateTime(NaiveDateTime), + DateTime(DateTime), } #[derive(Debug, Default, PartialEq, Clone, Serialize, Deserialize)] @@ -508,7 +508,7 @@ pub(crate) fn n_largest_count_datetime( values.iter(), rows.len(), n_largest_count, - &NaiveDateTime, + &DateTime, Element::DateTime, number_of_top_n ); @@ -524,7 +524,7 @@ pub(crate) fn convert_time_intervals( column: &Column, rows: &[usize], time_interval: u32, -) -> Vec { +) -> Vec { const A_BILLION: i64 = 1_000_000_000; let time_interval = if time_interval > MAX_TIME_INTERVAL { MAX_TIME_INTERVAL @@ -547,9 +547,10 @@ pub(crate) fn convert_time_intervals( // The first interval of each day should start with 00:00:00. let mut interval_idx = v / A_BILLION; interval_idx = (interval_idx / time_interval) * time_interval; - DateTime::from_timestamp(interval_idx, 0) + jiff::Timestamp::from_second(interval_idx) .unwrap_or_default() - .naive_utc() + .to_zoned(jiff::tz::TimeZone::UTC) + .datetime() }) .collect::>() } @@ -630,7 +631,7 @@ where #[cfg(test)] mod tests { use arrow::datatypes::Int64Type; - use chrono::NaiveDate; + use jiff::civil::date; use super::*; use crate::Column; @@ -638,54 +639,61 @@ mod tests { #[test] fn test_convert_time_intervals() { let c4_v: Vec = vec![ - NaiveDate::from_ymd_opt(2019, 9, 22) + date(2019, 9, 22) + .at(6, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(6, 10, 11) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(6, 15, 11) + date(2019, 9, 22) + .at(6, 15, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 21) + date(2019, 9, 21) + .at(20, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(20, 10, 11) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 21) - .unwrap() - .and_hms_opt(20, 10, 11) + date(2019, 9, 21) + .at(20, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) + date(2019, 9, 22) + .at(6, 45, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(6, 45, 11) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 21) - .unwrap() - .and_hms_opt(8, 10, 11) + date(2019, 9, 21) + .at(8, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) + date(2019, 9, 22) + .at(9, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(9, 10, 11) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), ]; let c4 = Column::try_from_slice::(&c4_v).unwrap(); @@ -693,56 +701,44 @@ mod tests { let time_interval = 3600; let rst = convert_time_intervals(&c4, &rows, time_interval); assert_eq!(rst.len(), 7); - assert_eq!( - rst.first(), - Some( - &NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(6, 0, 0) - .unwrap() - ) - ); - assert_eq!( - rst.last(), - Some( - &NaiveDate::from_ymd_opt(2019, 9, 21) - .unwrap() - .and_hms_opt(8, 0, 0) - .unwrap() - ) - ); + assert_eq!(rst.first(), Some(&date(2019, 9, 22).at(6, 0, 0, 0))); + assert_eq!(rst.last(), Some(&date(2019, 9, 21).at(8, 0, 0, 0))); } #[test] fn test_the_first_interval_of_each_day() { let c4_v: Vec = vec![ - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(0, 3, 20) + date(2019, 9, 22) + .at(0, 3, 20, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) + date(2019, 9, 22) + .at(0, 9, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(0, 9, 11) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(0, 10, 11) + date(2019, 9, 22) + .at(0, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) + date(2019, 9, 22) + .at(1, 15, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(1, 15, 11) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), ]; let c4 = Column::try_from_slice::(&c4_v).unwrap(); @@ -750,22 +746,10 @@ mod tests { let time_interval = 3600; let rst = convert_time_intervals(&c4, &rows, time_interval); let converted = [ - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(1, 0, 0) - .unwrap(), + date(2019, 9, 22).at(0, 0, 0, 0), + date(2019, 9, 22).at(0, 0, 0, 0), + date(2019, 9, 22).at(0, 0, 0, 0), + date(2019, 9, 22).at(1, 0, 0, 0), ]; for (seq, c) in converted.iter().enumerate() { assert_eq!(rst.get(seq), Some(c)); @@ -774,22 +758,10 @@ mod tests { let time_interval = 600; let rst = convert_time_intervals(&c4, &rows, time_interval); let converted = [ - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(0, 10, 0) - .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(1, 10, 0) - .unwrap(), + date(2019, 9, 22).at(0, 0, 0, 0), + date(2019, 9, 22).at(0, 0, 0, 0), + date(2019, 9, 22).at(0, 10, 0, 0), + date(2019, 9, 22).at(1, 10, 0, 0), ]; for (seq, c) in converted.iter().enumerate() { assert_eq!(rst.get(seq), Some(c)); diff --git a/src/table.rs b/src/table.rs index 03f5a29..cfb576c 100644 --- a/src/table.rs +++ b/src/table.rs @@ -661,7 +661,7 @@ mod tests { use ahash::AHasher; use arrow::datatypes::{Field, Float64Type, UInt32Type, UInt64Type}; - use chrono::NaiveDate; + use jiff::civil::date; use super::*; use crate::Column; @@ -718,61 +718,69 @@ mod tests { Field::new("", DataType::Int64, false), ]); let c0_v: Vec = vec![ - NaiveDate::from_ymd_opt(2020, 1, 1) + date(2020, 1, 1) + .at(0, 0, 10, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(0, 0, 10) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2020, 1, 1) - .unwrap() - .and_hms_opt(0, 0, 13) + date(2020, 1, 1) + .at(0, 0, 13, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2020, 1, 1) + date(2020, 1, 1) + .at(0, 0, 15, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(0, 0, 15) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2020, 1, 1) - .unwrap() - .and_hms_opt(0, 0, 22) + date(2020, 1, 1) + .at(0, 0, 22, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2020, 1, 1) + date(2020, 1, 1) + .at(0, 0, 22, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(0, 0, 22) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2020, 1, 1) - .unwrap() - .and_hms_opt(0, 0, 31) + date(2020, 1, 1) + .at(0, 0, 31, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2020, 1, 1) + date(2020, 1, 1) + .at(0, 0, 33, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(0, 0, 33) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2020, 1, 1) - .unwrap() - .and_hms_opt(0, 1, 1) + date(2020, 1, 1) + .at(0, 1, 1, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), ]; let c1_v: Vec = vec![1, 32, 3, 5, 2, 1, 3, 24]; @@ -825,54 +833,61 @@ mod tests { ]; let c3_v: Vec = vec![2.2, 2.203, 2.8, 5.30123, 7.0, 10320.811, 5.3009]; let c4_v: Vec = vec![ - NaiveDate::from_ymd_opt(2019, 9, 22) + date(2019, 9, 22) + .at(6, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(6, 10, 11) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(6, 15, 11) + date(2019, 9, 22) + .at(6, 15, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 21) + date(2019, 9, 21) + .at(20, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(20, 10, 11) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 21) - .unwrap() - .and_hms_opt(20, 10, 11) + date(2019, 9, 21) + .at(20, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) + date(2019, 9, 22) + .at(6, 45, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_hms_opt(6, 45, 11) - .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 21) - .unwrap() - .and_hms_opt(8, 10, 11) + date(2019, 9, 21) + .at(8, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(9, 10, 11) + date(2019, 9, 22) + .at(9, 10, 11, 0) + .to_zoned(jiff::tz::TimeZone::UTC) .unwrap() - .and_utc() - .timestamp_nanos_opt() + .timestamp() + .as_nanosecond() + .try_into() .unwrap(), ]; let tester = ["t1".to_string(), "t2".to_string(), "t3".to_string()]; @@ -935,12 +950,7 @@ mod tests { ); assert_eq!(5, stat[3].n_largest_count.number_of_elements()); assert_eq!( - Element::DateTime( - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(6, 0, 0) - .unwrap() - ), + Element::DateTime(date(2019, 9, 22).at(6, 0, 0, 0)), stat[4].n_largest_count.top_n()[0].value ); assert_eq!(3, stat[5].n_largest_count.number_of_elements()); @@ -978,12 +988,7 @@ mod tests { ); assert_eq!(5, stat[3].n_largest_count.number_of_elements()); assert_eq!( - Element::DateTime( - NaiveDate::from_ymd_opt(2019, 9, 22) - .unwrap() - .and_hms_opt(6, 0, 0) - .unwrap() - ), + Element::DateTime(date(2019, 9, 22).at(6, 0, 0, 0)), stat[4].n_largest_count.top_n()[0].value ); assert_eq!(