diff --git a/velox/docs/functions/spark/datetime.rst b/velox/docs/functions/spark/datetime.rst index d87f5819885a..09446378407a 100644 --- a/velox/docs/functions/spark/datetime.rst +++ b/velox/docs/functions/spark/datetime.rst @@ -244,6 +244,20 @@ These functions support TIMESTAMP and DATE input types. SELECT month('2009-07-30'); -- 7 +.. spark:function:: months_between(timestamp1, timestamp2, roundOff) -> double + + Returns the number of months between ``timestamp1`` and ``timestamp2``. + If ``timestamp1`` is later than ``timestamp2``, the result is positive. + If ``timestamp1`` and ``timestamp2`` are on the same day of the month, or both are the + last day of their month, the time of day is ignored. Otherwise, the difference is calculated + based on 31 days per month, and rounded to 8 decimal places unless ``roundOff`` is false. :: + + SELECT months_between('1997-02-28 10:30:00', '1996-10-30', true); -- 3.94959677 + SELECT months_between('1997-02-28 10:30:00', '1996-10-30', false); -- 3.9495967741935485 + SELECT months_between('1997-02-28 10:30:00', '1996-03-31 11:00:00', true); -- 11.0 + SELECT months_between('1997-02-28 10:30:00', '1996-03-28 11:00:00', true); -- 11.0 + SELECT months_between('1997-02-21 10:30:00', '1996-03-21 11:00:00', true); -- 11.0 + .. spark:function:: next_day(startDate, dayOfWeek) -> date Returns the first date which is later than ``startDate`` and named as ``dayOfWeek``. 
diff --git a/velox/functions/lib/TimeUtils.h b/velox/functions/lib/TimeUtils.h index 424a4ba3d3a2..4e1c1908d26c 100644 --- a/velox/functions/lib/TimeUtils.h +++ b/velox/functions/lib/TimeUtils.h @@ -27,6 +27,9 @@ namespace facebook::velox::functions { +inline constexpr int64_t kSecondsInMinute = 60; +inline constexpr int64_t kMinutesInHour = 60; +inline constexpr int64_t kSecondsInHour = kSecondsInMinute * kMinutesInHour; inline constexpr int64_t kSecondsInDay = 86'400; inline constexpr int64_t kDaysInWeek = 7; extern const folly::F14FastMap kDayOfWeekNames; diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 5dba18c95d76..c3f9a2889eca 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -1082,4 +1082,60 @@ struct TimestampAddFunction { std::optional unit_ = std::nullopt; }; +/* Spark months_between(timestamp1, timestamp2, roundOff): converts both timestamps with getDateTime using the time zone read from the query config and returns the number of months between them as a double, positive when timestamp1 is later. */ template +struct MonthsBetweenFunction { + VELOX_DEFINE_FUNCTION_TYPES(TExec); + + /* Stores the time zone returned by getTimeZoneFromConfig for use in call(); the argument pointers are unused. */ FOLLY_ALWAYS_INLINE void initialize( + const std::vector& /*inputTypes*/, + const core::QueryConfig& config, + const arg_type* /*timestamp1*/, + const arg_type* /*timestamp2*/, + const arg_type* /*roundOff*/) { + sessionTimeZone_ = getTimeZoneFromConfig(config); + } + + /* Converts both inputs with getDateTime and sessionTimeZone_, then writes the monthsBetween result to result. */ FOLLY_ALWAYS_INLINE void call( + out_type& result, + const arg_type& timestamp1, + const arg_type& timestamp2, + const arg_type& roundOff) { + const auto dateTime1 = getDateTime(timestamp1, sessionTimeZone_); + const auto dateTime2 = getDateTime(timestamp2, sessionTimeZone_); + result = monthsBetween(dateTime1, dateTime2, roundOff); + } + + private: + /* True if the day of month of tm equals util::getMaxDayOfMonth for the year and month of tm. */ FOLLY_ALWAYS_INLINE bool isEndDayOfMonth(const std::tm& tm) { + return tm.tm_mday == util::getMaxDayOfMonth(getYear(tm), getMonth(tm)); + } + + /* Whole-month difference from the year and month fields, plus, unless the same-day or both-month-end rule applies, the day and time-of-day difference as a fraction of a 31-day month. */ FOLLY_ALWAYS_INLINE double + monthsBetween(const std::tm& tm1, const std::tm& tm2, bool roundOff) { + const double monthDiff = + (tm1.tm_year - tm2.tm_year) * kMonthInYear + tm1.tm_mon - tm2.tm_mon; + if (tm1.tm_mday == tm2.tm_mday || + 
(isEndDayOfMonth(tm1) && isEndDayOfMonth(tm2))) { + /* Same day of month, or both on the last day of their month: time of day is ignored. */ return monthDiff; + } + /* Seconds elapsed since midnight for each side. */ const auto secondsInDay1 = tm1.tm_hour * kSecondsInHour + + tm1.tm_min * kSecondsInMinute + tm1.tm_sec; + const auto secondsInDay2 = tm2.tm_hour * kSecondsInHour + + tm2.tm_min * kSecondsInMinute + tm2.tm_sec; + /* Signed day-of-month plus time-of-day difference, in seconds. */ const auto secondsDiff = (tm1.tm_mday - tm2.tm_mday) * kSecondsInDay + + secondsInDay1 - secondsInDay2; + const auto diff = + monthDiff + static_cast(secondsDiff) / kSecondsInMonth; + /* Round to 8 decimal places when requested. */ if (roundOff) { + return round(diff * kRoundingPrecision) / kRoundingPrecision; + } + return diff; + } + + // Precision factor for 8 decimal places rounding. + static constexpr int64_t kRoundingPrecision = 1e8; + static constexpr int64_t kSecondsInMonth = kSecondsInDay * 31; + const tz::TimeZone* sessionTimeZone_ = nullptr; +}; + } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/registration/RegisterDatetime.cpp b/velox/functions/sparksql/registration/RegisterDatetime.cpp index c3b758ce3f89..455262d95980 100644 --- a/velox/functions/sparksql/registration/RegisterDatetime.cpp +++ b/velox/functions/sparksql/registration/RegisterDatetime.cpp @@ -111,6 +111,8 @@ void registerDatetimeFunctions(const std::string& prefix) { Varchar, int32_t, Timestamp>({prefix + "timestampadd"}); + registerFunction( + {prefix + "months_between"}); } } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index f26cbc020afa..ee0ef748e9d9 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -1796,5 +1796,59 @@ TEST_F(DateTimeFunctionsTest, timestampadd) { 10, Timestamp(1582970400, 500'999'999) /*2020-02-29 10:00:00.500*/)); } + +TEST_F(DateTimeFunctionsTest, monthsBetween) { + const auto monthsBetween = [&](std::optional timestamp1, + std::optional timestamp2, + std::optional 
roundOff) { + return evaluateOnce( + "months_between(c0, c1, c2)", timestamp1, timestamp2, roundOff); + }; + + /* Different days of month: fractional result; rounded to 8 decimal places only when roundOff is true. */ EXPECT_EQ( + 3.94959677, + monthsBetween( + parseTimestamp("1997-02-28 10:30:00"), + parseTimestamp("1996-10-30"), + true)); + EXPECT_EQ( + 3.9495967741935485, + monthsBetween( + parseTimestamp("1997-02-28 10:30:00"), + parseTimestamp("1996-10-30"), + false)); + EXPECT_EQ( + 3.9495949074074073, + monthsBetween( + parseTimestamp("1997-02-28 10:30:00"), + parseTimestamp("1996-10-30 00:00:05"), + false)); + EXPECT_EQ( + -3.9495949074074073, + monthsBetween( + parseTimestamp("1996-10-30 00:00:05"), + parseTimestamp("1997-02-28 10:30:00"), + false)); + // `timestamp1` and `timestamp2` both are the last day of month. + EXPECT_EQ( + 11, + monthsBetween( + parseTimestamp("1997-02-28 10:30:00"), + parseTimestamp("1996-03-31 11:00:00"), + true)); + // `timestamp1` and `timestamp2` are on the same day of month. + EXPECT_EQ( + 11, + monthsBetween( + parseTimestamp("1997-02-28 10:30:00"), + parseTimestamp("1996-03-28 11:00:00"), + true)); + EXPECT_EQ( + 11, + monthsBetween( + parseTimestamp("1997-02-21 10:30:00"), + parseTimestamp("1996-03-21 11:00:00"), + true)); +} } // namespace } // namespace facebook::velox::functions::sparksql::test