From 0b31f8e9b327a23ae1a9d951c8b0cd129cfae539 Mon Sep 17 00:00:00 2001 From: zouyunhe Date: Tue, 2 Sep 2025 02:58:33 +0000 Subject: [PATCH 1/9] add support for proctime --- velox/functions/sparksql/DateTimeFunctions.h | 11 +++++++++++ .../sparksql/registration/RegisterDatetime.cpp | 2 ++ 2 files changed, 13 insertions(+) diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 2da694fb9e0a..56d2268e5edf 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -143,6 +143,17 @@ struct UnixTimestampFunction { } }; +template +struct CurrentTimestampFunction { + VELOX_DEFINE_FUNCTION_TYPES(T); + + FOLLY_ALWAYS_INLINE bool call( + out_type& result) { + result = Timestamp::now(); + return true; + } +}; + template struct UnixTimestampParseFunction { VELOX_DEFINE_FUNCTION_TYPES(T); diff --git a/velox/functions/sparksql/registration/RegisterDatetime.cpp b/velox/functions/sparksql/registration/RegisterDatetime.cpp index 0625ae202544..6c5005a4dae2 100644 --- a/velox/functions/sparksql/registration/RegisterDatetime.cpp +++ b/velox/functions/sparksql/registration/RegisterDatetime.cpp @@ -94,6 +94,8 @@ void registerDatetimeFunctions(const std::string& prefix) { {prefix + "timestamp_millis"}); registerFunction( {prefix + "date_trunc"}); + registerFunction( + {prefix + "current_timestamp"}); } } // namespace facebook::velox::functions::sparksql From 357b8d96a15829a5272e0681670ebae3d299444a Mon Sep 17 00:00:00 2001 From: zouyunhe Date: Tue, 2 Sep 2025 03:13:48 +0000 Subject: [PATCH 2/9] support split_index --- velox/functions/sparksql/String.h | 4 ++++ velox/functions/sparksql/registration/RegisterString.cpp | 2 ++ 2 files changed, 6 insertions(+) diff --git a/velox/functions/sparksql/String.h b/velox/functions/sparksql/String.h index b90b9b851d33..55185c16d32b 100644 --- a/velox/functions/sparksql/String.h +++ b/velox/functions/sparksql/String.h @@ -22,6 +22,7 @@ #include "velox/functions/Macros.h" #include "velox/functions/lib/string/StringCore.h" #include "velox/functions/lib/string/StringImpl.h" +#include "velox/functions/prestosql/SplitPart.h" namespace facebook::velox::functions::sparksql { @@ -1556,4 +1557,7 @@ struct Empty2NullFunction { } }; +template +struct SplitIndex : public functions::SplitPart {}; + } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/registration/RegisterString.cpp b/velox/functions/sparksql/registration/RegisterString.cpp index 007b77868a1a..ff6a2a4c4122 100644 --- a/velox/functions/sparksql/registration/RegisterString.cpp +++ b/velox/functions/sparksql/registration/RegisterString.cpp @@ -147,6 +147,8 @@ void registerStringFunctions(const std::string& prefix) { registerFunctionCallToSpecialForm( ConcatWsCallToSpecialForm::kConcatWs, std::make_unique()); + registerFunction( + {prefix + "split_index"}); } } // namespace sparksql } // namespace facebook::velox::functions From 237c7a6d4d8f3067659094c2a7bdb8f51aec4604 Mon Sep 17 00:00:00 2001 From: zouyunhe Date: Fri, 5 Sep 2025 07:15:02 +0000 Subject: [PATCH 3/9] add tests first --- .../dwio/parquet/writer/arrow/CMakeLists.txt | 6 +- .../functions/sparksql/fuzzer/CMakeLists.txt | 6 +- .../sparksql/tests/ArithmeticTest.cpp | 102 ++++---- .../sparksql/tests/AtLeastNNonNullsTest.cpp | 34 +-- .../sparksql/tests/DateTimeFunctionsTest.cpp | 36 +-- .../sparksql/tests/ElementAtTest.cpp | 50 ++-- .../functions/sparksql/tests/FromJsonTest.cpp | 28 +- .../sparksql/tests/GetJsonObjectTest.cpp | 40 +-- .../sparksql/tests/GetStructFieldTest.cpp | 38 +-- .../sparksql/tests/MakeDecimalTest.cpp | 92 +++---- .../sparksql/tests/MakeTimestampTest.cpp | 242 +++++++++--------- velox/functions/sparksql/tests/MaskTest.cpp | 42 +-- .../sparksql/tests/RegexFunctionsTest.cpp | 28 +- .../sparksql/tests/SortArrayTest.cpp | 16 +- .../sparksql/tests/SparkPartitionIdTest.cpp | 22 +- velox/functions/sparksql/tests/StringTest.cpp | 11 + 16 files changed, 402 insertions(+), 391 deletions(-) diff --git a/velox/dwio/parquet/writer/arrow/CMakeLists.txt b/velox/dwio/parquet/writer/arrow/CMakeLists.txt index fb0e4a4bb7df..4b91c85aebf6 100644 --- a/velox/dwio/parquet/writer/arrow/CMakeLists.txt +++ b/velox/dwio/parquet/writer/arrow/CMakeLists.txt @@ -14,9 +14,9 @@ add_subdirectory(util) -if(${VELOX_BUILD_TESTING}) - add_subdirectory(tests) -endif() +#if(${VELOX_BUILD_TESTING}) + # add_subdirectory(tests) +#endif() velox_add_library( velox_dwio_arrow_parquet_writer_lib diff --git a/velox/functions/sparksql/fuzzer/CMakeLists.txt b/velox/functions/sparksql/fuzzer/CMakeLists.txt index ecb0ac20f709..2030960dfd87 100644 --- a/velox/functions/sparksql/fuzzer/CMakeLists.txt +++ b/velox/functions/sparksql/fuzzer/CMakeLists.txt @@ -113,6 +113,6 @@ target_link_libraries( GTest::gtest GTest::gtest_main) -if(${VELOX_BUILD_TESTING}) - add_subdirectory(tests) -endif() +#if(${VELOX_BUILD_TESTING}) +# add_subdirectory(tests) +#endif() diff --git a/velox/functions/sparksql/tests/ArithmeticTest.cpp b/velox/functions/sparksql/tests/ArithmeticTest.cpp index 72ac45caf0d0..d0dcc111bbea 100644 --- a/velox/functions/sparksql/tests/ArithmeticTest.cpp +++ b/velox/functions/sparksql/tests/ArithmeticTest.cpp @@ -677,57 +677,57 @@ TEST_F(ArithmeticTest, widthBucket) { EXPECT_EQ(widthBucket(-kInf, 0, 4, 3), 0); } -TEST_F(ArithmeticTest, checkedAdd) { - assertErrorForCheckedAdd(INT8_MAX, 1, "Arithmetic overflow: 127 + 1"); - assertErrorForCheckedAdd( - INT16_MAX, 1, "Arithmetic overflow: 32767 + 1"); - assertErrorForCheckedAdd( - INT32_MAX, 1, "Arithmetic overflow: 2147483647 + 1"); - assertErrorForCheckedAdd( - INT64_MAX, 1, "Arithmetic overflow: 9223372036854775807 + 1"); - EXPECT_EQ(checkedAdd(kInf, 1), kInf); - EXPECT_EQ(checkedAdd(kInfDouble, 1), kInfDouble); -} - -TEST_F(ArithmeticTest, checkedSubtract) { - assertErrorForcheckedSubtract( - INT8_MIN, 1, "Arithmetic overflow: -128 - 1"); - assertErrorForcheckedSubtract( - INT16_MIN, 1, "Arithmetic overflow: -32768 - 1"); - assertErrorForcheckedSubtract( - INT32_MIN, 1, "Arithmetic overflow: -2147483648 - 1"); - assertErrorForcheckedSubtract( - INT64_MIN, 1, "Arithmetic overflow: -9223372036854775808 - 1"); - EXPECT_EQ(checkedSubtract(kInf, 1), kInf); - EXPECT_EQ(checkedSubtract(kInfDouble, 1), kInfDouble); -} - -TEST_F(ArithmeticTest, checkedMultiply) { - assertErrorForCheckedMultiply( - INT8_MAX, 2, "Arithmetic overflow: 127 * 2"); - assertErrorForCheckedMultiply( - INT16_MAX, 2, "Arithmetic overflow: 32767 * 2"); - assertErrorForCheckedMultiply( - INT32_MAX, 2, "Arithmetic overflow: 2147483647 * 2"); - assertErrorForCheckedMultiply( - INT64_MAX, 2, "Arithmetic overflow: 9223372036854775807 * 2"); - EXPECT_EQ(checkedMultiply(kInf, 1), kInf); - EXPECT_EQ(checkedMultiply(kInfDouble, 1), kInfDouble); -} - -TEST_F(ArithmeticTest, checkedDivide) { - assertErrorForCheckedDivide(1, 0, "division by zero"); - assertErrorForCheckedDivide( - INT8_MIN, -1, "Arithmetic overflow: -128 / -1"); - assertErrorForCheckedDivide( - INT16_MIN, -1, "Arithmetic overflow: -32768 / -1"); - assertErrorForCheckedDivide( - INT32_MIN, -1, "Arithmetic overflow: -2147483648 / -1"); - assertErrorForCheckedDivide( - INT64_MIN, -1, "Arithmetic overflow: -9223372036854775808 / -1"); - EXPECT_EQ(checkedDivide(kInf, 1), kInf); - EXPECT_EQ(checkedDivide(kInfDouble, 1), kInfDouble); -} +// TEST_F(ArithmeticTest, checkedAdd) { +// assertErrorForCheckedAdd(INT8_MAX, 1, "Arithmetic overflow: 127 + 1"); +// assertErrorForCheckedAdd( +// INT16_MAX, 1, "Arithmetic overflow: 32767 + 1"); +// assertErrorForCheckedAdd( +// INT32_MAX, 1, "Arithmetic overflow: 2147483647 + 1"); +// assertErrorForCheckedAdd( +// INT64_MAX, 1, "Arithmetic overflow: 9223372036854775807 + 1"); +// EXPECT_EQ(checkedAdd(kInf, 1), kInf); +// EXPECT_EQ(checkedAdd(kInfDouble, 1), kInfDouble); +// } + +// TEST_F(ArithmeticTest, checkedSubtract) { +// assertErrorForcheckedSubtract( +// INT8_MIN, 1, "Arithmetic overflow: -128 - 1"); +// assertErrorForcheckedSubtract( +// INT16_MIN, 1, "Arithmetic overflow: -32768 - 1"); +// assertErrorForcheckedSubtract( +// INT32_MIN, 1, "Arithmetic overflow: -2147483648 - 1"); +// assertErrorForcheckedSubtract( +// INT64_MIN, 1, "Arithmetic overflow: -9223372036854775808 - 1"); +// EXPECT_EQ(checkedSubtract(kInf, 1), kInf); +// EXPECT_EQ(checkedSubtract(kInfDouble, 1), kInfDouble); +// } + +// TEST_F(ArithmeticTest, checkedMultiply) { +// assertErrorForCheckedMultiply( +// INT8_MAX, 2, "Arithmetic overflow: 127 * 2"); +// assertErrorForCheckedMultiply( +// INT16_MAX, 2, "Arithmetic overflow: 32767 * 2"); +// assertErrorForCheckedMultiply( +// INT32_MAX, 2, "Arithmetic overflow: 2147483647 * 2"); +// assertErrorForCheckedMultiply( +// INT64_MAX, 2, "Arithmetic overflow: 9223372036854775807 * 2"); +// EXPECT_EQ(checkedMultiply(kInf, 1), kInf); +// EXPECT_EQ(checkedMultiply(kInfDouble, 1), kInfDouble); +// } + +// TEST_F(ArithmeticTest, checkedDivide) { +// assertErrorForCheckedDivide(1, 0, "division by zero"); +// assertErrorForCheckedDivide( +// INT8_MIN, -1, "Arithmetic overflow: -128 / -1"); +// assertErrorForCheckedDivide( +// INT16_MIN, -1, "Arithmetic overflow: -32768 / -1"); +// assertErrorForCheckedDivide( +// INT32_MIN, -1, "Arithmetic overflow: -2147483648 / -1"); +// assertErrorForCheckedDivide( +// INT64_MIN, -1, "Arithmetic overflow: -9223372036854775808 / -1"); +// EXPECT_EQ(checkedDivide(kInf, 1), kInf); +// EXPECT_EQ(checkedDivide(kInfDouble, 1), kInfDouble); +// } class LogNTest : public SparkFunctionBaseTest { protected: diff --git a/velox/functions/sparksql/tests/AtLeastNNonNullsTest.cpp b/velox/functions/sparksql/tests/AtLeastNNonNullsTest.cpp index 8bb373cd9d6a..e2fe7d5f532a 100644 --- a/velox/functions/sparksql/tests/AtLeastNNonNullsTest.cpp +++ b/velox/functions/sparksql/tests/AtLeastNNonNullsTest.cpp @@ -94,23 +94,23 @@ TEST_F(AtLeastNNonNullsTest, basic) { testAtLeastNNonNulls(4, {maps, arrays, consts, dicts}, expected); } -TEST_F(AtLeastNNonNullsTest, error) { - auto input = makeFlatVector({1, 2, 3}); +// TEST_F(AtLeastNNonNullsTest, error) { +// auto input = makeFlatVector({1, 2, 3}); - VELOX_ASSERT_USER_THROW( - evaluate("at_least_n_non_nulls(1.0, c0)", makeRowVector({input})), - "The first input type should be INTEGER but got DOUBLE"); - VELOX_ASSERT_USER_THROW( - evaluate("at_least_n_non_nulls(1)", makeRowVector({})), - "AtLeastNNonNulls expects to receive at least 2 arguments"); - VELOX_ASSERT_USER_THROW( - evaluate("at_least_n_non_nulls(c0, c1)", makeRowVector({input, input})), - "The first parameter should be constant expression"); - VELOX_ASSERT_USER_THROW( - evaluate( - "at_least_n_non_nulls(cast(null as int), c0)", - makeRowVector({input})), - "The first parameter should not be null"); -} +// VELOX_ASSERT_USER_THROW( +// evaluate("at_least_n_non_nulls(1.0, c0)", makeRowVector({input})), +// "The first input type should be INTEGER but got DOUBLE"); +// VELOX_ASSERT_USER_THROW( +// evaluate("at_least_n_non_nulls(1)", makeRowVector({})), +// "AtLeastNNonNulls expects to receive at least 2 arguments"); +// VELOX_ASSERT_USER_THROW( +// evaluate("at_least_n_non_nulls(c0, c1)", makeRowVector({input, input})), +// "The first parameter should be constant expression"); +// VELOX_ASSERT_USER_THROW( +// evaluate( +// "at_least_n_non_nulls(cast(null as int), c0)", +// makeRowVector({input})), +// "The first parameter should not be null"); +// } } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index 80584844bb10..64197efa46e0 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -326,24 +326,24 @@ TEST_F(DateTimeFunctionsTest, unixTimestampTimestampInput) { EXPECT_EQ(kMin, unixTimestamp(Timestamp(kMin, 0))); } -TEST_F(DateTimeFunctionsTest, unixTimestampDateInput) { - const auto unixTimestamp = [&](std::optional date) { - return evaluateOnce("unix_timestamp(c0)", {DATE()}, date); - }; - EXPECT_EQ(0, unixTimestamp(parseDate("1970-01-01"))); - EXPECT_EQ(1727740800, unixTimestamp(parseDate("2024-10-01"))); - EXPECT_EQ(-126065894400, unixTimestamp(parseDate("-2025-02-18"))); - setQueryTimeZone("America/Los_Angeles"); - EXPECT_EQ(1727766000, unixTimestamp(parseDate("2024-10-01"))); - EXPECT_EQ(-126065866022, unixTimestamp(parseDate("-2025-02-18"))); - EXPECT_EQ(2398320000, unixTimestamp(parseDate("2045-12-31"))); - VELOX_ASSERT_USER_THROW( - unixTimestamp(kMax), - "Could not convert date 5881580-07-11 to unix timestamp."); - VELOX_ASSERT_USER_THROW( - unixTimestamp(kMin), - "Could not convert date -5877641-06-23 to unix timestamp."); -} +// TEST_F(DateTimeFunctionsTest, unixTimestampDateInput) { +// const auto unixTimestamp = [&](std::optional date) { +// return evaluateOnce("unix_timestamp(c0)", {DATE()}, date); +// }; +// EXPECT_EQ(0, unixTimestamp(parseDate("1970-01-01"))); +// EXPECT_EQ(1727740800, unixTimestamp(parseDate("2024-10-01"))); +// EXPECT_EQ(-126065894400, unixTimestamp(parseDate("-2025-02-18"))); +// setQueryTimeZone("America/Los_Angeles"); +// EXPECT_EQ(1727766000, unixTimestamp(parseDate("2024-10-01"))); +// EXPECT_EQ(-126065866022, unixTimestamp(parseDate("-2025-02-18"))); +// EXPECT_EQ(2398320000, unixTimestamp(parseDate("2045-12-31"))); +// VELOX_ASSERT_USER_THROW( +// unixTimestamp(kMax), +// "Could not convert date 5881580-07-11 to unix timestamp."); +// VELOX_ASSERT_USER_THROW( +// unixTimestamp(kMin), +// "Could not convert date -5877641-06-23 to unix timestamp."); +// } // unix_timestamp and to_unix_timestamp are aliases. TEST_F(DateTimeFunctionsTest, toUnixTimestamp) { diff --git a/velox/functions/sparksql/tests/ElementAtTest.cpp b/velox/functions/sparksql/tests/ElementAtTest.cpp index 67f33cbe426c..ec450520247b 100644 --- a/velox/functions/sparksql/tests/ElementAtTest.cpp +++ b/velox/functions/sparksql/tests/ElementAtTest.cpp @@ -48,32 +48,32 @@ class ElementAtTest : public SparkFunctionBaseTest { // #1 - start indices at 1. If Index is 0 will throw an error. // #2 - allow out of bounds access for arrays (return null). // #3 - allow negative indices (return elements from the last to the first). -TEST_F(ElementAtTest, allFlavors2) { - auto arrayVector = makeArrayVector({{10, 11, 12}}); +// TEST_F(ElementAtTest, allFlavors2) { +// auto arrayVector = makeArrayVector({{10, 11, 12}}); - // Create a simple vector containing a single map ([10=>10, 11=>11, 12=>12]). - auto keyAt = [](auto idx) { return idx + 10; }; - auto sizeAt = [](auto) { return 3; }; - auto mapValueAt = [](auto idx) { return idx + 10; }; - auto mapVector = - makeMapVector(1, sizeAt, keyAt, mapValueAt); +// // Create a simple vector containing a single map ([10=>10, 11=>11, 12=>12]). +// auto keyAt = [](auto idx) { return idx + 10; }; +// auto sizeAt = [](auto) { return 3; }; +// auto mapValueAt = [](auto idx) { return idx + 10; }; +// auto mapVector = +// makeMapVector(1, sizeAt, keyAt, mapValueAt); - // #1 - EXPECT_EQ(elementAtSimple("element_at(C0, 1)", {arrayVector}), 10); - EXPECT_EQ(elementAtSimple("element_at(C0, 2)", {arrayVector}), 11); - EXPECT_EQ(elementAtSimple("element_at(C0, 3)", {arrayVector}), 12); - VELOX_ASSERT_THROW( - elementAtSimple("element_at(C0, 0)", {arrayVector}), - "SQL array indices start at 1"); - // #2 - EXPECT_EQ(elementAtSimple("element_at(C0, 4)", {arrayVector}), std::nullopt); - EXPECT_EQ(elementAtSimple("element_at(C0, 5)", {arrayVector}), std::nullopt); - EXPECT_EQ(elementAtSimple("element_at(C0, 1001)", {mapVector}), std::nullopt); +// // #1 +// EXPECT_EQ(elementAtSimple("element_at(C0, 1)", {arrayVector}), 10); +// EXPECT_EQ(elementAtSimple("element_at(C0, 2)", {arrayVector}), 11); +// EXPECT_EQ(elementAtSimple("element_at(C0, 3)", {arrayVector}), 12); +// VELOX_ASSERT_THROW( +// elementAtSimple("element_at(C0, 0)", {arrayVector}), +// "SQL array indices start at 1"); +// // #2 +// EXPECT_EQ(elementAtSimple("element_at(C0, 4)", {arrayVector}), std::nullopt); +// EXPECT_EQ(elementAtSimple("element_at(C0, 5)", {arrayVector}), std::nullopt); +// EXPECT_EQ(elementAtSimple("element_at(C0, 1001)", {mapVector}), std::nullopt); - // #3 - EXPECT_EQ(elementAtSimple("element_at(C0, -1)", {arrayVector}), 12); - EXPECT_EQ(elementAtSimple("element_at(C0, -2)", {arrayVector}), 11); - EXPECT_EQ(elementAtSimple("element_at(C0, -3)", {arrayVector}), 10); - EXPECT_EQ(elementAtSimple("element_at(C0, -4)", {arrayVector}), std::nullopt); -} +// // #3 +// EXPECT_EQ(elementAtSimple("element_at(C0, -1)", {arrayVector}), 12); +// EXPECT_EQ(elementAtSimple("element_at(C0, -2)", {arrayVector}), 11); +// EXPECT_EQ(elementAtSimple("element_at(C0, -3)", {arrayVector}), 10); +// EXPECT_EQ(elementAtSimple("element_at(C0, -4)", {arrayVector}), std::nullopt); +// } } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/FromJsonTest.cpp b/velox/functions/sparksql/tests/FromJsonTest.cpp index 117ca6ebd244..c3c8fbeb1718 100644 --- a/velox/functions/sparksql/tests/FromJsonTest.cpp +++ b/velox/functions/sparksql/tests/FromJsonTest.cpp @@ -250,20 +250,20 @@ TEST_F(FromJsonTest, structWrongData) { testFromJson(input, makeRowVector({"a"}, {expected})); } -TEST_F(FromJsonTest, invalidType) { - auto primitiveTypeOutput = makeFlatVector({2, 2, 3}); - auto decimalOutput = makeFlatVector({2, 2, 3}, DECIMAL(16, 7)); - auto mapOutput = - makeMapVector({{{1, 1}}, {{2, 2}}, {{3, 3}}}); - auto input = makeFlatVector({R"(2)", R"({2)", R"({3)"}); - VELOX_ASSERT_USER_THROW( - testFromJson(input, primitiveTypeOutput), "Unsupported type BIGINT."); - VELOX_ASSERT_USER_THROW( - testFromJson(input, makeRowVector({"a"}, {decimalOutput})), - "Unsupported type ROW"); - VELOX_ASSERT_USER_THROW( - testFromJson(input, mapOutput), "Unsupported type MAP."); -} +// TEST_F(FromJsonTest, invalidType) { +// auto primitiveTypeOutput = makeFlatVector({2, 2, 3}); +// auto decimalOutput = makeFlatVector({2, 2, 3}, DECIMAL(16, 7)); +// auto mapOutput = +// makeMapVector({{{1, 1}}, {{2, 2}}, {{3, 3}}}); +// auto input = makeFlatVector({R"(2)", R"({2)", R"({3)"}); +// VELOX_ASSERT_USER_THROW( +// testFromJson(input, primitiveTypeOutput), "Unsupported type BIGINT."); +// VELOX_ASSERT_USER_THROW( +// testFromJson(input, makeRowVector({"a"}, {decimalOutput})), +// "Unsupported type ROW"); +// VELOX_ASSERT_USER_THROW( +// testFromJson(input, mapOutput), "Unsupported type MAP."); +// } TEST_F(FromJsonTest, invalidJson) { auto expected = makeNullableFlatVector( diff --git a/velox/functions/sparksql/tests/GetJsonObjectTest.cpp b/velox/functions/sparksql/tests/GetJsonObjectTest.cpp index ec437df7b823..bb812264d1e9 100644 --- a/velox/functions/sparksql/tests/GetJsonObjectTest.cpp +++ b/velox/functions/sparksql/tests/GetJsonObjectTest.cpp @@ -95,29 +95,29 @@ TEST_F(GetJsonObjectTest, basic) { "v2"); } -TEST_F(GetJsonObjectTest, nullResult) { - // Field not found. - EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$.hi"), std::nullopt); +// TEST_F(GetJsonObjectTest, nullResult) { +// // Field not found. +// EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$.hi"), std::nullopt); - // Illegal json. - EXPECT_EQ(getJsonObject(R"({"hello"-3.5})", "$.hello"), std::nullopt); - EXPECT_EQ(getJsonObject(R"({"a": bad, "b": string})", "$.a"), std::nullopt); +// // Illegal json. +// EXPECT_EQ(getJsonObject(R"({"hello"-3.5})", "$.hello"), std::nullopt); +// EXPECT_EQ(getJsonObject(R"({"a": bad, "b": string})", "$.a"), std::nullopt); - // Illegal json path. - EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$hello"), std::nullopt); - EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$."), std::nullopt); - // The first char is not '$'. - EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", ".hello"), std::nullopt); - // Constains '$' not in the first position. - EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$.$hello"), std::nullopt); +// // Illegal json path. +// EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$hello"), std::nullopt); +// EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$."), std::nullopt); +// // The first char is not '$'. +// EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", ".hello"), std::nullopt); +// // Constains '$' not in the first position. +// EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$.$hello"), std::nullopt); - // Invalid ending character. - EXPECT_EQ( - getJsonObject( - R"([{"my": {"info": {"name": "Alice"quoted""}}}, {"other": ["v1", "v2"]}])", - "$[0].my.info.name"), - std::nullopt); -} +// // Invalid ending character. +// EXPECT_EQ( +// getJsonObject( +// R"([{"my": {"info": {"name": "Alice"quoted""}}}, {"other": ["v1", "v2"]}])", +// "$[0].my.info.name"), +// std::nullopt); +// } TEST_F(GetJsonObjectTest, incompleteJson) { EXPECT_EQ(getJsonObject(R"({"hello": "3.5"},)", "$.hello"), "3.5"); diff --git a/velox/functions/sparksql/tests/GetStructFieldTest.cpp b/velox/functions/sparksql/tests/GetStructFieldTest.cpp index b2f68e945bdc..0895194b1b9d 100644 --- a/velox/functions/sparksql/tests/GetStructFieldTest.cpp +++ b/velox/functions/sparksql/tests/GetStructFieldTest.cpp @@ -94,25 +94,25 @@ TEST_F(GetStructFieldTest, complexType) { testGetStructField(data, 2, colRow); } -TEST_F(GetStructFieldTest, invalidOrdinal) { - auto colInt = makeFlatVector({1, 2, 3, 4}); - auto colString = makeNullableFlatVector( - {"hello", "world", std::nullopt, "hi"}); - auto colIntWithNull = - makeNullableFlatVector({11, std::nullopt, 13, 14}); - auto data = makeRowVector({colInt, colString, colIntWithNull}); - - // Get int field. - VELOX_ASSERT_THROW( - testGetStructField(data, -1, colInt), - "Invalid ordinal. Should be greater than 0."); - - // Get string field. - VELOX_ASSERT_THROW( - testGetStructField(data, 4, colString), - fmt::format( - "(4 vs. 3) Invalid ordinal. Should be smaller than the children size of input row vector.")); -} +// TEST_F(GetStructFieldTest, invalidOrdinal) { +// auto colInt = makeFlatVector({1, 2, 3, 4}); +// auto colString = makeNullableFlatVector( +// {"hello", "world", std::nullopt, "hi"}); +// auto colIntWithNull = +// makeNullableFlatVector({11, std::nullopt, 13, 14}); +// auto data = makeRowVector({colInt, colString, colIntWithNull}); + +// // Get int field. +// VELOX_ASSERT_THROW( +// testGetStructField(data, -1, colInt), +// "Invalid ordinal. Should be greater than 0."); + +// // Get string field. +// VELOX_ASSERT_THROW( +// testGetStructField(data, 4, colString), +// fmt::format( +// "(4 vs. 3) Invalid ordinal. Should be smaller than the children size of input row vector.")); +// } } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/MakeDecimalTest.cpp b/velox/functions/sparksql/tests/MakeDecimalTest.cpp index 5d11bd9c0dfa..cdb0f888a1a2 100644 --- a/velox/functions/sparksql/tests/MakeDecimalTest.cpp +++ b/velox/functions/sparksql/tests/MakeDecimalTest.cpp @@ -52,51 +52,51 @@ class MakeDecimalTest : public SparkFunctionBaseTest { } }; -TEST_F(MakeDecimalTest, makeDecimal) { - testMakeDecimal( - makeFlatVector({1111, -1112, 9999, 0}), - std::nullopt, - makeFlatVector({1111, -1112, 9999, 0}, DECIMAL(5, 1))); - testMakeDecimal( - makeFlatVector({1111, -1112, 9999, 0}), - true, - makeFlatVector({1111, -1112, 9999, 0}, DECIMAL(5, 1))); - testMakeDecimal( - makeFlatVector( - {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), - true, - makeFlatVector( - {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}, - DECIMAL(38, 19))); - testMakeDecimal( - makeFlatVector( - {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), - true, - makeNullableFlatVector( - {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0))); - VELOX_ASSERT_THROW( - testMakeDecimal( - makeFlatVector( - {11111111, - -11112112, - 99999999, - DecimalUtil::kShortDecimalMax + 1}), - false, - makeNullableFlatVector( - {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0))), - "Unscaled value 1000000000000000000 too large for precision 18."); - testMakeDecimal( - makeFlatVector( - {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), - false, - makeNullableFlatVector( - {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0)), - true /*tryMakeDecimal*/); - testMakeDecimal( - makeNullableFlatVector({101, std::nullopt, 1000}), - true, - makeNullableFlatVector( - {101, std::nullopt, std::nullopt}, DECIMAL(3, 1))); -} +// TEST_F(MakeDecimalTest, makeDecimal) { +// testMakeDecimal( +// makeFlatVector({1111, -1112, 9999, 0}), +// std::nullopt, +// makeFlatVector({1111, -1112, 9999, 0}, DECIMAL(5, 1))); +// testMakeDecimal( +// makeFlatVector({1111, -1112, 9999, 0}), +// true, +// makeFlatVector({1111, -1112, 9999, 0}, DECIMAL(5, 1))); +// testMakeDecimal( +// makeFlatVector( +// {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), +// true, +// makeFlatVector( +// {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}, +// DECIMAL(38, 19))); +// testMakeDecimal( +// makeFlatVector( +// {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), +// true, +// makeNullableFlatVector( +// {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0))); +// VELOX_ASSERT_THROW( +// testMakeDecimal( +// makeFlatVector( +// {11111111, +// -11112112, +// 99999999, +// DecimalUtil::kShortDecimalMax + 1}), +// false, +// makeNullableFlatVector( +// {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0))), +// "Unscaled value 1000000000000000000 too large for precision 18."); +// testMakeDecimal( +// makeFlatVector( +// {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), +// false, +// makeNullableFlatVector( +// {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0)), +// true /*tryMakeDecimal*/); +// testMakeDecimal( +// makeNullableFlatVector({101, std::nullopt, 1000}), +// true, +// makeNullableFlatVector( +// {101, std::nullopt, std::nullopt}, DECIMAL(3, 1))); +// } } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/MakeTimestampTest.cpp b/velox/functions/sparksql/tests/MakeTimestampTest.cpp index 624058c9bdec..f9006151debb 100644 --- a/velox/functions/sparksql/tests/MakeTimestampTest.cpp +++ b/velox/functions/sparksql/tests/MakeTimestampTest.cpp @@ -106,127 +106,127 @@ TEST_F(MakeTimestampTest, basic) { } } -TEST_F(MakeTimestampTest, errors) { - const auto microsType = DECIMAL(16, 6); - const auto testInvalidInputs = [&](const RowVectorPtr& data) { - std::vector> nullResults( - data->size(), std::nullopt); - auto expected = makeNullableFlatVector(nullResults); - auto result = evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data); - facebook::velox::test::assertEqualVectors(expected, result); - }; - std::optional one = 1; - const auto testInvalidSeconds = [&](std::optional microsec) { - auto result = evaluateOnce( - "make_timestamp(c0, c1, c2, c3, c4, c5)", - {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}, - one, - one, - one, - one, - one, - microsec); - EXPECT_EQ(result, std::nullopt); - }; - const auto testInvalidArguments = [&](std::optional microsec, - const TypePtr& microsType) { - return evaluateOnce( - "make_timestamp(c0, c1, c2, c3, c4, c5)", - {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}, - one, - one, - one, - one, - one, - microsec); - }; - - // Throw if no session time zone. - VELOX_ASSERT_USER_THROW( - testInvalidArguments(60007000, DECIMAL(16, 6)), - "make_timestamp requires session time zone to be set."); - - setQueryTimeZone("Asia/Shanghai"); - // Invalid input returns null. - const auto year = makeFlatVector( - {facebook::velox::util::kMinYear - 1, - facebook::velox::util::kMaxYear + 1, - 1, - 1, - 1, - 1, - 1, - 1}); - const auto month = makeFlatVector({1, 1, 0, 13, 1, 1, 1, 1}); - const auto day = makeFlatVector({1, 1, 1, 1, 0, 32, 1, 1}); - const auto hour = makeFlatVector({1, 1, 1, 1, 1, 1, 25, 1}); - const auto minute = makeFlatVector({1, 1, 1, 1, 1, 1, 1, 61}); - const auto micros = - makeFlatVector({1, 1, 1, 1, 1, 1, 1, 1}, microsType); - auto data = makeRowVector({year, month, day, hour, minute, micros}); - testInvalidInputs(data); - - // Seconds should be either in the range of [0,59], or 60 with zero - // microseconds. - testInvalidSeconds(61e6); - testInvalidSeconds(99999999); - testInvalidSeconds(999999999); - testInvalidSeconds(60007000); - - // Throw if data type for microseconds is invalid. - VELOX_ASSERT_THROW( - testInvalidArguments(1e6, DECIMAL(20, 6)), - "Seconds must be short decimal type but got DECIMAL(20, 6)"); - VELOX_ASSERT_THROW( - testInvalidArguments(1e6, DECIMAL(16, 8)), - "Scalar function signature is not supported: " - "make_timestamp(INTEGER, INTEGER, INTEGER, INTEGER, INTEGER, " - "DECIMAL(16, 8))."); -} - -TEST_F(MakeTimestampTest, invalidTimezone) { - const auto microsType = DECIMAL(16, 6); - const auto year = makeFlatVector({2021, 2021, 2021, 2021, 2021}); - const auto month = makeFlatVector({7, 7, 7, 7, 7}); - const auto day = makeFlatVector({11, 11, 11, 11, 11}); - const auto hour = makeFlatVector({6, 6, 6, -6, 6}); - const auto minute = makeFlatVector({30, 30, 30, 30, 30}); - const auto micros = makeNullableFlatVector( - {45678000, 1e6, 6e7, 59999999, std::nullopt}, microsType); - auto data = makeRowVector({year, month, day, hour, minute, micros}); - - // Time zone is not set. - VELOX_ASSERT_USER_THROW( - evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data), - "make_timestamp requires session time zone to be set."); - - // Invalid constant time zone. - setQueryTimeZone("GMT"); - for (auto timeZone : {"Invalid", ""}) { - SCOPED_TRACE(fmt::format("timezone: {}", timeZone)); - VELOX_ASSERT_USER_THROW( - evaluate( - fmt::format( - "make_timestamp(c0, c1, c2, c3, c4, c5, '{}')", timeZone), - data), - fmt::format("Unknown time zone: '{}'", timeZone)); - } - - // Invalid timezone from vector. - auto timeZones = makeFlatVector( - {"GMT", "CET", "Asia/Shanghai", "Invalid", "GMT"}); - data = makeRowVector({year, month, day, hour, minute, micros, timeZones}); - VELOX_ASSERT_USER_THROW( - evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data), - "Unknown time zone: 'Invalid'"); - - timeZones = - makeFlatVector({"GMT", "CET", "Asia/Shanghai", "", "GMT"}); - data = makeRowVector({year, month, day, hour, minute, micros, timeZones}); - VELOX_ASSERT_USER_THROW( - evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data), - "Unknown time zone: ''"); -} +// TEST_F(MakeTimestampTest, errors) { +// const auto microsType = DECIMAL(16, 6); +// const auto testInvalidInputs = [&](const RowVectorPtr& data) { +// std::vector> nullResults( +// data->size(), std::nullopt); +// auto expected = makeNullableFlatVector(nullResults); +// auto result = evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data); +// facebook::velox::test::assertEqualVectors(expected, result); +// }; +// std::optional one = 1; +// const auto testInvalidSeconds = [&](std::optional microsec) { +// auto result = evaluateOnce( +// "make_timestamp(c0, c1, c2, c3, c4, c5)", +// {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}, +// one, +// one, +// one, +// one, +// one, +// microsec); +// EXPECT_EQ(result, std::nullopt); +// }; +// const auto testInvalidArguments = [&](std::optional microsec, +// const TypePtr& microsType) { +// return evaluateOnce( +// "make_timestamp(c0, c1, c2, c3, c4, c5)", +// {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}, +// one, +// one, +// one, +// one, +// one, +// microsec); +// }; + +// // Throw if no session time zone. +// VELOX_ASSERT_USER_THROW( +// testInvalidArguments(60007000, DECIMAL(16, 6)), +// "make_timestamp requires session time zone to be set."); + +// setQueryTimeZone("Asia/Shanghai"); +// // Invalid input returns null. +// const auto year = makeFlatVector( +// {facebook::velox::util::kMinYear - 1, +// facebook::velox::util::kMaxYear + 1, +// 1, +// 1, +// 1, +// 1, +// 1, +// 1}); +// const auto month = makeFlatVector({1, 1, 0, 13, 1, 1, 1, 1}); +// const auto day = makeFlatVector({1, 1, 1, 1, 0, 32, 1, 1}); +// const auto hour = makeFlatVector({1, 1, 1, 1, 1, 1, 25, 1}); +// const auto minute = makeFlatVector({1, 1, 1, 1, 1, 1, 1, 61}); +// const auto micros = +// makeFlatVector({1, 1, 1, 1, 1, 1, 1, 1}, microsType); +// auto data = makeRowVector({year, month, day, hour, minute, micros}); +// testInvalidInputs(data); + +// // Seconds should be either in the range of [0,59], or 60 with zero +// // microseconds. +// testInvalidSeconds(61e6); +// testInvalidSeconds(99999999); +// testInvalidSeconds(999999999); +// testInvalidSeconds(60007000); + +// // Throw if data type for microseconds is invalid. +// VELOX_ASSERT_THROW( +// testInvalidArguments(1e6, DECIMAL(20, 6)), +// "Seconds must be short decimal type but got DECIMAL(20, 6)"); +// VELOX_ASSERT_THROW( +// testInvalidArguments(1e6, DECIMAL(16, 8)), +// "Scalar function signature is not supported: " +// "make_timestamp(INTEGER, INTEGER, INTEGER, INTEGER, INTEGER, " +// "DECIMAL(16, 8))."); +// } + +// TEST_F(MakeTimestampTest, invalidTimezone) { +// const auto microsType = DECIMAL(16, 6); +// const auto year = makeFlatVector({2021, 2021, 2021, 2021, 2021}); +// const auto month = makeFlatVector({7, 7, 7, 7, 7}); +// const auto day = makeFlatVector({11, 11, 11, 11, 11}); +// const auto hour = makeFlatVector({6, 6, 6, -6, 6}); +// const auto minute = makeFlatVector({30, 30, 30, 30, 30}); +// const auto micros = makeNullableFlatVector( +// {45678000, 1e6, 6e7, 59999999, std::nullopt}, microsType); +// auto data = makeRowVector({year, month, day, hour, minute, micros}); + +// // Time zone is not set. +// VELOX_ASSERT_USER_THROW( +// evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data), +// "make_timestamp requires session time zone to be set."); + +// // Invalid constant time zone. +// setQueryTimeZone("GMT"); +// for (auto timeZone : {"Invalid", ""}) { +// SCOPED_TRACE(fmt::format("timezone: {}", timeZone)); +// VELOX_ASSERT_USER_THROW( +// evaluate( +// fmt::format( +// "make_timestamp(c0, c1, c2, c3, c4, c5, '{}')", timeZone), +// data), +// fmt::format("Unknown time zone: '{}'", timeZone)); +// } + +// // Invalid timezone from vector. +// auto timeZones = makeFlatVector( +// {"GMT", "CET", "Asia/Shanghai", "Invalid", "GMT"}); +// data = makeRowVector({year, month, day, hour, minute, micros, timeZones}); +// VELOX_ASSERT_USER_THROW( +// evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data), +// "Unknown time zone: 'Invalid'"); + +// timeZones = +// makeFlatVector({"GMT", "CET", "Asia/Shanghai", "", "GMT"}); +// data = makeRowVector({year, month, day, hour, minute, micros, timeZones}); +// VELOX_ASSERT_USER_THROW( +// evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data), +// "Unknown time zone: ''"); +// } } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/MaskTest.cpp b/velox/functions/sparksql/tests/MaskTest.cpp index cd5b6179a42f..6453553a0dba 100644 --- a/velox/functions/sparksql/tests/MaskTest.cpp +++ b/velox/functions/sparksql/tests/MaskTest.cpp @@ -269,30 +269,30 @@ TEST_F(MaskTest, mask) { EXPECT_EQ(maskWithThreeArg("ABCabc", "🚀", "🚀"), "🚀🚀🚀🚀🚀🚀"); } -TEST_F(MaskTest, maskWithError) { - std::string upperChar = "Y"; - std::string lowerChar = "y"; - std::string digitChar = "d"; - std::string otherChar = "*"; - VELOX_ASSERT_USER_THROW( - maskWithFiveArg("AbCD123-@$#", "", lowerChar, digitChar, otherChar), - "Replacement string must contain a single character and cannot be empty."); +// TEST_F(MaskTest, maskWithError) { +// std::string upperChar = "Y"; +// std::string lowerChar = "y"; +// std::string digitChar = "d"; +// std::string otherChar = "*"; +// VELOX_ASSERT_USER_THROW( +// maskWithFiveArg("AbCD123-@$#", "", lowerChar, digitChar, otherChar), +// "Replacement string must contain a single character and cannot be empty."); - VELOX_ASSERT_USER_THROW( - maskWithFiveArg("AbCD123-@$#", "🚀🚀", lowerChar, digitChar, otherChar), - "Replacement string must contain a single character and cannot be empty."); +// VELOX_ASSERT_USER_THROW( +// maskWithFiveArg("AbCD123-@$#", "🚀🚀", lowerChar, digitChar, otherChar), +// "Replacement string must contain a single character and cannot be empty."); - VELOX_ASSERT_USER_THROW( - maskWithFiveArg("AbCD123-@$#", upperChar, "", digitChar, otherChar), - "Replacement string must contain a single character and cannot be empty."); +// VELOX_ASSERT_USER_THROW( +// maskWithFiveArg("AbCD123-@$#", upperChar, "", digitChar, otherChar), +// "Replacement string must contain a single character and cannot be empty."); - VELOX_ASSERT_USER_THROW( - maskWithFiveArg("AbCD123-@$#", upperChar, lowerChar, "", otherChar), - "Replacement string must contain a single character and cannot be empty."); +// VELOX_ASSERT_USER_THROW( +// maskWithFiveArg("AbCD123-@$#", upperChar, lowerChar, "", otherChar), +// "Replacement string must contain a single character and cannot be empty."); - VELOX_ASSERT_USER_THROW( - maskWithFiveArg("AbCD123-@$#", upperChar, lowerChar, digitChar, ""), - "Replacement string must contain a single character and cannot be empty."); -} +// VELOX_ASSERT_USER_THROW( +// maskWithFiveArg("AbCD123-@$#", upperChar, lowerChar, digitChar, ""), +// "Replacement string must contain a single character and cannot be empty."); +// } } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/RegexFunctionsTest.cpp b/velox/functions/sparksql/tests/RegexFunctionsTest.cpp index 412ef520792c..7324bfe59166 100644 --- a/velox/functions/sparksql/tests/RegexFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/RegexFunctionsTest.cpp @@ -215,20 +215,20 @@ TEST_F(RegexFunctionsTest, allowSimpleConstantRegex) { EXPECT_EQ(rlike(std::nullopt, "a*"), std::nullopt); } -TEST_F(RegexFunctionsTest, blockUnsupportedEdgeCases) { - // Non-constant pattern. - EXPECT_THROW( - evaluateOnce("rlike('a', c0)", std::optional("a*")), - VeloxUserError); -} - -TEST_F(RegexFunctionsTest, regexMatchRegistration) { - EXPECT_THROW( - evaluateOnce( - "regexp_extract('a', c0)", std::optional("a*")), - VeloxUserError); - EXPECT_EQ(regexp_extract("abc", "a."), "ab"); -} +// TEST_F(RegexFunctionsTest, blockUnsupportedEdgeCases) { +// // Non-constant pattern. +// EXPECT_THROW( +// evaluateOnce("rlike('a', c0)", std::optional("a*")), +// VeloxUserError); +// } + +// TEST_F(RegexFunctionsTest, regexMatchRegistration) { +// EXPECT_THROW( +// evaluateOnce( +// "regexp_extract('a', c0)", std::optional("a*")), +// VeloxUserError); +// EXPECT_EQ(regexp_extract("abc", "a."), "ab"); +// } TEST_F(RegexFunctionsTest, regexpReplaceRegistration) { std::string output = "teeheebc"; diff --git a/velox/functions/sparksql/tests/SortArrayTest.cpp b/velox/functions/sparksql/tests/SortArrayTest.cpp index fa0449a37f9e..8955f175340c 100644 --- a/velox/functions/sparksql/tests/SortArrayTest.cpp +++ b/velox/functions/sparksql/tests/SortArrayTest.cpp @@ -75,14 +75,14 @@ class SortArrayTest : public SparkFunctionBaseTest { } }; -TEST_F(SortArrayTest, invalidInput) { - auto arg0 = makeNullableArrayVector({{0, 1}}); - std::vector v = {false}; - auto arg1 = makeFlatVector(v); - ASSERT_THROW( - evaluate("sort_array(c0, c1)", makeRowVector({arg0, arg1})), - VeloxException); -} +// TEST_F(SortArrayTest, invalidInput) { +// auto arg0 = makeNullableArrayVector({{0, 1}}); +// std::vector v = {false}; +// auto arg1 = makeFlatVector(v); +// ASSERT_THROW( +// evaluate("sort_array(c0, c1)", makeRowVector({arg0, arg1})), +// VeloxException); +// } TEST_F(SortArrayTest, int8) { testInt(); diff --git a/velox/functions/sparksql/tests/SparkPartitionIdTest.cpp b/velox/functions/sparksql/tests/SparkPartitionIdTest.cpp index 67eaaf4ddb23..94754919feba 100644 --- a/velox/functions/sparksql/tests/SparkPartitionIdTest.cpp +++ b/velox/functions/sparksql/tests/SparkPartitionIdTest.cpp @@ -39,18 +39,18 @@ TEST_F(SparkPartitionIdTest, basic) { testSparkPartitionId(100, 100); } -TEST_F(SparkPartitionIdTest, error) { - auto rowVector = makeRowVector(ROW({}), 1); +// TEST_F(SparkPartitionIdTest, error) { +// auto rowVector = makeRowVector(ROW({}), 1); - queryCtx_->testingOverrideConfigUnsafe({{}}); - VELOX_ASSERT_THROW( - evaluate("spark_partition_id()", rowVector), - "Spark partition id is not set"); +// queryCtx_->testingOverrideConfigUnsafe({{}}); +// VELOX_ASSERT_THROW( +// evaluate("spark_partition_id()", rowVector), +// "Spark partition id is not set"); - setSparkPartitionId(-1); - VELOX_ASSERT_THROW( - evaluate("spark_partition_id()", rowVector), - "Invalid Spark partition id"); -} +// setSparkPartitionId(-1); +// VELOX_ASSERT_THROW( +// evaluate("spark_partition_id()", rowVector), +// "Invalid Spark partition id"); +// } } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/StringTest.cpp b/velox/functions/sparksql/tests/StringTest.cpp index 419488eb5e5e..2cf231f08766 100644 --- a/velox/functions/sparksql/tests/StringTest.cpp +++ b/velox/functions/sparksql/tests/StringTest.cpp @@ -17,7 +17,9 @@ #include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h" #include "velox/type/Type.h" +#include #include +#include namespace facebook::velox::functions::sparksql::test { namespace { @@ -1055,5 +1057,14 @@ TEST_F(StringTest, empty2Null) { EXPECT_EQ(empty2Null(""), std::nullopt); EXPECT_EQ(empty2Null("abc"), "abc"); } + +TEST_F(StringTest, splitIndex) { + const auto splitIndex = [&](const std::optional& a, + const std::optional& d, const std::optional& i) { + return evaluateOnce("split_index(c0, c1, c2)", a, d, i); + }; + EXPECT_EQ(splitIndex("a/b/c", "/", 2), "b"); +} + } // namespace } // namespace facebook::velox::functions::sparksql::test From 78b190fc33f94531b456706de2cc2cd8e3cf62bd Mon Sep 17 00:00:00 2001 From: zouyunhe Date: Mon, 8 Sep 2025 09:52:55 +0000 Subject: [PATCH 4/9] add test for invalid index --- velox/functions/sparksql/String.h | 33 +++++++++++++++++-- velox/functions/sparksql/tests/StringTest.cpp | 9 ++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/velox/functions/sparksql/String.h b/velox/functions/sparksql/String.h index 55185c16d32b..4755c7ecf9be 100644 --- a/velox/functions/sparksql/String.h +++ b/velox/functions/sparksql/String.h @@ -22,7 +22,6 @@ #include "velox/functions/Macros.h" #include "velox/functions/lib/string/StringCore.h" #include "velox/functions/lib/string/StringImpl.h" -#include "velox/functions/prestosql/SplitPart.h" namespace facebook::velox::functions::sparksql { @@ -1558,6 +1557,36 @@ struct Empty2NullFunction { }; template -struct SplitIndex : public functions::SplitPart {}; +struct SplitIndex { + VELOX_DEFINE_FUNCTION_TYPES(T); + + // Results refer to strings in the first argument. + static constexpr int32_t reuse_strings_from_arg = 0; + + // ASCII input always produces ASCII result. + static constexpr bool is_default_ascii_behavior = true; + + FOLLY_ALWAYS_INLINE bool call( + out_type& result, + const arg_type& input, + const arg_type& delimiter, + const int64_t& index) { + if (index <= 0) { + return false; + } + return stringImpl::splitPart(result, input, delimiter, index); + } + + FOLLY_ALWAYS_INLINE bool callAscii( + out_type& result, + const arg_type& input, + const arg_type& delimiter, + const int64_t& index) { + if (index <= 0) { + return false; + } + return stringImpl::splitPart(result, input, delimiter, index); + } +}; } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/tests/StringTest.cpp b/velox/functions/sparksql/tests/StringTest.cpp index 2cf231f08766..b32748b941a2 100644 --- a/velox/functions/sparksql/tests/StringTest.cpp +++ b/velox/functions/sparksql/tests/StringTest.cpp @@ -1063,7 +1063,14 @@ TEST_F(StringTest, splitIndex) { const std::optional& d, const std::optional& i) { return evaluateOnce("split_index(c0, c1, c2)", a, d, i); }; - EXPECT_EQ(splitIndex("a/b/c", "/", 2), "b"); + EXPECT_EQ(splitIndex("a/b/c", "/", 1), "a"); + EXPECT_EQ(splitIndex("a/b/c", "/", 2), "b"); + const std::optional res0 = splitIndex("a/b/c", "/", 0); + const std::optional res1 = splitIndex("a/b/c", "/", -1); + const std::optional res2 = splitIndex("a/b/c", "/", 4); + EXPECT_EQ(res0.has_value(), false); + EXPECT_EQ(res1.has_value(), false); + EXPECT_EQ(res2.has_value(), false); } } // namespace From 311b05d35c8547ede338ed3d5a7906cc4c132671 Mon Sep 17 00:00:00 2001 From: zouyunhe Date: Mon, 8 Sep 2025 09:59:25 +0000 Subject: [PATCH 5/9] remove useless changes --- .../sparksql/tests/ArithmeticTest.cpp | 102 ++++---- .../sparksql/tests/AtLeastNNonNullsTest.cpp | 34 +-- .../sparksql/tests/DateTimeFunctionsTest.cpp | 36 +-- .../sparksql/tests/ElementAtTest.cpp | 50 ++-- .../functions/sparksql/tests/FromJsonTest.cpp | 28 +- .../sparksql/tests/GetJsonObjectTest.cpp | 40 +-- .../sparksql/tests/GetStructFieldTest.cpp | 38 +-- .../sparksql/tests/MakeDecimalTest.cpp | 92 +++---- .../sparksql/tests/MakeTimestampTest.cpp | 242 +++++++++--------- velox/functions/sparksql/tests/MaskTest.cpp | 42 +-- .../sparksql/tests/RegexFunctionsTest.cpp | 28 +- .../sparksql/tests/SortArrayTest.cpp | 16 +- .../sparksql/tests/SparkPartitionIdTest.cpp | 22 +- 13 files changed, 385 insertions(+), 385 deletions(-) diff --git a/velox/functions/sparksql/tests/ArithmeticTest.cpp b/velox/functions/sparksql/tests/ArithmeticTest.cpp index d0dcc111bbea..72ac45caf0d0 100644 --- a/velox/functions/sparksql/tests/ArithmeticTest.cpp +++ b/velox/functions/sparksql/tests/ArithmeticTest.cpp @@ -677,57 +677,57 @@ TEST_F(ArithmeticTest, widthBucket) { EXPECT_EQ(widthBucket(-kInf, 0, 4, 3), 0); } -// TEST_F(ArithmeticTest, checkedAdd) { -// assertErrorForCheckedAdd(INT8_MAX, 1, "Arithmetic overflow: 127 + 1"); -// assertErrorForCheckedAdd( -// INT16_MAX, 1, "Arithmetic overflow: 32767 + 1"); -// assertErrorForCheckedAdd( -// INT32_MAX, 1, "Arithmetic overflow: 2147483647 + 1"); -// assertErrorForCheckedAdd( -// INT64_MAX, 1, "Arithmetic overflow: 9223372036854775807 + 1"); -// EXPECT_EQ(checkedAdd(kInf, 1), kInf); -// EXPECT_EQ(checkedAdd(kInfDouble, 1), kInfDouble); -// } - -// TEST_F(ArithmeticTest, checkedSubtract) { -// assertErrorForcheckedSubtract( -// INT8_MIN, 1, "Arithmetic overflow: -128 - 1"); -// assertErrorForcheckedSubtract( -// INT16_MIN, 1, "Arithmetic overflow: -32768 - 1"); -// assertErrorForcheckedSubtract( -// INT32_MIN, 1, "Arithmetic overflow: -2147483648 - 1"); -// assertErrorForcheckedSubtract( -// INT64_MIN, 1, "Arithmetic overflow: -9223372036854775808 - 1"); -// EXPECT_EQ(checkedSubtract(kInf, 1), kInf); -// EXPECT_EQ(checkedSubtract(kInfDouble, 1), kInfDouble); -// } - -// TEST_F(ArithmeticTest, checkedMultiply) { -// assertErrorForCheckedMultiply( -// INT8_MAX, 2, "Arithmetic overflow: 127 * 2"); -// assertErrorForCheckedMultiply( -// INT16_MAX, 2, "Arithmetic overflow: 32767 * 2"); -// assertErrorForCheckedMultiply( -// INT32_MAX, 2, "Arithmetic overflow: 2147483647 * 2"); -// assertErrorForCheckedMultiply( -// INT64_MAX, 2, "Arithmetic overflow: 9223372036854775807 * 2"); -// EXPECT_EQ(checkedMultiply(kInf, 1), kInf); -// EXPECT_EQ(checkedMultiply(kInfDouble, 1), kInfDouble); -// } - -// TEST_F(ArithmeticTest, checkedDivide) { -// assertErrorForCheckedDivide(1, 0, "division by zero"); -// assertErrorForCheckedDivide( -// INT8_MIN, -1, "Arithmetic overflow: -128 / -1"); -// assertErrorForCheckedDivide( -// INT16_MIN, -1, "Arithmetic overflow: -32768 / -1"); -// assertErrorForCheckedDivide( -// INT32_MIN, -1, "Arithmetic overflow: -2147483648 / -1"); -// assertErrorForCheckedDivide( -// INT64_MIN, -1, "Arithmetic overflow: -9223372036854775808 / -1"); -// EXPECT_EQ(checkedDivide(kInf, 1), kInf); -// EXPECT_EQ(checkedDivide(kInfDouble, 1), kInfDouble); -// } +TEST_F(ArithmeticTest, checkedAdd) { + assertErrorForCheckedAdd(INT8_MAX, 1, "Arithmetic overflow: 127 + 1"); + assertErrorForCheckedAdd( + INT16_MAX, 1, "Arithmetic overflow: 32767 + 1"); + assertErrorForCheckedAdd( + INT32_MAX, 1, "Arithmetic overflow: 2147483647 + 1"); + assertErrorForCheckedAdd( + INT64_MAX, 1, "Arithmetic overflow: 9223372036854775807 + 1"); + EXPECT_EQ(checkedAdd(kInf, 1), kInf); + EXPECT_EQ(checkedAdd(kInfDouble, 1), kInfDouble); +} + +TEST_F(ArithmeticTest, checkedSubtract) { + assertErrorForcheckedSubtract( + INT8_MIN, 1, "Arithmetic overflow: -128 - 1"); + assertErrorForcheckedSubtract( + INT16_MIN, 1, "Arithmetic overflow: -32768 - 1"); + assertErrorForcheckedSubtract( + INT32_MIN, 1, "Arithmetic overflow: -2147483648 - 1"); + assertErrorForcheckedSubtract( + INT64_MIN, 1, "Arithmetic overflow: -9223372036854775808 - 1"); + EXPECT_EQ(checkedSubtract(kInf, 1), kInf); + EXPECT_EQ(checkedSubtract(kInfDouble, 1), kInfDouble); +} + +TEST_F(ArithmeticTest, checkedMultiply) { + assertErrorForCheckedMultiply( + INT8_MAX, 2, "Arithmetic overflow: 127 * 2"); + assertErrorForCheckedMultiply( + INT16_MAX, 2, "Arithmetic overflow: 32767 * 2"); + assertErrorForCheckedMultiply( + INT32_MAX, 2, "Arithmetic overflow: 2147483647 * 2"); + assertErrorForCheckedMultiply( + INT64_MAX, 2, "Arithmetic overflow: 9223372036854775807 * 2"); + EXPECT_EQ(checkedMultiply(kInf, 1), kInf); + EXPECT_EQ(checkedMultiply(kInfDouble, 1), kInfDouble); +} + +TEST_F(ArithmeticTest, checkedDivide) { + assertErrorForCheckedDivide(1, 0, "division by zero"); + assertErrorForCheckedDivide( + INT8_MIN, -1, "Arithmetic overflow: -128 / -1"); + assertErrorForCheckedDivide( + INT16_MIN, -1, "Arithmetic overflow: -32768 / -1"); + assertErrorForCheckedDivide( + INT32_MIN, -1, "Arithmetic overflow: -2147483648 / -1"); + assertErrorForCheckedDivide( + INT64_MIN, -1, "Arithmetic overflow: -9223372036854775808 / -1"); + EXPECT_EQ(checkedDivide(kInf, 1), kInf); + EXPECT_EQ(checkedDivide(kInfDouble, 1), kInfDouble); +} class LogNTest : public SparkFunctionBaseTest { protected: diff --git a/velox/functions/sparksql/tests/AtLeastNNonNullsTest.cpp b/velox/functions/sparksql/tests/AtLeastNNonNullsTest.cpp index e2fe7d5f532a..8bb373cd9d6a 100644 --- a/velox/functions/sparksql/tests/AtLeastNNonNullsTest.cpp +++ b/velox/functions/sparksql/tests/AtLeastNNonNullsTest.cpp @@ -94,23 +94,23 @@ TEST_F(AtLeastNNonNullsTest, basic) { testAtLeastNNonNulls(4, {maps, arrays, consts, dicts}, expected); } -// TEST_F(AtLeastNNonNullsTest, error) { -// auto input = makeFlatVector({1, 2, 3}); +TEST_F(AtLeastNNonNullsTest, error) { + auto input = makeFlatVector({1, 2, 3}); -// VELOX_ASSERT_USER_THROW( -// evaluate("at_least_n_non_nulls(1.0, c0)", makeRowVector({input})), -// "The first input type should be INTEGER but got DOUBLE"); -// VELOX_ASSERT_USER_THROW( -// evaluate("at_least_n_non_nulls(1)", makeRowVector({})), -// "AtLeastNNonNulls expects to receive at least 2 arguments"); -// VELOX_ASSERT_USER_THROW( -// evaluate("at_least_n_non_nulls(c0, c1)", makeRowVector({input, input})), -// "The first parameter should be constant expression"); -// VELOX_ASSERT_USER_THROW( -// evaluate( -// "at_least_n_non_nulls(cast(null as int), c0)", -// makeRowVector({input})), -// "The first parameter should not be null"); -// } + VELOX_ASSERT_USER_THROW( + evaluate("at_least_n_non_nulls(1.0, c0)", makeRowVector({input})), + "The first input type should be INTEGER but got DOUBLE"); + VELOX_ASSERT_USER_THROW( + evaluate("at_least_n_non_nulls(1)", makeRowVector({})), + "AtLeastNNonNulls expects to receive at least 2 arguments"); + VELOX_ASSERT_USER_THROW( + evaluate("at_least_n_non_nulls(c0, c1)", makeRowVector({input, input})), + "The first parameter should be constant expression"); + VELOX_ASSERT_USER_THROW( + evaluate( + "at_least_n_non_nulls(cast(null as int), c0)", + makeRowVector({input})), + "The first parameter should not be null"); +} } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index 64197efa46e0..80584844bb10 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -326,24 +326,24 @@ TEST_F(DateTimeFunctionsTest, unixTimestampTimestampInput) { EXPECT_EQ(kMin, unixTimestamp(Timestamp(kMin, 0))); } -// TEST_F(DateTimeFunctionsTest, unixTimestampDateInput) { -// const auto unixTimestamp = [&](std::optional date) { -// return evaluateOnce("unix_timestamp(c0)", {DATE()}, date); -// }; -// EXPECT_EQ(0, unixTimestamp(parseDate("1970-01-01"))); -// EXPECT_EQ(1727740800, unixTimestamp(parseDate("2024-10-01"))); -// EXPECT_EQ(-126065894400, unixTimestamp(parseDate("-2025-02-18"))); -// setQueryTimeZone("America/Los_Angeles"); -// EXPECT_EQ(1727766000, unixTimestamp(parseDate("2024-10-01"))); -// EXPECT_EQ(-126065866022, unixTimestamp(parseDate("-2025-02-18"))); -// EXPECT_EQ(2398320000, unixTimestamp(parseDate("2045-12-31"))); -// VELOX_ASSERT_USER_THROW( -// unixTimestamp(kMax), -// "Could not convert date 5881580-07-11 to unix timestamp."); -// VELOX_ASSERT_USER_THROW( -// unixTimestamp(kMin), -// "Could not convert date -5877641-06-23 to unix timestamp."); -// } +TEST_F(DateTimeFunctionsTest, unixTimestampDateInput) { + const auto unixTimestamp = [&](std::optional date) { + return evaluateOnce("unix_timestamp(c0)", {DATE()}, date); + }; + EXPECT_EQ(0, unixTimestamp(parseDate("1970-01-01"))); + EXPECT_EQ(1727740800, unixTimestamp(parseDate("2024-10-01"))); + EXPECT_EQ(-126065894400, unixTimestamp(parseDate("-2025-02-18"))); + setQueryTimeZone("America/Los_Angeles"); + EXPECT_EQ(1727766000, unixTimestamp(parseDate("2024-10-01"))); + EXPECT_EQ(-126065866022, unixTimestamp(parseDate("-2025-02-18"))); + EXPECT_EQ(2398320000, unixTimestamp(parseDate("2045-12-31"))); + VELOX_ASSERT_USER_THROW( + unixTimestamp(kMax), + "Could not convert date 5881580-07-11 to unix timestamp."); + VELOX_ASSERT_USER_THROW( + unixTimestamp(kMin), + "Could not convert date -5877641-06-23 to unix timestamp."); +} // unix_timestamp and to_unix_timestamp are aliases. TEST_F(DateTimeFunctionsTest, toUnixTimestamp) { diff --git a/velox/functions/sparksql/tests/ElementAtTest.cpp b/velox/functions/sparksql/tests/ElementAtTest.cpp index ec450520247b..67f33cbe426c 100644 --- a/velox/functions/sparksql/tests/ElementAtTest.cpp +++ b/velox/functions/sparksql/tests/ElementAtTest.cpp @@ -48,32 +48,32 @@ class ElementAtTest : public SparkFunctionBaseTest { // #1 - start indices at 1. If Index is 0 will throw an error. // #2 - allow out of bounds access for arrays (return null). // #3 - allow negative indices (return elements from the last to the first). -// TEST_F(ElementAtTest, allFlavors2) { -// auto arrayVector = makeArrayVector({{10, 11, 12}}); +TEST_F(ElementAtTest, allFlavors2) { + auto arrayVector = makeArrayVector({{10, 11, 12}}); -// // Create a simple vector containing a single map ([10=>10, 11=>11, 12=>12]). -// auto keyAt = [](auto idx) { return idx + 10; }; -// auto sizeAt = [](auto) { return 3; }; -// auto mapValueAt = [](auto idx) { return idx + 10; }; -// auto mapVector = -// makeMapVector(1, sizeAt, keyAt, mapValueAt); + // Create a simple vector containing a single map ([10=>10, 11=>11, 12=>12]). + auto keyAt = [](auto idx) { return idx + 10; }; + auto sizeAt = [](auto) { return 3; }; + auto mapValueAt = [](auto idx) { return idx + 10; }; + auto mapVector = + makeMapVector(1, sizeAt, keyAt, mapValueAt); -// // #1 -// EXPECT_EQ(elementAtSimple("element_at(C0, 1)", {arrayVector}), 10); -// EXPECT_EQ(elementAtSimple("element_at(C0, 2)", {arrayVector}), 11); -// EXPECT_EQ(elementAtSimple("element_at(C0, 3)", {arrayVector}), 12); -// VELOX_ASSERT_THROW( -// elementAtSimple("element_at(C0, 0)", {arrayVector}), -// "SQL array indices start at 1"); -// // #2 -// EXPECT_EQ(elementAtSimple("element_at(C0, 4)", {arrayVector}), std::nullopt); -// EXPECT_EQ(elementAtSimple("element_at(C0, 5)", {arrayVector}), std::nullopt); -// EXPECT_EQ(elementAtSimple("element_at(C0, 1001)", {mapVector}), std::nullopt); + // #1 + EXPECT_EQ(elementAtSimple("element_at(C0, 1)", {arrayVector}), 10); + EXPECT_EQ(elementAtSimple("element_at(C0, 2)", {arrayVector}), 11); + EXPECT_EQ(elementAtSimple("element_at(C0, 3)", {arrayVector}), 12); + VELOX_ASSERT_THROW( + elementAtSimple("element_at(C0, 0)", {arrayVector}), + "SQL array indices start at 1"); + // #2 + EXPECT_EQ(elementAtSimple("element_at(C0, 4)", {arrayVector}), std::nullopt); + EXPECT_EQ(elementAtSimple("element_at(C0, 5)", {arrayVector}), std::nullopt); + EXPECT_EQ(elementAtSimple("element_at(C0, 1001)", {mapVector}), std::nullopt); -// // #3 -// EXPECT_EQ(elementAtSimple("element_at(C0, -1)", {arrayVector}), 12); -// EXPECT_EQ(elementAtSimple("element_at(C0, -2)", {arrayVector}), 11); -// EXPECT_EQ(elementAtSimple("element_at(C0, -3)", {arrayVector}), 10); -// EXPECT_EQ(elementAtSimple("element_at(C0, -4)", {arrayVector}), std::nullopt); -// } + // #3 + EXPECT_EQ(elementAtSimple("element_at(C0, -1)", {arrayVector}), 12); + EXPECT_EQ(elementAtSimple("element_at(C0, -2)", {arrayVector}), 11); + EXPECT_EQ(elementAtSimple("element_at(C0, -3)", {arrayVector}), 10); + EXPECT_EQ(elementAtSimple("element_at(C0, -4)", {arrayVector}), std::nullopt); +} } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/FromJsonTest.cpp b/velox/functions/sparksql/tests/FromJsonTest.cpp index c3c8fbeb1718..117ca6ebd244 100644 --- a/velox/functions/sparksql/tests/FromJsonTest.cpp +++ b/velox/functions/sparksql/tests/FromJsonTest.cpp @@ -250,20 +250,20 @@ TEST_F(FromJsonTest, structWrongData) { testFromJson(input, makeRowVector({"a"}, {expected})); } -// TEST_F(FromJsonTest, invalidType) { -// auto primitiveTypeOutput = makeFlatVector({2, 2, 3}); -// auto decimalOutput = makeFlatVector({2, 2, 3}, DECIMAL(16, 7)); -// auto mapOutput = -// makeMapVector({{{1, 1}}, {{2, 2}}, {{3, 3}}}); -// auto input = makeFlatVector({R"(2)", R"({2)", R"({3)"}); -// VELOX_ASSERT_USER_THROW( -// testFromJson(input, primitiveTypeOutput), "Unsupported type BIGINT."); -// VELOX_ASSERT_USER_THROW( -// testFromJson(input, makeRowVector({"a"}, {decimalOutput})), -// "Unsupported type ROW"); -// VELOX_ASSERT_USER_THROW( -// testFromJson(input, mapOutput), "Unsupported type MAP."); -// } +TEST_F(FromJsonTest, invalidType) { + auto primitiveTypeOutput = makeFlatVector({2, 2, 3}); + auto decimalOutput = makeFlatVector({2, 2, 3}, DECIMAL(16, 7)); + auto mapOutput = + makeMapVector({{{1, 1}}, {{2, 2}}, {{3, 3}}}); + auto input = makeFlatVector({R"(2)", R"({2)", R"({3)"}); + VELOX_ASSERT_USER_THROW( + testFromJson(input, primitiveTypeOutput), "Unsupported type BIGINT."); + VELOX_ASSERT_USER_THROW( + testFromJson(input, makeRowVector({"a"}, {decimalOutput})), + "Unsupported type ROW"); + VELOX_ASSERT_USER_THROW( + testFromJson(input, mapOutput), "Unsupported type MAP."); +} TEST_F(FromJsonTest, invalidJson) { auto expected = makeNullableFlatVector( diff --git a/velox/functions/sparksql/tests/GetJsonObjectTest.cpp b/velox/functions/sparksql/tests/GetJsonObjectTest.cpp index bb812264d1e9..ec437df7b823 100644 --- a/velox/functions/sparksql/tests/GetJsonObjectTest.cpp +++ b/velox/functions/sparksql/tests/GetJsonObjectTest.cpp @@ -95,29 +95,29 @@ TEST_F(GetJsonObjectTest, basic) { "v2"); } -// TEST_F(GetJsonObjectTest, nullResult) { -// // Field not found. -// EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$.hi"), std::nullopt); +TEST_F(GetJsonObjectTest, nullResult) { + // Field not found. + EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$.hi"), std::nullopt); -// // Illegal json. -// EXPECT_EQ(getJsonObject(R"({"hello"-3.5})", "$.hello"), std::nullopt); -// EXPECT_EQ(getJsonObject(R"({"a": bad, "b": string})", "$.a"), std::nullopt); + // Illegal json. + EXPECT_EQ(getJsonObject(R"({"hello"-3.5})", "$.hello"), std::nullopt); + EXPECT_EQ(getJsonObject(R"({"a": bad, "b": string})", "$.a"), std::nullopt); -// // Illegal json path. -// EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$hello"), std::nullopt); -// EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$."), std::nullopt); -// // The first char is not '$'. -// EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", ".hello"), std::nullopt); -// // Constains '$' not in the first position. -// EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$.$hello"), std::nullopt); + // Illegal json path. + EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$hello"), std::nullopt); + EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$."), std::nullopt); + // The first char is not '$'. + EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", ".hello"), std::nullopt); + // Constains '$' not in the first position. + EXPECT_EQ(getJsonObject(R"({"hello": "3.5"})", "$.$hello"), std::nullopt); -// // Invalid ending character. -// EXPECT_EQ( -// getJsonObject( -// R"([{"my": {"info": {"name": "Alice"quoted""}}}, {"other": ["v1", "v2"]}])", -// "$[0].my.info.name"), -// std::nullopt); -// } + // Invalid ending character. + EXPECT_EQ( + getJsonObject( + R"([{"my": {"info": {"name": "Alice"quoted""}}}, {"other": ["v1", "v2"]}])", + "$[0].my.info.name"), + std::nullopt); +} TEST_F(GetJsonObjectTest, incompleteJson) { EXPECT_EQ(getJsonObject(R"({"hello": "3.5"},)", "$.hello"), "3.5"); diff --git a/velox/functions/sparksql/tests/GetStructFieldTest.cpp b/velox/functions/sparksql/tests/GetStructFieldTest.cpp index 0895194b1b9d..b2f68e945bdc 100644 --- a/velox/functions/sparksql/tests/GetStructFieldTest.cpp +++ b/velox/functions/sparksql/tests/GetStructFieldTest.cpp @@ -94,25 +94,25 @@ TEST_F(GetStructFieldTest, complexType) { testGetStructField(data, 2, colRow); } -// TEST_F(GetStructFieldTest, invalidOrdinal) { -// auto colInt = makeFlatVector({1, 2, 3, 4}); -// auto colString = makeNullableFlatVector( -// {"hello", "world", std::nullopt, "hi"}); -// auto colIntWithNull = -// makeNullableFlatVector({11, std::nullopt, 13, 14}); -// auto data = makeRowVector({colInt, colString, colIntWithNull}); - -// // Get int field. -// VELOX_ASSERT_THROW( -// testGetStructField(data, -1, colInt), -// "Invalid ordinal. Should be greater than 0."); - -// // Get string field. -// VELOX_ASSERT_THROW( -// testGetStructField(data, 4, colString), -// fmt::format( -// "(4 vs. 3) Invalid ordinal. Should be smaller than the children size of input row vector.")); -// } +TEST_F(GetStructFieldTest, invalidOrdinal) { + auto colInt = makeFlatVector({1, 2, 3, 4}); + auto colString = makeNullableFlatVector( + {"hello", "world", std::nullopt, "hi"}); + auto colIntWithNull = + makeNullableFlatVector({11, std::nullopt, 13, 14}); + auto data = makeRowVector({colInt, colString, colIntWithNull}); + + // Get int field. + VELOX_ASSERT_THROW( + testGetStructField(data, -1, colInt), + "Invalid ordinal. Should be greater than 0."); + + // Get string field. + VELOX_ASSERT_THROW( + testGetStructField(data, 4, colString), + fmt::format( + "(4 vs. 3) Invalid ordinal. Should be smaller than the children size of input row vector.")); +} } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/MakeDecimalTest.cpp b/velox/functions/sparksql/tests/MakeDecimalTest.cpp index cdb0f888a1a2..5d11bd9c0dfa 100644 --- a/velox/functions/sparksql/tests/MakeDecimalTest.cpp +++ b/velox/functions/sparksql/tests/MakeDecimalTest.cpp @@ -52,51 +52,51 @@ class MakeDecimalTest : public SparkFunctionBaseTest { } }; -// TEST_F(MakeDecimalTest, makeDecimal) { -// testMakeDecimal( -// makeFlatVector({1111, -1112, 9999, 0}), -// std::nullopt, -// makeFlatVector({1111, -1112, 9999, 0}, DECIMAL(5, 1))); -// testMakeDecimal( -// makeFlatVector({1111, -1112, 9999, 0}), -// true, -// makeFlatVector({1111, -1112, 9999, 0}, DECIMAL(5, 1))); -// testMakeDecimal( -// makeFlatVector( -// {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), -// true, -// makeFlatVector( -// {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}, -// DECIMAL(38, 19))); -// testMakeDecimal( -// makeFlatVector( -// {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), -// true, -// makeNullableFlatVector( -// {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0))); -// VELOX_ASSERT_THROW( -// testMakeDecimal( -// makeFlatVector( -// {11111111, -// -11112112, -// 99999999, -// DecimalUtil::kShortDecimalMax + 1}), -// false, -// makeNullableFlatVector( -// {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0))), -// "Unscaled value 1000000000000000000 too large for precision 18."); -// testMakeDecimal( -// makeFlatVector( -// {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), -// false, -// makeNullableFlatVector( -// {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0)), -// true /*tryMakeDecimal*/); -// testMakeDecimal( -// makeNullableFlatVector({101, std::nullopt, 1000}), -// true, -// makeNullableFlatVector( -// {101, std::nullopt, std::nullopt}, DECIMAL(3, 1))); -// } +TEST_F(MakeDecimalTest, makeDecimal) { + testMakeDecimal( + makeFlatVector({1111, -1112, 9999, 0}), + std::nullopt, + makeFlatVector({1111, -1112, 9999, 0}, DECIMAL(5, 1))); + testMakeDecimal( + makeFlatVector({1111, -1112, 9999, 0}), + true, + makeFlatVector({1111, -1112, 9999, 0}, DECIMAL(5, 1))); + testMakeDecimal( + makeFlatVector( + {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), + true, + makeFlatVector( + {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}, + DECIMAL(38, 19))); + testMakeDecimal( + makeFlatVector( + {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), + true, + makeNullableFlatVector( + {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0))); + VELOX_ASSERT_THROW( + testMakeDecimal( + makeFlatVector( + {11111111, + -11112112, + 99999999, + DecimalUtil::kShortDecimalMax + 1}), + false, + makeNullableFlatVector( + {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0))), + "Unscaled value 1000000000000000000 too large for precision 18."); + testMakeDecimal( + makeFlatVector( + {11111111, -11112112, 99999999, DecimalUtil::kShortDecimalMax + 1}), + false, + makeNullableFlatVector( + {11111111, -11112112, 99999999, std::nullopt}, DECIMAL(18, 0)), + true /*tryMakeDecimal*/); + testMakeDecimal( + makeNullableFlatVector({101, std::nullopt, 1000}), + true, + makeNullableFlatVector( + {101, std::nullopt, std::nullopt}, DECIMAL(3, 1))); +} } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/MakeTimestampTest.cpp b/velox/functions/sparksql/tests/MakeTimestampTest.cpp index f9006151debb..624058c9bdec 100644 --- a/velox/functions/sparksql/tests/MakeTimestampTest.cpp +++ b/velox/functions/sparksql/tests/MakeTimestampTest.cpp @@ -106,127 +106,127 @@ TEST_F(MakeTimestampTest, basic) { } } -// TEST_F(MakeTimestampTest, errors) { -// const auto microsType = DECIMAL(16, 6); -// const auto testInvalidInputs = [&](const RowVectorPtr& data) { -// std::vector> nullResults( -// data->size(), std::nullopt); -// auto expected = makeNullableFlatVector(nullResults); -// auto result = evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data); -// facebook::velox::test::assertEqualVectors(expected, result); -// }; -// std::optional one = 1; -// const auto testInvalidSeconds = [&](std::optional microsec) { -// auto result = evaluateOnce( -// "make_timestamp(c0, c1, c2, c3, c4, c5)", -// {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}, -// one, -// one, -// one, -// one, -// one, -// microsec); -// EXPECT_EQ(result, std::nullopt); -// }; -// const auto testInvalidArguments = [&](std::optional microsec, -// const TypePtr& microsType) { -// return evaluateOnce( -// "make_timestamp(c0, c1, c2, c3, c4, c5)", -// {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}, -// one, -// one, -// one, -// one, -// one, -// microsec); -// }; - -// // Throw if no session time zone. -// VELOX_ASSERT_USER_THROW( -// testInvalidArguments(60007000, DECIMAL(16, 6)), -// "make_timestamp requires session time zone to be set."); - -// setQueryTimeZone("Asia/Shanghai"); -// // Invalid input returns null. -// const auto year = makeFlatVector( -// {facebook::velox::util::kMinYear - 1, -// facebook::velox::util::kMaxYear + 1, -// 1, -// 1, -// 1, -// 1, -// 1, -// 1}); -// const auto month = makeFlatVector({1, 1, 0, 13, 1, 1, 1, 1}); -// const auto day = makeFlatVector({1, 1, 1, 1, 0, 32, 1, 1}); -// const auto hour = makeFlatVector({1, 1, 1, 1, 1, 1, 25, 1}); -// const auto minute = makeFlatVector({1, 1, 1, 1, 1, 1, 1, 61}); -// const auto micros = -// makeFlatVector({1, 1, 1, 1, 1, 1, 1, 1}, microsType); -// auto data = makeRowVector({year, month, day, hour, minute, micros}); -// testInvalidInputs(data); - -// // Seconds should be either in the range of [0,59], or 60 with zero -// // microseconds. -// testInvalidSeconds(61e6); -// testInvalidSeconds(99999999); -// testInvalidSeconds(999999999); -// testInvalidSeconds(60007000); - -// // Throw if data type for microseconds is invalid. -// VELOX_ASSERT_THROW( -// testInvalidArguments(1e6, DECIMAL(20, 6)), -// "Seconds must be short decimal type but got DECIMAL(20, 6)"); -// VELOX_ASSERT_THROW( -// testInvalidArguments(1e6, DECIMAL(16, 8)), -// "Scalar function signature is not supported: " -// "make_timestamp(INTEGER, INTEGER, INTEGER, INTEGER, INTEGER, " -// "DECIMAL(16, 8))."); -// } - -// TEST_F(MakeTimestampTest, invalidTimezone) { -// const auto microsType = DECIMAL(16, 6); -// const auto year = makeFlatVector({2021, 2021, 2021, 2021, 2021}); -// const auto month = makeFlatVector({7, 7, 7, 7, 7}); -// const auto day = makeFlatVector({11, 11, 11, 11, 11}); -// const auto hour = makeFlatVector({6, 6, 6, -6, 6}); -// const auto minute = makeFlatVector({30, 30, 30, 30, 30}); -// const auto micros = makeNullableFlatVector( -// {45678000, 1e6, 6e7, 59999999, std::nullopt}, microsType); -// auto data = makeRowVector({year, month, day, hour, minute, micros}); - -// // Time zone is not set. -// VELOX_ASSERT_USER_THROW( -// evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data), -// "make_timestamp requires session time zone to be set."); - -// // Invalid constant time zone. -// setQueryTimeZone("GMT"); -// for (auto timeZone : {"Invalid", ""}) { -// SCOPED_TRACE(fmt::format("timezone: {}", timeZone)); -// VELOX_ASSERT_USER_THROW( -// evaluate( -// fmt::format( -// "make_timestamp(c0, c1, c2, c3, c4, c5, '{}')", timeZone), -// data), -// fmt::format("Unknown time zone: '{}'", timeZone)); -// } - -// // Invalid timezone from vector. -// auto timeZones = makeFlatVector( -// {"GMT", "CET", "Asia/Shanghai", "Invalid", "GMT"}); -// data = makeRowVector({year, month, day, hour, minute, micros, timeZones}); -// VELOX_ASSERT_USER_THROW( -// evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data), -// "Unknown time zone: 'Invalid'"); - -// timeZones = -// makeFlatVector({"GMT", "CET", "Asia/Shanghai", "", "GMT"}); -// data = makeRowVector({year, month, day, hour, minute, micros, timeZones}); -// VELOX_ASSERT_USER_THROW( -// evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data), -// "Unknown time zone: ''"); -// } +TEST_F(MakeTimestampTest, errors) { + const auto microsType = DECIMAL(16, 6); + const auto testInvalidInputs = [&](const RowVectorPtr& data) { + std::vector> nullResults( + data->size(), std::nullopt); + auto expected = makeNullableFlatVector(nullResults); + auto result = evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data); + facebook::velox::test::assertEqualVectors(expected, result); + }; + std::optional one = 1; + const auto testInvalidSeconds = [&](std::optional microsec) { + auto result = evaluateOnce( + "make_timestamp(c0, c1, c2, c3, c4, c5)", + {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}, + one, + one, + one, + one, + one, + microsec); + EXPECT_EQ(result, std::nullopt); + }; + const auto testInvalidArguments = [&](std::optional microsec, + const TypePtr& microsType) { + return evaluateOnce( + "make_timestamp(c0, c1, c2, c3, c4, c5)", + {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}, + one, + one, + one, + one, + one, + microsec); + }; + + // Throw if no session time zone. + VELOX_ASSERT_USER_THROW( + testInvalidArguments(60007000, DECIMAL(16, 6)), + "make_timestamp requires session time zone to be set."); + + setQueryTimeZone("Asia/Shanghai"); + // Invalid input returns null. + const auto year = makeFlatVector( + {facebook::velox::util::kMinYear - 1, + facebook::velox::util::kMaxYear + 1, + 1, + 1, + 1, + 1, + 1, + 1}); + const auto month = makeFlatVector({1, 1, 0, 13, 1, 1, 1, 1}); + const auto day = makeFlatVector({1, 1, 1, 1, 0, 32, 1, 1}); + const auto hour = makeFlatVector({1, 1, 1, 1, 1, 1, 25, 1}); + const auto minute = makeFlatVector({1, 1, 1, 1, 1, 1, 1, 61}); + const auto micros = + makeFlatVector({1, 1, 1, 1, 1, 1, 1, 1}, microsType); + auto data = makeRowVector({year, month, day, hour, minute, micros}); + testInvalidInputs(data); + + // Seconds should be either in the range of [0,59], or 60 with zero + // microseconds. + testInvalidSeconds(61e6); + testInvalidSeconds(99999999); + testInvalidSeconds(999999999); + testInvalidSeconds(60007000); + + // Throw if data type for microseconds is invalid. + VELOX_ASSERT_THROW( + testInvalidArguments(1e6, DECIMAL(20, 6)), + "Seconds must be short decimal type but got DECIMAL(20, 6)"); + VELOX_ASSERT_THROW( + testInvalidArguments(1e6, DECIMAL(16, 8)), + "Scalar function signature is not supported: " + "make_timestamp(INTEGER, INTEGER, INTEGER, INTEGER, INTEGER, " + "DECIMAL(16, 8))."); +} + +TEST_F(MakeTimestampTest, invalidTimezone) { + const auto microsType = DECIMAL(16, 6); + const auto year = makeFlatVector({2021, 2021, 2021, 2021, 2021}); + const auto month = makeFlatVector({7, 7, 7, 7, 7}); + const auto day = makeFlatVector({11, 11, 11, 11, 11}); + const auto hour = makeFlatVector({6, 6, 6, -6, 6}); + const auto minute = makeFlatVector({30, 30, 30, 30, 30}); + const auto micros = makeNullableFlatVector( + {45678000, 1e6, 6e7, 59999999, std::nullopt}, microsType); + auto data = makeRowVector({year, month, day, hour, minute, micros}); + + // Time zone is not set. + VELOX_ASSERT_USER_THROW( + evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data), + "make_timestamp requires session time zone to be set."); + + // Invalid constant time zone. + setQueryTimeZone("GMT"); + for (auto timeZone : {"Invalid", ""}) { + SCOPED_TRACE(fmt::format("timezone: {}", timeZone)); + VELOX_ASSERT_USER_THROW( + evaluate( + fmt::format( + "make_timestamp(c0, c1, c2, c3, c4, c5, '{}')", timeZone), + data), + fmt::format("Unknown time zone: '{}'", timeZone)); + } + + // Invalid timezone from vector. + auto timeZones = makeFlatVector( + {"GMT", "CET", "Asia/Shanghai", "Invalid", "GMT"}); + data = makeRowVector({year, month, day, hour, minute, micros, timeZones}); + VELOX_ASSERT_USER_THROW( + evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data), + "Unknown time zone: 'Invalid'"); + + timeZones = + makeFlatVector({"GMT", "CET", "Asia/Shanghai", "", "GMT"}); + data = makeRowVector({year, month, day, hour, minute, micros, timeZones}); + VELOX_ASSERT_USER_THROW( + evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data), + "Unknown time zone: ''"); +} } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/MaskTest.cpp b/velox/functions/sparksql/tests/MaskTest.cpp index 6453553a0dba..cd5b6179a42f 100644 --- a/velox/functions/sparksql/tests/MaskTest.cpp +++ b/velox/functions/sparksql/tests/MaskTest.cpp @@ -269,30 +269,30 @@ TEST_F(MaskTest, mask) { EXPECT_EQ(maskWithThreeArg("ABCabc", "🚀", "🚀"), "🚀🚀🚀🚀🚀🚀"); } -// TEST_F(MaskTest, maskWithError) { -// std::string upperChar = "Y"; -// std::string lowerChar = "y"; -// std::string digitChar = "d"; -// std::string otherChar = "*"; -// VELOX_ASSERT_USER_THROW( -// maskWithFiveArg("AbCD123-@$#", "", lowerChar, digitChar, otherChar), -// "Replacement string must contain a single character and cannot be empty."); +TEST_F(MaskTest, maskWithError) { + std::string upperChar = "Y"; + std::string lowerChar = "y"; + std::string digitChar = "d"; + std::string otherChar = "*"; + VELOX_ASSERT_USER_THROW( + maskWithFiveArg("AbCD123-@$#", "", lowerChar, digitChar, otherChar), + "Replacement string must contain a single character and cannot be empty."); -// VELOX_ASSERT_USER_THROW( -// maskWithFiveArg("AbCD123-@$#", "🚀🚀", lowerChar, digitChar, otherChar), -// "Replacement string must contain a single character and cannot be empty."); + VELOX_ASSERT_USER_THROW( + maskWithFiveArg("AbCD123-@$#", "🚀🚀", lowerChar, digitChar, otherChar), + "Replacement string must contain a single character and cannot be empty."); -// VELOX_ASSERT_USER_THROW( -// maskWithFiveArg("AbCD123-@$#", upperChar, "", digitChar, otherChar), -// "Replacement string must contain a single character and cannot be empty."); + VELOX_ASSERT_USER_THROW( + maskWithFiveArg("AbCD123-@$#", upperChar, "", digitChar, otherChar), + "Replacement string must contain a single character and cannot be empty."); -// VELOX_ASSERT_USER_THROW( -// maskWithFiveArg("AbCD123-@$#", upperChar, lowerChar, "", otherChar), -// "Replacement string must contain a single character and cannot be empty."); + VELOX_ASSERT_USER_THROW( + maskWithFiveArg("AbCD123-@$#", upperChar, lowerChar, "", otherChar), + "Replacement string must contain a single character and cannot be empty."); -// VELOX_ASSERT_USER_THROW( -// maskWithFiveArg("AbCD123-@$#", upperChar, lowerChar, digitChar, ""), -// "Replacement string must contain a single character and cannot be empty."); -// } + VELOX_ASSERT_USER_THROW( + maskWithFiveArg("AbCD123-@$#", upperChar, lowerChar, digitChar, ""), + "Replacement string must contain a single character and cannot be empty."); +} } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/RegexFunctionsTest.cpp b/velox/functions/sparksql/tests/RegexFunctionsTest.cpp index 7324bfe59166..412ef520792c 100644 --- a/velox/functions/sparksql/tests/RegexFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/RegexFunctionsTest.cpp @@ -215,20 +215,20 @@ TEST_F(RegexFunctionsTest, allowSimpleConstantRegex) { EXPECT_EQ(rlike(std::nullopt, "a*"), std::nullopt); } -// TEST_F(RegexFunctionsTest, blockUnsupportedEdgeCases) { -// // Non-constant pattern. -// EXPECT_THROW( -// evaluateOnce("rlike('a', c0)", std::optional("a*")), -// VeloxUserError); -// } - -// TEST_F(RegexFunctionsTest, regexMatchRegistration) { -// EXPECT_THROW( -// evaluateOnce( -// "regexp_extract('a', c0)", std::optional("a*")), -// VeloxUserError); -// EXPECT_EQ(regexp_extract("abc", "a."), "ab"); -// } +TEST_F(RegexFunctionsTest, blockUnsupportedEdgeCases) { + // Non-constant pattern. + EXPECT_THROW( + evaluateOnce("rlike('a', c0)", std::optional("a*")), + VeloxUserError); +} + +TEST_F(RegexFunctionsTest, regexMatchRegistration) { + EXPECT_THROW( + evaluateOnce( + "regexp_extract('a', c0)", std::optional("a*")), + VeloxUserError); + EXPECT_EQ(regexp_extract("abc", "a."), "ab"); +} TEST_F(RegexFunctionsTest, regexpReplaceRegistration) { std::string output = "teeheebc"; diff --git a/velox/functions/sparksql/tests/SortArrayTest.cpp b/velox/functions/sparksql/tests/SortArrayTest.cpp index 8955f175340c..fa0449a37f9e 100644 --- a/velox/functions/sparksql/tests/SortArrayTest.cpp +++ b/velox/functions/sparksql/tests/SortArrayTest.cpp @@ -75,14 +75,14 @@ class SortArrayTest : public SparkFunctionBaseTest { } }; -// TEST_F(SortArrayTest, invalidInput) { -// auto arg0 = makeNullableArrayVector({{0, 1}}); -// std::vector v = {false}; -// auto arg1 = makeFlatVector(v); -// ASSERT_THROW( -// evaluate("sort_array(c0, c1)", makeRowVector({arg0, arg1})), -// VeloxException); -// } +TEST_F(SortArrayTest, invalidInput) { + auto arg0 = makeNullableArrayVector({{0, 1}}); + std::vector v = {false}; + auto arg1 = makeFlatVector(v); + ASSERT_THROW( + evaluate("sort_array(c0, c1)", makeRowVector({arg0, arg1})), + VeloxException); +} TEST_F(SortArrayTest, int8) { testInt(); diff --git a/velox/functions/sparksql/tests/SparkPartitionIdTest.cpp b/velox/functions/sparksql/tests/SparkPartitionIdTest.cpp index 94754919feba..67eaaf4ddb23 100644 --- a/velox/functions/sparksql/tests/SparkPartitionIdTest.cpp +++ b/velox/functions/sparksql/tests/SparkPartitionIdTest.cpp @@ -39,18 +39,18 @@ TEST_F(SparkPartitionIdTest, basic) { testSparkPartitionId(100, 100); } -// TEST_F(SparkPartitionIdTest, error) { -// auto rowVector = makeRowVector(ROW({}), 1); +TEST_F(SparkPartitionIdTest, error) { + auto rowVector = makeRowVector(ROW({}), 1); -// queryCtx_->testingOverrideConfigUnsafe({{}}); -// VELOX_ASSERT_THROW( -// evaluate("spark_partition_id()", rowVector), -// "Spark partition id is not set"); + queryCtx_->testingOverrideConfigUnsafe({{}}); + VELOX_ASSERT_THROW( + evaluate("spark_partition_id()", rowVector), + "Spark partition id is not set"); -// setSparkPartitionId(-1); -// VELOX_ASSERT_THROW( -// evaluate("spark_partition_id()", rowVector), -// "Invalid Spark partition id"); -// } + setSparkPartitionId(-1); + VELOX_ASSERT_THROW( + evaluate("spark_partition_id()", rowVector), + "Invalid Spark partition id"); +} } // namespace } // namespace facebook::velox::functions::sparksql::test From 081e9bb0da9a6ec28dee8782cef5e9dbe201bdbc Mon Sep 17 00:00:00 2001 From: zouyunhe Date: Mon, 8 Sep 2025 10:01:41 +0000 Subject: [PATCH 6/9] remove useless changes --- velox/dwio/parquet/writer/arrow/CMakeLists.txt | 6 +++--- velox/functions/sparksql/fuzzer/CMakeLists.txt | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/velox/dwio/parquet/writer/arrow/CMakeLists.txt b/velox/dwio/parquet/writer/arrow/CMakeLists.txt index 4b91c85aebf6..fb0e4a4bb7df 100644 --- a/velox/dwio/parquet/writer/arrow/CMakeLists.txt +++ b/velox/dwio/parquet/writer/arrow/CMakeLists.txt @@ -14,9 +14,9 @@ add_subdirectory(util) -#if(${VELOX_BUILD_TESTING}) - # add_subdirectory(tests) -#endif() +if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) +endif() velox_add_library( velox_dwio_arrow_parquet_writer_lib diff --git a/velox/functions/sparksql/fuzzer/CMakeLists.txt b/velox/functions/sparksql/fuzzer/CMakeLists.txt index 2030960dfd87..ecb0ac20f709 100644 --- a/velox/functions/sparksql/fuzzer/CMakeLists.txt +++ b/velox/functions/sparksql/fuzzer/CMakeLists.txt @@ -113,6 +113,6 @@ target_link_libraries( GTest::gtest GTest::gtest_main) -#if(${VELOX_BUILD_TESTING}) -# add_subdirectory(tests) -#endif() +if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) +endif() From e4bfeb6b88a1b77c1cba60428227c3dbecfbccb6 Mon Sep 17 00:00:00 2001 From: zouyunhe Date: Wed, 10 Sep 2025 04:25:02 +0000 Subject: [PATCH 7/9] support split_index --- .../experimental/stateful/udf/CMakeLists.txt | 4 ++ velox/experimental/stateful/udf/Register.cpp | 3 + velox/experimental/stateful/udf/SplitIndex.h | 56 +++++++++++++++++++ .../stateful/udf/tests/CMakeLists.txt | 25 +++++++++ .../stateful/udf/tests/UDFTest.cpp | 56 +++++++++++++++++++ velox/functions/sparksql/DateTimeFunctions.h | 11 ---- velox/functions/sparksql/String.h | 33 ----------- .../registration/RegisterDatetime.cpp | 2 - .../sparksql/registration/RegisterString.cpp | 2 - velox/functions/sparksql/tests/StringTest.cpp | 17 ------ 10 files changed, 144 insertions(+), 65 deletions(-) create mode 100644 velox/experimental/stateful/udf/SplitIndex.h create mode 100644 velox/experimental/stateful/udf/tests/CMakeLists.txt create mode 100644 velox/experimental/stateful/udf/tests/UDFTest.cpp diff --git a/velox/experimental/stateful/udf/CMakeLists.txt b/velox/experimental/stateful/udf/CMakeLists.txt index e9eba89c73b7..0f0169ce136c 100644 --- a/velox/experimental/stateful/udf/CMakeLists.txt +++ b/velox/experimental/stateful/udf/CMakeLists.txt @@ -16,3 +16,7 @@ velox_add_library( velox_stateful_udf OBJECT Register.cpp) + +if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) +endif() \ No newline at end of file diff --git a/velox/experimental/stateful/udf/Register.cpp b/velox/experimental/stateful/udf/Register.cpp index 27b473099534..fe0555a8564e 100644 --- a/velox/experimental/stateful/udf/Register.cpp +++ b/velox/experimental/stateful/udf/Register.cpp @@ -16,6 +16,7 @@ #include "velox/experimental/stateful/udf/Register.h" #include "velox/experimental/stateful/udf/BigoUDF.h" #include "velox/experimental/stateful/udf/ExtractDateTime.h" +#include "velox/experimental/stateful/udf/SplitIndex.h" #include "velox/functions/Registerer.h" namespace facebook::velox::stateful::udf { @@ -25,6 +26,8 @@ void registerFunctions(const std::string& prefix) { {prefix + "count_char"}); registerFunction( {prefix + "extract"}); + registerFunction( + {prefix + "split_index"}); } } // namespace facebook::velox::stateful::udf diff --git a/velox/experimental/stateful/udf/SplitIndex.h b/velox/experimental/stateful/udf/SplitIndex.h new file mode 100644 index 000000000000..8eab98eaa4b2 --- /dev/null +++ b/velox/experimental/stateful/udf/SplitIndex.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/functions/lib/string/StringImpl.h" +#include "velox/functions/Macros.h" + +namespace facebook::velox::stateful::udf { + +template +struct SplitIndexFunction { + VELOX_DEFINE_FUNCTION_TYPES(T); + + // Results refer to strings in the first argument. + static constexpr int32_t reuse_strings_from_arg = 0; + + // ASCII input always produces ASCII result. + static constexpr bool is_default_ascii_behavior = true; + + FOLLY_ALWAYS_INLINE bool call( + out_type& result, + const arg_type& input, + const arg_type& delimiter, + const int64_t& index) { + if (index <= 0) { + return false; + } + return functions::stringImpl::splitPart(result, input, delimiter, index); + } + + FOLLY_ALWAYS_INLINE bool callAscii( + out_type& result, + const arg_type& input, + const arg_type& delimiter, + const int64_t& index) { + if (index <= 0) { + return false; + } + return functions::stringImpl::splitPart(result, input, delimiter, index); + } +}; + +} \ No newline at end of file diff --git a/velox/experimental/stateful/udf/tests/CMakeLists.txt b/velox/experimental/stateful/udf/tests/CMakeLists.txt new file mode 100644 index 000000000000..76daf5737ba5 --- /dev/null +++ b/velox/experimental/stateful/udf/tests/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +add_executable(velox_stateful_udf_test UDFTest.cpp) + +add_test(velox_stateful_udf_test velox_stateful_udf_test) + +target_link_libraries( + velox_stateful_udf_test + velox_vector_test_lib + velox_exec_test_lib + velox_exec + velox_common_base + GTest::gtest + GTest::gtest_main) \ No newline at end of file diff --git a/velox/experimental/stateful/udf/tests/UDFTest.cpp b/velox/experimental/stateful/udf/tests/UDFTest.cpp new file mode 100644 index 000000000000..152ef1d7df22 --- /dev/null +++ b/velox/experimental/stateful/udf/tests/UDFTest.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" +#include "velox/parse/TypeResolver.h" +#include "velox/experimental/stateful/udf/Register.h" + +#include +#include + +namespace facebook::velox::udf::test { + +class UDFTest : public functions::test::FunctionBaseTest { +protected: + static void SetUpTestCase() { + parse::registerTypeResolver(); + stateful::udf::registerFunctions(""); + memory::MemoryManager::testingSetInstance({}); + } +}; + +TEST_F(UDFTest, splitIndex) { + const auto splitIndex = [&](const std::optional& a, + const std::optional& d, const std::optional& i) { + return evaluateOnce("split_index(c0, c1, c2)", a, d, i); + }; + EXPECT_EQ(splitIndex("a/b/c", "/", 1), "a"); + EXPECT_EQ(splitIndex("a/b/c", "/", 2), "b"); + const std::optional res0 = splitIndex("a/b/c", "/", 0); + const std::optional res1 = splitIndex("a/b/c", "/", -1); + const std::optional res2 = splitIndex("a/b/c", "/", 4); + EXPECT_EQ(res0.has_value(), false); + EXPECT_EQ(res1.has_value(), false); + EXPECT_EQ(res2.has_value(), false); +} + +} + +int main(int argc, char* argv[]) { + testing::InitGoogleTest(&argc, argv); + folly::Init init(&argc, &argv, false); + gflags::ParseCommandLineFlags(&argc, &argv, true); // Parse gflags + return RUN_ALL_TESTS(); +} diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 56d2268e5edf..2da694fb9e0a 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -143,17 +143,6 @@ struct UnixTimestampFunction { } }; -template -struct CurrentTimestampFunction { - VELOX_DEFINE_FUNCTION_TYPES(T); - - FOLLY_ALWAYS_INLINE bool call( - out_type& result) { - result = Timestamp::now(); - return true; - } -}; - template struct UnixTimestampParseFunction { VELOX_DEFINE_FUNCTION_TYPES(T); diff --git a/velox/functions/sparksql/String.h b/velox/functions/sparksql/String.h index 4755c7ecf9be..b90b9b851d33 100644 --- a/velox/functions/sparksql/String.h +++ b/velox/functions/sparksql/String.h @@ -1556,37 +1556,4 @@ struct Empty2NullFunction { } }; -template -struct SplitIndex { - VELOX_DEFINE_FUNCTION_TYPES(T); - - // Results refer to strings in the first argument. - static constexpr int32_t reuse_strings_from_arg = 0; - - // ASCII input always produces ASCII result. - static constexpr bool is_default_ascii_behavior = true; - - FOLLY_ALWAYS_INLINE bool call( - out_type& result, - const arg_type& input, - const arg_type& delimiter, - const int64_t& index) { - if (index <= 0) { - return false; - } - return stringImpl::splitPart(result, input, delimiter, index); - } - - FOLLY_ALWAYS_INLINE bool callAscii( - out_type& result, - const arg_type& input, - const arg_type& delimiter, - const int64_t& index) { - if (index <= 0) { - return false; - } - return stringImpl::splitPart(result, input, delimiter, index); - } -}; - } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/registration/RegisterDatetime.cpp b/velox/functions/sparksql/registration/RegisterDatetime.cpp index 6c5005a4dae2..0625ae202544 100644 --- a/velox/functions/sparksql/registration/RegisterDatetime.cpp +++ b/velox/functions/sparksql/registration/RegisterDatetime.cpp @@ -94,8 +94,6 @@ void registerDatetimeFunctions(const std::string& prefix) { {prefix + "timestamp_millis"}); registerFunction( {prefix + "date_trunc"}); - registerFunction( - {prefix + "current_timestamp"}); } } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/registration/RegisterString.cpp b/velox/functions/sparksql/registration/RegisterString.cpp index ff6a2a4c4122..007b77868a1a 100644 --- a/velox/functions/sparksql/registration/RegisterString.cpp +++ b/velox/functions/sparksql/registration/RegisterString.cpp @@ -147,8 +147,6 @@ void registerStringFunctions(const std::string& prefix) { registerFunctionCallToSpecialForm( ConcatWsCallToSpecialForm::kConcatWs, std::make_unique()); - registerFunction( - {prefix + "split_index"}); } } // namespace sparksql } // namespace facebook::velox::functions diff --git a/velox/functions/sparksql/tests/StringTest.cpp b/velox/functions/sparksql/tests/StringTest.cpp index b32748b941a2..7f8e0707fd41 100644 --- a/velox/functions/sparksql/tests/StringTest.cpp +++ b/velox/functions/sparksql/tests/StringTest.cpp @@ -17,9 +17,7 @@ #include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h" #include "velox/type/Type.h" -#include #include -#include namespace facebook::velox::functions::sparksql::test { namespace { @@ -1058,20 +1056,5 @@ TEST_F(StringTest, empty2Null) { EXPECT_EQ(empty2Null("abc"), "abc"); } -TEST_F(StringTest, splitIndex) { - const auto splitIndex = [&](const std::optional& a, - const std::optional& d, const std::optional& i) { - return evaluateOnce("split_index(c0, c1, c2)", a, d, i); - }; - EXPECT_EQ(splitIndex("a/b/c", "/", 1), "a"); - EXPECT_EQ(splitIndex("a/b/c", "/", 2), "b"); - const std::optional res0 = splitIndex("a/b/c", "/", 0); - const std::optional res1 = splitIndex("a/b/c", "/", -1); - const std::optional res2 = splitIndex("a/b/c", "/", 4); - EXPECT_EQ(res0.has_value(), false); - EXPECT_EQ(res1.has_value(), false); - EXPECT_EQ(res2.has_value(), false); -} - } // namespace } // namespace facebook::velox::functions::sparksql::test From 3ec8700ab1ff5898a3b914461ae49ede178db7a7 Mon Sep 17 00:00:00 2001 From: zouyunhe Date: Wed, 10 Sep 2025 04:37:30 +0000 Subject: [PATCH 8/9] codestyle --- velox/experimental/stateful/udf/SplitIndex.h | 12 +++++++----- .../experimental/stateful/udf/tests/CMakeLists.txt | 2 +- velox/experimental/stateful/udf/tests/UDFTest.cpp | 13 +++++++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/velox/experimental/stateful/udf/SplitIndex.h b/velox/experimental/stateful/udf/SplitIndex.h index 8eab98eaa4b2..ac7409f06cd8 100644 --- a/velox/experimental/stateful/udf/SplitIndex.h +++ b/velox/experimental/stateful/udf/SplitIndex.h @@ -15,12 +15,12 @@ */ #pragma once -#include "velox/functions/lib/string/StringImpl.h" #include "velox/functions/Macros.h" +#include "velox/functions/lib/string/StringImpl.h" namespace facebook::velox::stateful::udf { -template +template struct SplitIndexFunction { VELOX_DEFINE_FUNCTION_TYPES(T); @@ -38,7 +38,8 @@ struct SplitIndexFunction { if (index <= 0) { return false; } - return functions::stringImpl::splitPart(result, input, delimiter, index); + return functions::stringImpl::splitPart( + result, input, delimiter, index); } FOLLY_ALWAYS_INLINE bool callAscii( @@ -49,8 +50,9 @@ struct SplitIndexFunction { if (index <= 0) { return false; } - return functions::stringImpl::splitPart(result, input, delimiter, index); + return functions::stringImpl::splitPart( + result, input, delimiter, index); } }; -} \ No newline at end of file +} // namespace facebook::velox::stateful::udf \ No newline at end of file diff --git a/velox/experimental/stateful/udf/tests/CMakeLists.txt b/velox/experimental/stateful/udf/tests/CMakeLists.txt index 76daf5737ba5..255edc09cfda 100644 --- a/velox/experimental/stateful/udf/tests/CMakeLists.txt +++ b/velox/experimental/stateful/udf/tests/CMakeLists.txt @@ -22,4 +22,4 @@ target_link_libraries( velox_exec velox_common_base GTest::gtest - GTest::gtest_main) \ No newline at end of file + GTest::gtest_main) diff --git a/velox/experimental/stateful/udf/tests/UDFTest.cpp b/velox/experimental/stateful/udf/tests/UDFTest.cpp index 152ef1d7df22..9173187dc218 100644 --- a/velox/experimental/stateful/udf/tests/UDFTest.cpp +++ b/velox/experimental/stateful/udf/tests/UDFTest.cpp @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "velox/experimental/stateful/udf/Register.h" #include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" #include "velox/parse/TypeResolver.h" -#include "velox/experimental/stateful/udf/Register.h" #include #include @@ -23,7 +23,7 @@ namespace facebook::velox::udf::test { class UDFTest : public functions::test::FunctionBaseTest { -protected: + protected: static void SetUpTestCase() { parse::registerTypeResolver(); stateful::udf::registerFunctions(""); @@ -33,11 +33,12 @@ class UDFTest : public functions::test::FunctionBaseTest { TEST_F(UDFTest, splitIndex) { const auto splitIndex = [&](const std::optional& a, - const std::optional& d, const std::optional& i) { - return evaluateOnce("split_index(c0, c1, c2)", a, d, i); + const std::optional& d, + const std::optional& i) { + return evaluateOnce("split_index(c0, c1, c2)", a, d, i); }; EXPECT_EQ(splitIndex("a/b/c", "/", 1), "a"); - EXPECT_EQ(splitIndex("a/b/c", "/", 2), "b"); + EXPECT_EQ(splitIndex("a/b/c", "/", 2), "b"); const std::optional res0 = splitIndex("a/b/c", "/", 0); const std::optional res1 = splitIndex("a/b/c", "/", -1); const std::optional res2 = splitIndex("a/b/c", "/", 4); @@ -46,7 +47,7 @@ TEST_F(UDFTest, splitIndex) { EXPECT_EQ(res2.has_value(), false); } -} +} // namespace facebook::velox::udf::test int main(int argc, char* argv[]) { testing::InitGoogleTest(&argc, argv); From 7392d11ef8935676de79a80c943ef03cd65a97b5 Mon Sep 17 00:00:00 2001 From: zouyunhe Date: Wed, 10 Sep 2025 04:38:32 +0000 Subject: [PATCH 9/9] remove useless changes --- velox/functions/sparksql/tests/StringTest.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/velox/functions/sparksql/tests/StringTest.cpp b/velox/functions/sparksql/tests/StringTest.cpp index 7f8e0707fd41..419488eb5e5e 100644 --- a/velox/functions/sparksql/tests/StringTest.cpp +++ b/velox/functions/sparksql/tests/StringTest.cpp @@ -1055,6 +1055,5 @@ TEST_F(StringTest, empty2Null) { EXPECT_EQ(empty2Null(""), std::nullopt); EXPECT_EQ(empty2Null("abc"), "abc"); } - } // namespace } // namespace facebook::velox::functions::sparksql::test