diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 4bcee62c37a..7a266771389 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -3278,14 +3278,86 @@ def test_error_sparse(self):
 
 
 def test_safe_cast_from_float_with_nans_to_int():
-    # TODO(kszucs): write tests for creating Date32 and Date64 arrays, see
-    # ARROW-4258 and https://github.com/apache/arrow/pull/3395
     values = pd.Series([1, 2, None, 4])
     arr = pa.Array.from_pandas(values, type=pa.int32(), safe=True)
     expected = pa.array([1, 2, None, 4], type=pa.int32())
     assert arr.equals(expected)
 
 
+def test_create_date32_and_date64_arrays_with_mask():
+    """Check null handling when building Date32 and Date64 arrays.
+
+    Inputs are Python ints with a mask, Python dates with None, and NumPy
+    datetime64 arrays with a mask; a True mask entry yields a null.  The
+    expected arrays read ints as days since 1970-01-01 for date32 and as
+    milliseconds since 1970-01-01 for date64.
+    """
+    ms_per_day = 86400000
+    third_slot_null = [False, False, True, False]
+    dates_2023 = [date(2023, 1, 1), date(2023, 1, 2), None, date(2023, 1, 4)]
+
+    # date32: ints plus a mask
+    from_days = pa.array([0, 0, 1, 2], mask=third_slot_null,
+                         type=pa.date32())
+    want_days = pa.array(
+        [date(1970, 1, 1), date(1970, 1, 1), None, date(1970, 1, 3)],
+        type=pa.date32())
+    assert from_days.equals(want_days)
+
+    # date32: Python dates, None marks the null
+    days_from_dates = pa.array(dates_2023, type=pa.date32())
+    assert days_from_dates.null_count == 1
+    assert days_from_dates[2].as_py() is None
+
+    # date64: ints plus a mask
+    from_millis = pa.array(
+        [0, ms_per_day, 2 * ms_per_day, 3 * ms_per_day],
+        mask=third_slot_null, type=pa.date64())
+    want_millis = pa.array(
+        [date(1970, 1, 1), date(1970, 1, 2), None, date(1970, 1, 4)],
+        type=pa.date64())
+    assert from_millis.equals(want_millis)
+
+    # date64: Python dates, None marks the null
+    millis_from_dates = pa.array(dates_2023, type=pa.date64())
+    assert millis_from_dates.null_count == 1
+    assert millis_from_dates[2].as_py() is None
+
+    # date32: every slot masked
+    all_null = pa.array([0, 1, 2, 3], mask=[True] * 4, type=pa.date32())
+    assert all_null.null_count == 4
+
+    # date64: no slot masked
+    no_null = pa.array([0, ms_per_day, 2 * ms_per_day],
+                       mask=[False] * 3, type=pa.date64())
+    assert no_null.null_count == 0
+
+    # date32: NumPy datetime64[D] input with a NumPy mask
+    np_days = np.array([0, 1, 2, 3], dtype='datetime64[D]')
+    np_days_mask = np.array(third_slot_null)
+    from_np_days = pa.array(np_days, mask=np_days_mask, type=pa.date32())
+    want_np_days = pa.array(
+        [date(1970, 1, 1), date(1970, 1, 2), None, date(1970, 1, 4)],
+        type=pa.date32())
+    assert from_np_days.equals(want_np_days)
+    assert from_np_days.null_count == 1
+    assert from_np_days[2].as_py() is None
+
+    # date64: NumPy datetime64[ms] input with a NumPy mask
+    np_millis = np.array(
+        [0, ms_per_day, 2 * ms_per_day, 3 * ms_per_day],
+        dtype='datetime64[ms]')
+    np_millis_mask = np.array([False, True, False, False])
+    from_np_millis = pa.array(np_millis, mask=np_millis_mask,
+                              type=pa.date64())
+    want_np_millis = pa.array(
+        [date(1970, 1, 1), None, date(1970, 1, 3), date(1970, 1, 4)],
+        type=pa.date64())
+    assert from_np_millis.equals(want_np_millis)
+    assert from_np_millis.null_count == 1
+    assert from_np_millis[1].as_py() is None
+
+
 def _fully_loaded_dataframe_example():
     index = pd.MultiIndex.from_arrays([
         pd.date_range('2000-01-01', periods=5).repeat(2),