Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 91 additions & 2 deletions python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -3278,14 +3278,103 @@ def test_error_sparse(self):


def test_safe_cast_from_float_with_nans_to_int():
# TODO(kszucs): write tests for creating Date32 and Date64 arrays, see
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This TODO comment is about the Date32 and Date64 types. Is it correct to remove it? (The new tests cover float -> int.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was assuming that it is a mistake (from reading #3395 (comment)) ;-).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it is a bit confusing. Maybe some info can be found in the code in this comment: #3395 (comment)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, thanks. Let me try!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the current tests fit better! One more question: would we also use NumPy when creating date arrays?

# ARROW-4258 and https://github.com/apache/arrow/pull/3395
# The None entry means the Series cannot be held as plain integers (pandas
# stores it as floats with a NaN); the safe cast to int32 is expected to
# produce a null in that slot and keep the other values intact.
values = pd.Series([1, 2, None, 4])
arr = pa.Array.from_pandas(values, type=pa.int32(), safe=True)
expected = pa.array([1, 2, None, 4], type=pa.int32())
assert arr.equals(expected)


def test_create_date32_and_date64_arrays_with_mask():
    """Create Date32 and Date64 arrays whose nulls come from masks or None.

    Covers integer input from Python lists, ``datetime.date`` input and
    NumPy ``datetime64`` input. In every scenario the masked (or ``None``)
    slots must come out as nulls while the other slots keep their values.
    """
    # Shared inputs: Date64 integers are milliseconds since 1970-01-01.
    ms_per_day = 86400000
    first_four_days_ms = [day * ms_per_day for day in range(4)]
    dates_with_gap = [
        date(2023, 1, 1),
        date(2023, 1, 2),
        None,
        date(2023, 1, 4),
    ]

    # Date32 from a list of integers (days since 1970-01-01) plus a mask
    date32_from_ints = pa.array(
        [0, 0, 1, 2],
        mask=[False, False, True, False],
        type=pa.date32(),
    )
    assert date32_from_ints.equals(pa.array(
        [date(1970, 1, 1), date(1970, 1, 1), None, date(1970, 1, 3)],
        type=pa.date32(),
    ))

    # Date32 from Python date objects, with None marking the null
    date32_from_dates = pa.array(dates_with_gap, type=pa.date32())
    assert date32_from_dates.null_count == 1
    assert date32_from_dates[2].as_py() is None

    # Date64 from a list of integers (milliseconds) plus a mask
    date64_from_ints = pa.array(
        first_four_days_ms,
        mask=[False, False, True, False],
        type=pa.date64(),
    )
    assert date64_from_ints.equals(pa.array(
        [date(1970, 1, 1), date(1970, 1, 2), None, date(1970, 1, 4)],
        type=pa.date64(),
    ))

    # Date64 from Python date objects, with None marking the null
    date64_from_dates = pa.array(dates_with_gap, type=pa.date64())
    assert date64_from_dates.null_count == 1
    assert date64_from_dates[2].as_py() is None

    # Date32 where the mask hides every element
    fully_masked = pa.array(
        list(range(4)),
        mask=[True] * 4,
        type=pa.date32(),
    )
    assert fully_masked.null_count == 4

    # Date64 where the mask hides nothing
    nothing_masked = pa.array(
        first_four_days_ms[:3],
        mask=[False] * 3,
        type=pa.date64(),
    )
    assert nothing_masked.null_count == 0

    # Date32 from a NumPy datetime64[D] array with a NumPy boolean mask
    np_days = np.array([0, 1, 2, 3], dtype='datetime64[D]')
    np_days_mask = np.array([False, False, True, False])
    date32_from_numpy = pa.array(
        np_days, mask=np_days_mask, type=pa.date32())
    assert date32_from_numpy.equals(pa.array(
        [date(1970, 1, 1), date(1970, 1, 2), None, date(1970, 1, 4)],
        type=pa.date32(),
    ))
    assert date32_from_numpy.null_count == 1
    assert date32_from_numpy[2].as_py() is None

    # Date64 from a NumPy datetime64[ms] array with a NumPy boolean mask
    np_millis = np.array(first_four_days_ms, dtype='datetime64[ms]')
    np_millis_mask = np.array([False, True, False, False])
    date64_from_numpy = pa.array(
        np_millis, mask=np_millis_mask, type=pa.date64())
    assert date64_from_numpy.equals(pa.array(
        [date(1970, 1, 1), None, date(1970, 1, 3), date(1970, 1, 4)],
        type=pa.date64(),
    ))
    assert date64_from_numpy.null_count == 1
    assert date64_from_numpy[1].as_py() is None


def _fully_loaded_dataframe_example():
index = pd.MultiIndex.from_arrays([
pd.date_range('2000-01-01', periods=5).repeat(2),
Expand Down
Loading