From ea420276627428d83fb2cf443ef045b45693fa04 Mon Sep 17 00:00:00 2001 From: Dhruv Segat Date: Wed, 7 May 2025 10:39:08 +0100 Subject: [PATCH 1/7] Make it parse ISO compliant strings --- vortexasdk/result_conversions.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/vortexasdk/result_conversions.py b/vortexasdk/result_conversions.py index 8efaae88..be21595c 100644 --- a/vortexasdk/result_conversions.py +++ b/vortexasdk/result_conversions.py @@ -26,7 +26,14 @@ def format_datatypes(df: pd.DataFrame) -> pd.DataFrame: timestamp_cols = [col for col in df.columns if "timestamp" in col] for col in timestamp_cols: - df[col] = pd.to_datetime(df[col]) + if df[col].dtype != "object": + df[col] = pd.to_datetime(df[col]) + else: + try: + df[col] = pd.to_datetime(df[col], format="ISO8601") + except (ValueError, pd.errors.ParserError): + logger.debug("Failed to parse ISO8601 format, trying default") + df[col] = pd.to_datetime(df[col]) return df From a3cf71b1fc6c199c32a1ac21ea00558ae9a4a1b2 Mon Sep 17 00:00:00 2001 From: Dhruv Segat Date: Thu, 8 May 2025 09:59:35 +0100 Subject: [PATCH 2/7] Add tests Bump version. --- tests/test_result_conversions.py | 36 ++++++++++++++++++++++++++++++++ vortexasdk/version.py | 2 +- 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 tests/test_result_conversions.py diff --git a/tests/test_result_conversions.py b/tests/test_result_conversions.py new file mode 100644 index 00000000..09218507 --- /dev/null +++ b/tests/test_result_conversions.py @@ -0,0 +1,36 @@ +import pandas as pd + +from vortexasdk.result_conversions import format_datatypes + + +def test_format_datatypes(): + # Create a sample DataFrame with different timestamp formats + data = { + "date_col_dont_convert": ["2023-10-01", "2023-10-02"], + "date_col_timestamp": ["2023-10-01", "2023-10-02"], + "mixed_iso_format_timestamps": [ + "2023-10-03 12:00:00", + "2024-01-01 01:01:01.000001", + ], + "unix_timestamp": [1696156800, 1696243200], + "int_values": [1, 2], + } + df = pd.DataFrame(data, dtype=str) + df["unix_timestamp"] = df["unix_timestamp"].astype(int) + df["int_values"] = df["int_values"].astype(int) + + # Call the function to format datatypes + formatted_df = format_datatypes(df) + + # Check if the columns are converted to datetime + assert not pd.api.types.is_datetime64_any_dtype( + formatted_df["date_col_dont_convert"] + ) + assert not pd.api.types.is_datetime64_any_dtype(formatted_df["int_values"]) + assert pd.api.types.is_datetime64_any_dtype( + formatted_df["date_col_timestamp"] + ) + assert pd.api.types.is_datetime64_any_dtype( + formatted_df["mixed_iso_format_timestamps"] + ) + assert pd.api.types.is_datetime64_any_dtype(formatted_df["unix_timestamp"]) diff --git a/vortexasdk/version.py b/vortexasdk/version.py index e13bd590..39e0411d 100644 --- a/vortexasdk/version.py +++ b/vortexasdk/version.py @@ -1 +1 @@ -__version__ = "1.0.8" +__version__ = "1.0.9" From 52bfbc29a380667d17d1ff879f24fe9869086f89 Mon Sep 17 00:00:00 2001 From: Dhruv Segat Date: Thu, 8 May 2025 10:25:51 +0100 Subject: [PATCH 3/7] Assert values --- tests/test_result_conversions.py | 51 ++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/tests/test_result_conversions.py b/tests/test_result_conversions.py index 09218507..b0d353d4 100644 --- a/tests/test_result_conversions.py +++ b/tests/test_result_conversions.py @@ -1,3 +1,5 @@ +from datetime import datetime + import pandas as pd from vortexasdk.result_conversions import format_datatypes @@ -6,14 +8,19 @@ def test_format_datatypes(): # Create a sample DataFrame with different timestamp formats data = { - "date_col_dont_convert": ["2023-10-01", "2023-10-02"], - "date_col_timestamp": ["2023-10-01", "2023-10-02"], + "date_col_dont_convert": ["2023-10-01", "2023-10-02", "2023-07-01"], + "date_col_timestamp": ["2023-10-01", "2023-10-02", "2023-07-01"], "mixed_iso_format_timestamps": [ "2023-10-03 12:00:00", "2024-01-01 01:01:01.000001", + "2025-07-01T00:00:00", + ], + "unix_timestamp": [ + 1696118400000000000, + 1696204800000000000, + 1688169600000000000, ], - "unix_timestamp": [1696156800, 1696243200], - "int_values": [1, 2], + "int_values": [1, 2, 3], } df = pd.DataFrame(data, dtype=str) df["unix_timestamp"] = df["unix_timestamp"].astype(int) @@ -27,10 +34,36 @@ def test_format_datatypes(): formatted_df["date_col_dont_convert"] ) assert not pd.api.types.is_datetime64_any_dtype(formatted_df["int_values"]) - assert pd.api.types.is_datetime64_any_dtype( - formatted_df["date_col_timestamp"] + pd.testing.assert_series_equal( + formatted_df["date_col_timestamp"], + pd.Series( + [ + datetime(2023, 10, 1), + datetime(2023, 10, 2), + datetime(2023, 7, 1), + ] + ), + check_names=False, + ) + pd.testing.assert_series_equal( + formatted_df["mixed_iso_format_timestamps"], + pd.Series( + [ + datetime(2023, 10, 3, 12), + datetime(2024, 1, 1, 1, 1, 1, 1), + datetime(2025, 7, 1), + ] + ), + check_names=False, ) - assert pd.api.types.is_datetime64_any_dtype( - formatted_df["mixed_iso_format_timestamps"] + pd.testing.assert_series_equal( + formatted_df["unix_timestamp"], + pd.Series( + [ + datetime(2023, 10, 1), + datetime(2023, 10, 2), + datetime(2023, 7, 1), + ] + ), + check_names=False, ) - assert pd.api.types.is_datetime64_any_dtype(formatted_df["unix_timestamp"]) From 8ed0617b8219d08fc5224b44d91d79ea2b970432 Mon Sep 17 00:00:00 2001 From: Dhruv Segat Date: Thu, 8 May 2025 11:51:32 +0100 Subject: [PATCH 4/7] Parameterise tests --- tests/test_result_conversions.py | 109 +++++++++++++++++++------------ 1 file changed, 68 insertions(+), 41 deletions(-) diff --git a/tests/test_result_conversions.py b/tests/test_result_conversions.py index b0d353d4..29d748bc 100644 --- a/tests/test_result_conversions.py +++ b/tests/test_result_conversions.py @@ -1,29 +1,18 @@ from datetime import datetime import pandas as pd +import pytest from vortexasdk.result_conversions import format_datatypes -def test_format_datatypes(): +def test_format_datatypes_does_not_convert_columns_without_timestamp_suffix(): # Create a sample DataFrame with different timestamp formats data = { "date_col_dont_convert": ["2023-10-01", "2023-10-02", "2023-07-01"], - "date_col_timestamp": ["2023-10-01", "2023-10-02", "2023-07-01"], - "mixed_iso_format_timestamps": [ - "2023-10-03 12:00:00", - "2024-01-01 01:01:01.000001", - "2025-07-01T00:00:00", - ], - "unix_timestamp": [ - 1696118400000000000, - 1696204800000000000, - 1688169600000000000, - ], "int_values": [1, 2, 3], } df = pd.DataFrame(data, dtype=str) - df["unix_timestamp"] = df["unix_timestamp"].astype(int) df["int_values"] = df["int_values"].astype(int) # Call the function to format datatypes @@ -34,36 +23,74 @@ def test_format_datatypes(): formatted_df["date_col_dont_convert"] ) assert not pd.api.types.is_datetime64_any_dtype(formatted_df["int_values"]) - pd.testing.assert_series_equal( - formatted_df["date_col_timestamp"], - pd.Series( - [ - datetime(2023, 10, 1), - datetime(2023, 10, 2), - datetime(2023, 7, 1), - ] + + +@pytest.mark.parametrize( + "timestamp_series,col_name,expected_result", + ( + pytest.param( + pd.Series(["2023-10-01", "2023-10-02", "2023-07-01"], dtype=str), + "date_col_timestamp", + pd.Series( + [ + datetime(2023, 10, 1), + datetime(2023, 10, 2), + datetime(2023, 7, 1), + ] + ), + id="dates are parsed", ), - check_names=False, - ) - pd.testing.assert_series_equal( - formatted_df["mixed_iso_format_timestamps"], - pd.Series( - [ - datetime(2023, 10, 3, 12), - datetime(2024, 1, 1, 1, 1, 1, 1), - datetime(2025, 7, 1), - ] + pytest.param( + pd.Series( + [ + "2023-10-03 12:00:00", + "2024-01-01 01:01:01.000001", + "2025-07-01T00:00:00", + ], + dtype=str, + ), + "mixed_iso_format_timestamps", + pd.Series( + [ + datetime(2023, 10, 3, 12), + datetime(2024, 1, 1, 1, 1, 1, 1), + datetime(2025, 7, 1), + ] + ), + id="mixed ISO formats are parsed", ), - check_names=False, - ) - pd.testing.assert_series_equal( - formatted_df["unix_timestamp"], - pd.Series( - [ - datetime(2023, 10, 1), - datetime(2023, 10, 2), - datetime(2023, 7, 1), - ] + pytest.param( + pd.Series( + [ + 1696118400000000000, + 1696204800000000000, + 1688169600000000000, + ], + ), + "unix_timestamp", + pd.Series( + [ + datetime(2023, 10, 1), + datetime(2023, 10, 2), + datetime(2023, 7, 1), + ] + ), + id="timestamps ints are parsed", ), + ), +) +def test_format_datatypes_converts_columns_with_timestamp_suffix( + timestamp_series: pd.Series, col_name: str, expected_result: pd.Series +): + # Create a sample DataFrame with different timestamp formats + df = pd.DataFrame({col_name: timestamp_series}) + + # Call the function to format datatypes + formatted_df = format_datatypes(df) + + # Check if the columns are converted to datetime + pd.testing.assert_series_equal( + formatted_df[col_name], + expected_result, check_names=False, ) From 34e8797f2fca4a0219fef00593f0d573d81bfad0 Mon Sep 17 00:00:00 2001 From: Dhruv Segat Date: Thu, 8 May 2025 11:53:14 +0100 Subject: [PATCH 5/7] Update comment --- tests/test_result_conversions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_result_conversions.py b/tests/test_result_conversions.py index 29d748bc..24bc0c12 100644 --- a/tests/test_result_conversions.py +++ b/tests/test_result_conversions.py @@ -7,7 +7,6 @@ def test_format_datatypes_does_not_convert_columns_without_timestamp_suffix(): - # Create a sample DataFrame with different timestamp formats data = { "date_col_dont_convert": ["2023-10-01", "2023-10-02", "2023-07-01"], "int_values": [1, 2, 3], @@ -18,7 +17,7 @@ def test_format_datatypes_does_not_convert_columns_without_timestamp_suffix(): # Call the function to format datatypes formatted_df = format_datatypes(df) - # Check if the columns are converted to datetime + # Check the columns are not converted to datetime assert not pd.api.types.is_datetime64_any_dtype( formatted_df["date_col_dont_convert"] ) From 8f0488dddaf78d5fe46c0ecb4248f377513dd65d Mon Sep 17 00:00:00 2001 From: Dhruv Segat Date: Thu, 8 May 2025 12:14:50 +0100 Subject: [PATCH 6/7] Log col name --- vortexasdk/result_conversions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vortexasdk/result_conversions.py b/vortexasdk/result_conversions.py index be21595c..25f577a1 100644 --- a/vortexasdk/result_conversions.py +++ b/vortexasdk/result_conversions.py @@ -32,7 +32,9 @@ def format_datatypes(df: pd.DataFrame) -> pd.DataFrame: try: df[col] = pd.to_datetime(df[col], format="ISO8601") except (ValueError, pd.errors.ParserError): - logger.debug("Failed to parse ISO8601 format, trying default") + logger.debug( + f"Failed to parse column=[{col}] using ISO8601 format, trying default" + ) df[col] = pd.to_datetime(df[col]) return df From 0e6994b0f2362076944d9a0eaeec21d6e438e388 Mon Sep 17 00:00:00 2001 From: Dhruv Segat Date: Thu, 8 May 2025 12:18:58 +0100 Subject: [PATCH 7/7] Add test case --- tests/test_result_conversions.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_result_conversions.py b/tests/test_result_conversions.py index 24bc0c12..694744ae 100644 --- a/tests/test_result_conversions.py +++ b/tests/test_result_conversions.py @@ -58,6 +58,25 @@ def test_format_datatypes_does_not_convert_columns_without_timestamp_suffix(): ), id="mixed ISO formats are parsed", ), + pytest.param( + pd.Series( + [ + "2025/01/31", + "2025/01/01", + "2025/03/01", + ], + dtype=str, + ), + "custom_fmt_timestamp", + pd.Series( + [ + datetime(2025, 1, 31), + datetime(2025, 1, 1), + datetime(2025, 3, 1), + ] + ), + id="non ISO formats are parsed", + ), pytest.param( pd.Series( [