From e68b4a1cafb233def022d04629420d55f94701bf Mon Sep 17 00:00:00 2001 From: Govert Verkes Date: Mon, 13 Oct 2025 10:36:33 +0200 Subject: [PATCH 1/7] Fix Pydantic deserialization for FlyteFile and FlyteDirectory Fixes #6669 Signed-off-by: Govert Verkes Signed-off-by: Govert Verkes --- flytekit/types/directory/types.py | 12 ++++++- flytekit/types/file/file.py | 13 ++++++- .../test_pydantic_basemodel_transformer.py | 34 +++++++++++++++++++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index 286cfafbf6..04d5bc16e3 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -179,7 +179,17 @@ def serialize_flyte_dir(self) -> Dict[str, str]: @model_validator(mode="after") def deserialize_flyte_dir(self, info) -> FlyteDirectory: if info.context is None or info.context.get("deserialize") is not True: - return self + # Check if all private attributes are already set up (e.g., from __init__) + if hasattr(self, "_downloader") and hasattr(self, "_remote_source"): + return self + + # Populate missing private attributes for Pydantic-deserialized instances + dict_obj = {"path": str(self.path)} + + return FlyteDirToMultipartBlobTransformer().dict_to_flyte_directory( + dict_obj=dict_obj, + expected_python_type=type(self), + ) pv = FlyteDirToMultipartBlobTransformer().to_python_value( FlyteContextManager.current_context(), diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index 780188f9e5..09fd8c9249 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -196,7 +196,18 @@ def serialize_flyte_file(self) -> Dict[str, typing.Any]: @model_validator(mode="after") def deserialize_flyte_file(self, info) -> "FlyteFile": if info.context is None or info.context.get("deserialize") is not True: - return self + if hasattr(self, "_downloader") and hasattr(self, "_remote_source"): + return self + + dict_obj = {"path": str(self.path)} + metadata = getattr(self, "metadata", None) + if metadata is not None: + dict_obj["metadata"] = metadata + + return FlyteFilePathTransformer().dict_to_flyte_file( + dict_obj=dict_obj, + expected_python_type=type(self), + ) pv = FlyteFilePathTransformer().to_python_value( FlyteContextManager.current_context(), diff --git a/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py b/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py index 63127eaee2..63e8558de6 100644 --- a/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py +++ b/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py @@ -1022,3 +1022,37 @@ def mock_resolve_remote_path(flyte_uri: str): bm_revived = TypeEngine.to_python_value(ctx, lit, BM) assert bm_revived.s.literal.uri == "/my/replaced/val" + + +def test_flytefile_pydantic_model_dump_validate_cycle(): + class BM(BaseModel): + ff: FlyteFile + + bm = BM(ff=FlyteFile.from_source("s3://my-bucket/file.txt")) + + assert bm.ff.remote_source == "s3://my-bucket/file.txt" + + bm_dict = bm.model_dump() + bm2 = BM.model_validate(bm_dict) + + assert isinstance(bm2.ff, FlyteFile) + assert bm2.ff.remote_source == "s3://my-bucket/file.txt" + + bm2.model_dump() + + +def test_flytedirectory_pydantic_model_dump_validate_cycle(): + class BM(BaseModel): + fd: FlyteDirectory + + bm = BM(fd=FlyteDirectory.from_source("s3://my-bucket/my-dir")) + + assert bm.fd.remote_source == "s3://my-bucket/my-dir" + + bm_dict = bm.model_dump() + bm2 = BM.model_validate(bm_dict) + + assert isinstance(bm2.fd, FlyteDirectory) + assert bm2.fd.remote_source == "s3://my-bucket/my-dir" + + bm2.model_dump() From f1602620a1ad2d77df5b707f41efee50088256c2 Mon Sep 17 00:00:00 2001 From: Govert Verkes Date: Sat, 18 Oct 2025 15:46:15 +0200 Subject: [PATCH 2/7] Add tests to improve codecov coverage for FlyteFile and FlyteDirectory validators Signed-off-by: Govert Verkes --- .../test_pydantic_basemodel_transformer.py | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py b/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py index 63e8558de6..cc564f6fe5 100644 --- a/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py +++ b/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py @@ -1041,6 +1041,61 @@ class BM(BaseModel): bm2.model_dump() +def test_flytefile_pydantic_with_local_file(local_dummy_file): + class BM(BaseModel): + ff: FlyteFile + + bm = BM(ff=FlyteFile(path=local_dummy_file)) + + bm_dict = bm.model_dump() + bm2 = BM.model_validate(bm_dict) + + assert isinstance(bm2.ff, FlyteFile) + assert hasattr(bm2.ff, "_downloader") + assert hasattr(bm2.ff, "_remote_source") + + bm2.model_dump() + + +def test_flytefile_pydantic_with_metadata(local_dummy_file): + class BM(BaseModel): + ff: FlyteFile + + bm = BM(ff=FlyteFile(path=local_dummy_file, metadata={"key": "value"})) + + bm_dict = bm.model_dump() + bm2 = BM.model_validate(bm_dict) + + assert isinstance(bm2.ff, FlyteFile) + assert hasattr(bm2.ff, "_downloader") + assert hasattr(bm2.ff, "_remote_source") + assert bm2.ff.metadata == {"key": "value"} + + bm2.model_dump() + + +def test_flytefile_pydantic_direct_dict_validate(local_dummy_file): + class BM(BaseModel): + ff: FlyteFile + + bm = BM.model_validate({"ff": {"path": local_dummy_file}}) + + assert isinstance(bm.ff, FlyteFile) + assert hasattr(bm.ff, "_downloader") + assert hasattr(bm.ff, "_remote_source") + + +def test_flytedirectory_pydantic_direct_dict_validate(local_dummy_directory): + class BM(BaseModel): + fd: FlyteDirectory + + bm = BM.model_validate({"fd": {"path": local_dummy_directory}}) + + assert isinstance(bm.fd, FlyteDirectory) + assert hasattr(bm.fd, "_downloader") + assert hasattr(bm.fd, "_remote_source") + + def test_flytedirectory_pydantic_model_dump_validate_cycle(): class BM(BaseModel): fd: FlyteDirectory @@ -1056,3 +1111,19 @@ class BM(BaseModel): assert bm2.fd.remote_source == "s3://my-bucket/my-dir" bm2.model_dump() + + +def test_flytedirectory_pydantic_with_local_directory(local_dummy_directory): + class BM(BaseModel): + fd: FlyteDirectory + + bm = BM(fd=FlyteDirectory(path=local_dummy_directory)) + + bm_dict = bm.model_dump() + bm2 = BM.model_validate(bm_dict) + + assert isinstance(bm2.fd, FlyteDirectory) + assert hasattr(bm2.fd, "_downloader") + assert hasattr(bm2.fd, "_remote_source") + + bm2.model_dump() From 38bf6330712bc4c2ff4535ee257b9a60c9ebbf5d Mon Sep 17 00:00:00 2001 From: Govert Verkes Date: Sat, 6 Dec 2025 17:38:50 +0100 Subject: [PATCH 3/7] Simplify Pydantic deserialization with helper method Extract hasattr check into _was_initialized_via_init() helper for clarity. Remove redundant dict_to_flyte_* calls - fall through to to_python_value instead. Signed-off-by: Govert Verkes --- flytekit/types/directory/types.py | 15 +++++---------- flytekit/types/file/file.py | 16 +++++----------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index 04d5bc16e3..305ec28a7f 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -176,21 +176,16 @@ def serialize_flyte_dir(self) -> Dict[str, str]: ) return {"path": lv.scalar.blob.uri} + def _was_initialized_via_init(self) -> bool: + """Check if __init__ was called (Pydantic deserialization bypasses __init__).""" + return hasattr(self, "_downloader") and hasattr(self, "_remote_source") + @model_validator(mode="after") def deserialize_flyte_dir(self, info) -> FlyteDirectory: if info.context is None or info.context.get("deserialize") is not True: - # Check if all private attributes are already set up (e.g., from __init__) - if hasattr(self, "_downloader") and hasattr(self, "_remote_source"): + if self._was_initialized_via_init(): return self - # Populate missing private attributes for Pydantic-deserialized instances - dict_obj = {"path": str(self.path)} - - return FlyteDirToMultipartBlobTransformer().dict_to_flyte_directory( - dict_obj=dict_obj, - expected_python_type=type(self), - ) - pv = FlyteDirToMultipartBlobTransformer().to_python_value( FlyteContextManager.current_context(), Literal( diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index 09fd8c9249..edb9b804a1 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -193,22 +193,16 @@ def serialize_flyte_file(self) -> Dict[str, typing.Any]: out["metadata"] = lv.metadata return out + def _was_initialized_via_init(self) -> bool: + """Check if __init__ was called (Pydantic deserialization bypasses __init__).""" + return hasattr(self, "_downloader") and hasattr(self, "_remote_source") + @model_validator(mode="after") def deserialize_flyte_file(self, info) -> "FlyteFile": if info.context is None or info.context.get("deserialize") is not True: - if hasattr(self, "_downloader") and hasattr(self, "_remote_source"): + if self._was_initialized_via_init(): return self - dict_obj = {"path": str(self.path)} - metadata = getattr(self, "metadata", None) - if metadata is not None: - dict_obj["metadata"] = metadata - - return FlyteFilePathTransformer().dict_to_flyte_file( - dict_obj=dict_obj, - expected_python_type=type(self), - ) - pv = FlyteFilePathTransformer().to_python_value( FlyteContextManager.current_context(), Literal( From 51424ecfbbb34174c5796f2893dddd91d200cd5b Mon Sep 17 00:00:00 2001 From: gverkes Date: Thu, 18 Dec 2025 10:45:00 +0000 Subject: [PATCH 4/7] Update flytekit/types/file/file.py Co-authored-by: Nary Yeh <60069744+machichima@users.noreply.github.com> Signed-off-by: Govert Verkes Signed-off-by: gverkes --- flytekit/types/file/file.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index edb9b804a1..6a635120cb 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -194,7 +194,17 @@ def serialize_flyte_file(self) -> Dict[str, typing.Any]: return out def _was_initialized_via_init(self) -> bool: - """Check if __init__ was called (Pydantic deserialization bypasses __init__).""" + """Check if object was initialized via __init__ or deserialized by Pydantic. + + When Pydantic deserializes a FlyteFile, it bypasses __init__ and directly + sets the 'path' attribute. This means the _downloader and _remote_source attributes + won't exist. We use this check to determine if we need to go through the transformer + to properly set up these internal attributes. + + Returns: + True if __init__ was called (normal initialization), False if created via + Pydantic deserialization and needs to pass through transformer. + """ return hasattr(self, "_downloader") and hasattr(self, "_remote_source") @model_validator(mode="after") From 0820f139955c6c2250ba46f0e4bbb8695f2e2691 Mon Sep 17 00:00:00 2001 From: gverkes Date: Thu, 18 Dec 2025 10:45:15 +0000 Subject: [PATCH 5/7] Update flytekit/types/directory/types.py Co-authored-by: Nary Yeh <60069744+machichima@users.noreply.github.com> Signed-off-by: Govert Verkes Signed-off-by: gverkes --- flytekit/types/directory/types.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index 305ec28a7f..bdc0c9ecd0 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -177,7 +177,17 @@ def serialize_flyte_dir(self) -> Dict[str, str]: return {"path": lv.scalar.blob.uri} def _was_initialized_via_init(self) -> bool: - """Check if __init__ was called (Pydantic deserialization bypasses __init__).""" + """Check if object was initialized via __init__ or deserialized by Pydantic. + + When Pydantic deserializes a FlyteDirectory, it bypasses __init__ and directly + sets the 'path' attribute. This means the _downloader and _remote_source attributes + won't exist. We use this check to determine if we need to go through the transformer + to properly set up these internal attributes. + + Returns: + True if __init__ was called (normal initialization), False if created via + Pydantic deserialization and needs to pass through transformer. + """ return hasattr(self, "_downloader") and hasattr(self, "_remote_source") @model_validator(mode="after") From 6bcdbefb14f5d01d431962e89b08eefc13242ff9 Mon Sep 17 00:00:00 2001 From: gverkes Date: Thu, 18 Dec 2025 10:45:32 +0000 Subject: [PATCH 6/7] Update tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py Co-authored-by: Nary Yeh <60069744+machichima@users.noreply.github.com> Signed-off-by: Govert Verkes Signed-off-by: gverkes --- .../test_pydantic_basemodel_transformer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py b/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py index cc564f6fe5..6206817bfb 100644 --- a/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py +++ b/tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py @@ -1038,7 +1038,8 @@ class BM(BaseModel): assert isinstance(bm2.ff, FlyteFile) assert bm2.ff.remote_source == "s3://my-bucket/file.txt" - bm2.model_dump() + bm2_dict = bm2.model_dump() + assert bm_dict == bm2_dict def test_flytefile_pydantic_with_local_file(local_dummy_file): From c0847c9f2ae7805d5c19cd8509614666c7f609a9 Mon Sep 17 00:00:00 2001 From: Govert Verkes Date: Tue, 30 Dec 2025 16:00:43 +0000 Subject: [PATCH 7/7] Fix docstring return types for pydoclint DOC203 Add explicit 'bool:' return type to docstrings for _was_initialized_via_init methods in FlyteFile and FlyteDirectory to satisfy pydoclint linting. Signed-off-by: Govert Verkes --- flytekit/types/directory/types.py | 6 +++--- flytekit/types/file/file.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index bdc0c9ecd0..ec081aca94 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -178,15 +178,15 @@ def serialize_flyte_dir(self) -> Dict[str, str]: def _was_initialized_via_init(self) -> bool: """Check if object was initialized via __init__ or deserialized by Pydantic. - + When Pydantic deserializes a FlyteDirectory, it bypasses __init__ and directly sets the 'path' attribute. This means the _downloader and _remote_source attributes won't exist. We use this check to determine if we need to go through the transformer to properly set up these internal attributes. Returns: - True if __init__ was called (normal initialization), False if created via - Pydantic deserialization and needs to pass through transformer. + bool: True if __init__ was called (normal initialization), False if created via + Pydantic deserialization and needs to pass through transformer. """ return hasattr(self, "_downloader") and hasattr(self, "_remote_source") diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index 6a635120cb..47915add8e 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -195,15 +195,15 @@ def serialize_flyte_file(self) -> Dict[str, typing.Any]: def _was_initialized_via_init(self) -> bool: """Check if object was initialized via __init__ or deserialized by Pydantic. - + When Pydantic deserializes a FlyteFile, it bypasses __init__ and directly sets the 'path' attribute. This means the _downloader and _remote_source attributes won't exist. We use this check to determine if we need to go through the transformer to properly set up these internal attributes. Returns: - True if __init__ was called (normal initialization), False if created via - Pydantic deserialization and needs to pass through transformer. + bool: True if __init__ was called (normal initialization), False if created via + Pydantic deserialization and needs to pass through transformer. """ return hasattr(self, "_downloader") and hasattr(self, "_remote_source")