From 8fdf854597cb427e0354ed44a681baca867cd912 Mon Sep 17 00:00:00 2001 From: claudevdm Date: Mon, 23 Jun 2025 16:27:09 -0400 Subject: [PATCH 01/13] Make cloudpickle configurable. --- cloudpickle/cloudpickle.py | 149 ++++++---- tests/cloudpickle_test.py | 577 ++++++++++++++++++++----------------- tests/testutils.py | 61 +++- 3 files changed, 450 insertions(+), 337 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 4f4d857a..81b963c6 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -58,6 +58,7 @@ import dataclasses import dis from enum import Enum +import functools import io import itertools import logging @@ -93,10 +94,22 @@ # appropriate and preserve the usual "isinstance" semantics of Python objects. _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() +_DYNAMIC_CLASS_STATE_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock() PYPY = platform.python_implementation() == "PyPy" +def uuid_generator(_): + return uuid.uuid4().hex + +@dataclasses.dataclass +class CloudPickleConfig: + """Configuration for cloudpickle behavior.""" + id_generator: typing.Optional[callable] = uuid_generator + skip_reset_dynamic_type_state: bool = False + +DEFAULT_CONFIG = CloudPickleConfig() + builtin_code_type = None if PYPY: # builtin-code objects only exist in pypy @@ -105,11 +118,11 @@ _extract_code_globals_cache = weakref.WeakKeyDictionary() -def _get_or_create_tracker_id(class_def): +def _get_or_create_tracker_id(class_def, config): with _DYNAMIC_CLASS_TRACKER_LOCK: class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def) - if class_tracker_id is None: - class_tracker_id = uuid.uuid4().hex + if class_tracker_id is None and config.id_generator is not None: + class_tracker_id = config.id_generator(class_def) _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id 
_DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def return class_tracker_id @@ -599,26 +612,26 @@ def _make_typevar(name, bound, constraints, covariant, contravariant, class_trac return _lookup_class_or_track(class_tracker_id, tv) -def _decompose_typevar(obj): +def _decompose_typevar(obj, config): return ( obj.__name__, obj.__bound__, obj.__constraints__, obj.__covariant__, obj.__contravariant__, - _get_or_create_tracker_id(obj), + _get_or_create_tracker_id(obj, config), ) -def _typevar_reduce(obj): +def _typevar_reduce(obj, config): # TypeVar instances require the module information hence why we # are not using the _should_pickle_by_reference directly module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) if module_and_name is None: - return (_make_typevar, _decompose_typevar(obj)) + return (_make_typevar, _decompose_typevar(obj, config)) elif _is_registered_pickle_by_value(module_and_name[0]): - return (_make_typevar, _decompose_typevar(obj)) + return (_make_typevar, _decompose_typevar(obj, config)) return (getattr, module_and_name) @@ -662,7 +675,7 @@ def _make_dict_items(obj, is_ordered=False): # ------------------------------------------------- -def _class_getnewargs(obj): +def _class_getnewargs(obj, config): type_kwargs = {} if "__module__" in obj.__dict__: type_kwargs["__module__"] = obj.__module__ @@ -676,12 +689,12 @@ def _class_getnewargs(obj): obj.__name__, _get_bases(obj), type_kwargs, - _get_or_create_tracker_id(obj), + _get_or_create_tracker_id(obj, config), None, ) -def _enum_getnewargs(obj): +def _enum_getnewargs(obj, config): members = {e.name: e.value for e in obj} return ( obj.__bases__, @@ -689,7 +702,7 @@ def _enum_getnewargs(obj): obj.__qualname__, members, obj.__module__, - _get_or_create_tracker_id(obj), + _get_or_create_tracker_id(obj, config), None, ) @@ -1033,7 +1046,7 @@ def _weakset_reduce(obj): return weakref.WeakSet, (list(obj),) -def _dynamic_class_reduce(obj): +def _dynamic_class_reduce(obj, config): """Save a 
class that can't be referenced as a module attribute. This method is used to serialize classes that are defined inside @@ -1043,24 +1056,24 @@ def _dynamic_class_reduce(obj): if Enum is not None and issubclass(obj, Enum): return ( _make_skeleton_enum, - _enum_getnewargs(obj), + _enum_getnewargs(obj, config), _enum_getstate(obj), None, None, - _class_setstate, + functools.partial(_class_setstate, config=config), ) else: return ( _make_skeleton_class, - _class_getnewargs(obj), + _class_getnewargs(obj, config), _class_getstate(obj), None, None, - _class_setstate, + functools.partial(_class_setstate, config=config), ) -def _class_reduce(obj): +def _class_reduce(obj, config): """Select the reducer depending on the dynamic nature of the class obj.""" if obj is type(None): # noqa return type, (None,) @@ -1071,7 +1084,7 @@ def _class_reduce(obj): elif obj in _BUILTIN_TYPE_NAMES: return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) elif not _should_pickle_by_reference(obj): - return _dynamic_class_reduce(obj) + return _dynamic_class_reduce(obj, config) return NotImplemented @@ -1154,41 +1167,45 @@ def _function_setstate(obj, state): for k, v in slotstate.items(): setattr(obj, k, v) - -def _class_setstate(obj, state): - state, slotstate = state - registry = None - for attrname, attr in state.items(): - if attrname == "_abc_impl": - registry = attr - else: - # Note: setting attribute names on a class automatically triggers their - # interning in CPython: - # https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957 - # - # This means that to get deterministic pickling for a dynamic class that - # was initially defined in a different Python process, the pickler - # needs to ensure that dynamic class and function attribute names are - # systematically copied into a non-interned version to avoid - # unpredictable pickle payloads. 
- # - # Indeed the Pickler's memoizer relies on physical object identity to break - # cycles in the reference graph of the object being serialized. - setattr(obj, attrname, attr) - - if sys.version_info >= (3, 13) and "__firstlineno__" in state: - # Set the Python 3.13+ only __firstlineno__ attribute one more time, as it - # will be automatically deleted by the `setattr(obj, attrname, attr)` call - # above when `attrname` is "__firstlineno__". We assume that preserving this - # information might be important for some users and that it not stale in the - # context of cloudpickle usage, hence legitimate to propagate. Furthermore it - # is necessary to do so to keep deterministic chained pickling as tested in - # test_deterministic_str_interning_for_chained_dynamic_class_pickling. - obj.__firstlineno__ = state["__firstlineno__"] - - if registry is not None: - for subclass in registry: - obj.register(subclass) +def _class_setstate(obj, state, config): + # Lock while potentially modifying class state. + with _DYNAMIC_CLASS_TRACKER_LOCK: + if config.skip_reset_dynamic_type_state and obj in _DYNAMIC_CLASS_STATE_TRACKER_BY_CLASS: + return obj + _DYNAMIC_CLASS_STATE_TRACKER_BY_CLASS[obj] = True + state, slotstate = state + registry = None + for attrname, attr in state.items(): + if attrname == "_abc_impl": + registry = attr + else: + # Note: setting attribute names on a class automatically triggers their + # interning in CPython: + # https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957 + # + # This means that to get deterministic pickling for a dynamic class that + # was initially defined in a different Python process, the pickler + # needs to ensure that dynamic class and function attribute names are + # systematically copied into a non-interned version to avoid + # unpredictable pickle payloads. + # + # Indeed the Pickler's memoizer relies on physical object identity to break + # cycles in the reference graph of the object being serialized. 
+ setattr(obj, attrname, attr) + + if sys.version_info >= (3, 13) and "__firstlineno__" in state: + # Set the Python 3.13+ only __firstlineno__ attribute one more time, as it + # will be automatically deleted by the `setattr(obj, attrname, attr)` call + # above when `attrname` is "__firstlineno__". We assume that preserving this + # information might be important for some users and that it not stale in the + # context of cloudpickle usage, hence legitimate to propagate. Furthermore it + # is necessary to do so to keep deterministic chained pickling as tested in + # test_deterministic_str_interning_for_chained_dynamic_class_pickling. + obj.__firstlineno__ = state["__firstlineno__"] + + if registry is not None: + for subclass in registry: + obj.register(subclass) return obj @@ -1228,7 +1245,6 @@ class Pickler(pickle.Pickler): _dispatch_table[types.MethodType] = _method_reduce _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce _dispatch_table[weakref.WeakSet] = _weakset_reduce - _dispatch_table[typing.TypeVar] = _typevar_reduce _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce @@ -1305,7 +1321,7 @@ def dump(self, obj): msg = "Could not pickle object as excessively deep recursion required." raise pickle.PicklingError(msg) from e - def __init__(self, file, protocol=None, buffer_callback=None): + def __init__(self, file, protocol=None, buffer_callback=None, config=DEFAULT_CONFIG): if protocol is None: protocol = DEFAULT_PROTOCOL super().__init__(file, protocol=protocol, buffer_callback=buffer_callback) @@ -1314,6 +1330,7 @@ def __init__(self, file, protocol=None, buffer_callback=None): # their global namespace at unpickling time. 
self.globals_ref = {} self.proto = int(protocol) + self.config = config if not PYPY: # pickle.Pickler is the C implementation of the CPython pickler and @@ -1380,7 +1397,9 @@ def reducer_override(self, obj): is_anyclass = False if is_anyclass: - return _class_reduce(obj) + return _class_reduce(obj, self.config) + elif isinstance(obj, typing.TypeVar): # Add this check + return _typevar_reduce(obj, self.config) elif isinstance(obj, types.FunctionType): return self._function_reduce(obj) else: @@ -1448,12 +1467,18 @@ def save_global(self, obj, name=None, pack=struct.pack): if name is not None: super().save_global(obj, name=name) elif not _should_pickle_by_reference(obj, name=name): - self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj) + self._save_reduce_pickle5(*_dynamic_class_reduce(obj, self.config), obj=obj) else: super().save_global(obj, name=name) dispatch[type] = save_global + def save_typevar(self, obj, name=None): + """Handle TypeVar objects with access to config.""" + return self._save_reduce_pickle5(*_typevar_reduce(obj, self.config), obj=obj) + + dispatch[typing.TypeVar] = save_typevar + def save_function(self, obj, name=None): """Registered with the dispatch to handle all function types. @@ -1500,7 +1525,7 @@ def save_pypy_builtin_func(self, obj): # Shorthands similar to pickle.dump/pickle.dumps -def dump(obj, file, protocol=None, buffer_callback=None): +def dump(obj, file, protocol=None, buffer_callback=None, config=DEFAULT_CONFIG): """Serialize obj as bytes streamed into file protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to @@ -1513,10 +1538,10 @@ def dump(obj, file, protocol=None, buffer_callback=None): implementation details that can change from one Python version to the next). 
""" - Pickler(file, protocol=protocol, buffer_callback=buffer_callback).dump(obj) + Pickler(file, protocol=protocol, buffer_callback=buffer_callback, config=config).dump(obj) -def dumps(obj, protocol=None, buffer_callback=None): +def dumps(obj, protocol=None, buffer_callback=None, config=DEFAULT_CONFIG): """Serialize obj as a string of bytes allocated in memory protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to @@ -1530,7 +1555,7 @@ def dumps(obj, protocol=None, buffer_callback=None): next). """ with io.BytesIO() as file: - cp = Pickler(file, protocol=protocol, buffer_callback=buffer_callback) + cp = Pickler(file, protocol=protocol, buffer_callback=buffer_callback, config=config) cp.dump(obj) return file.getvalue() diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 45e8c592..17c00867 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -54,12 +54,12 @@ from .testutils import subprocess_pickle_string from .testutils import assert_run_python_script from .testutils import check_deterministic_pickle +from .testutils import get_config _TEST_GLOBAL_VARIABLE = "default_value" _TEST_GLOBAL_VARIABLE2 = "another_value" - class RaiserOnPickle: def __init__(self, exc): self.exc = exc @@ -68,12 +68,6 @@ def __reduce__(self): raise self.exc -def pickle_depickle(obj, protocol=cloudpickle.DEFAULT_PROTOCOL): - """Helper function to test whether object pickled with cloudpickle can be - depickled with pickle - """ - return pickle.loads(cloudpickle.dumps(obj, protocol=protocol)) - def _escape(raw_filepath): # Ugly hack to embed filepaths in code templates for windows @@ -120,15 +114,33 @@ def method_c(self): assert clsdict["__doc__"] is None assert clsdict["method_c"](C()) == C().method_c() - class CloudPickleTest(unittest.TestCase): protocol = cloudpickle.DEFAULT_PROTOCOL + config = 'default' + + def should_maintain_isinstance_semantics(self): + return get_config(self.config).id_generator is not None def setUp(self): 
self.tmpdir = tempfile.mkdtemp(prefix="tmp_cloudpickle_test_") def tearDown(self): shutil.rmtree(self.tmpdir) + + def dumps(self, obj, buffer_callback=None): + return cloudpickle.dumps( + obj, + protocol=self.protocol, + buffer_callback=buffer_callback, + config=get_config(self.config)) + + + def pickle_depickle(self, obj): + """Helper function to test whether object pickled with cloudpickle can be + depickled with pickle + """ + return pickle.loads(self.dumps(obj)) + @pytest.mark.skipif( platform.python_implementation() != "CPython" or sys.version_info < (3, 8, 2), @@ -148,7 +160,7 @@ class MyClass: my_object = MyClass() wr = weakref.ref(my_object) - cloudpickle.dumps(my_object) + self.dumps(my_object) del my_object assert wr() is None, "'del'-ed my_object has not been collected" @@ -156,11 +168,11 @@ def test_itemgetter(self): d = range(10) getter = itemgetter(1) - getter2 = pickle_depickle(getter, protocol=self.protocol) + getter2 = self.pickle_depickle(getter) self.assertEqual(getter(d), getter2(d)) getter = itemgetter(0, 3) - getter2 = pickle_depickle(getter, protocol=self.protocol) + getter2 = self.pickle_depickle(getter) self.assertEqual(getter(d), getter2(d)) def test_attrgetter(self): @@ -170,24 +182,24 @@ def __getattr__(self, item): d = C() getter = attrgetter("a") - getter2 = pickle_depickle(getter, protocol=self.protocol) + getter2 = self.pickle_depickle(getter) self.assertEqual(getter(d), getter2(d)) getter = attrgetter("a", "b") - getter2 = pickle_depickle(getter, protocol=self.protocol) + getter2 = self.pickle_depickle(getter) self.assertEqual(getter(d), getter2(d)) d.e = C() getter = attrgetter("e.a") - getter2 = pickle_depickle(getter, protocol=self.protocol) + getter2 = self.pickle_depickle(getter) self.assertEqual(getter(d), getter2(d)) getter = attrgetter("e.a", "e.b") - getter2 = pickle_depickle(getter, protocol=self.protocol) + getter2 = self.pickle_depickle(getter) self.assertEqual(getter(d), getter2(d)) # Regression test for SPARK-3415 def 
test_pickling_file_handles(self): out1 = sys.stderr - out2 = pickle.loads(cloudpickle.dumps(out1, protocol=self.protocol)) + out2 = pickle.loads(self.dumps(out1)) self.assertEqual(out1, out2) def test_func_globals(self): @@ -199,77 +211,77 @@ def __reduce__(self): exit = Unpicklable() self.assertRaises( - Exception, lambda: cloudpickle.dumps(exit, protocol=self.protocol) + Exception, lambda: self.dumps(exit) ) def foo(): sys.exit(0) self.assertTrue("exit" in foo.__code__.co_names) - cloudpickle.dumps(foo) + self.dumps(foo) def test_memoryview(self): buffer_obj = memoryview(b"Hello") self.assertEqual( - pickle_depickle(buffer_obj, protocol=self.protocol), buffer_obj.tobytes() + self.pickle_depickle(buffer_obj), buffer_obj.tobytes() ) def test_dict_keys(self): keys = {"a": 1, "b": 2}.keys() - results = pickle_depickle(keys) + results = self.pickle_depickle(keys) self.assertEqual(results, keys) assert isinstance(results, _collections_abc.dict_keys) def test_dict_values(self): values = {"a": 1, "b": 2}.values() - results = pickle_depickle(values) + results = self.pickle_depickle(values) self.assertEqual(sorted(results), sorted(values)) assert isinstance(results, _collections_abc.dict_values) def test_dict_items(self): items = {"a": 1, "b": 2}.items() - results = pickle_depickle(items) + results = self.pickle_depickle(items) self.assertEqual(results, items) assert isinstance(results, _collections_abc.dict_items) def test_odict_keys(self): keys = collections.OrderedDict([("a", 1), ("b", 2)]).keys() - results = pickle_depickle(keys) + results = self.pickle_depickle(keys) self.assertEqual(results, keys) assert type(keys) is type(results) def test_odict_values(self): values = collections.OrderedDict([("a", 1), ("b", 2)]).values() - results = pickle_depickle(values) + results = self.pickle_depickle(values) self.assertEqual(list(results), list(values)) assert type(values) is type(results) def test_odict_items(self): items = collections.OrderedDict([("a", 1), ("b", 
2)]).items() - results = pickle_depickle(items) + results = self.pickle_depickle(items) self.assertEqual(results, items) assert type(items) is type(results) def test_sliced_and_non_contiguous_memoryview(self): buffer_obj = memoryview(b"Hello!" * 3)[2:15:2] self.assertEqual( - pickle_depickle(buffer_obj, protocol=self.protocol), buffer_obj.tobytes() + self.pickle_depickle(buffer_obj), buffer_obj.tobytes() ) def test_large_memoryview(self): buffer_obj = memoryview(b"Hello!" * int(1e7)) self.assertEqual( - pickle_depickle(buffer_obj, protocol=self.protocol), buffer_obj.tobytes() + self.pickle_depickle(buffer_obj), buffer_obj.tobytes() ) def test_lambda(self): - self.assertEqual(pickle_depickle(lambda: 1, protocol=self.protocol)(), 1) + self.assertEqual(self.pickle_depickle(lambda: 1)(), 1) def test_nested_lambdas(self): a, b = 1, 2 f1 = lambda x: x + a # noqa: E731 f2 = lambda x: f1(x) // b # noqa: E731 - self.assertEqual(pickle_depickle(f2, protocol=self.protocol)(1), 1) + self.assertEqual(self.pickle_depickle(f2)(1), 1) def test_recursive_closure(self): def f1(): @@ -284,10 +296,10 @@ def g(n): return g - g1 = pickle_depickle(f1(), protocol=self.protocol) + g1 = self.pickle_depickle(f1()) self.assertEqual(g1(), g1) - g2 = pickle_depickle(f2(2), protocol=self.protocol) + g2 = self.pickle_depickle(f2(2)) self.assertEqual(g2(5), 240) def test_closure_none_is_preserved(self): @@ -299,7 +311,7 @@ def f(): msg="f actually has closure cells!", ) - g = pickle_depickle(f, protocol=self.protocol) + g = self.pickle_depickle(f) self.assertTrue( g.__closure__ is None, @@ -320,7 +332,7 @@ def g(): with pytest.raises(NameError): g1() - g2 = pickle_depickle(g1, protocol=self.protocol) + g2 = self.pickle_depickle(g1) with pytest.raises(NameError): g2() @@ -333,7 +345,7 @@ def g(): return g - g = pickle_depickle(f(), protocol=self.protocol) + g = self.pickle_depickle(f()) self.assertEqual(g(), 2) def test_class_no_firstlineno_deletion_(self): @@ -350,7 +362,7 @@ class A: pass if 
hasattr(A, "__firstlineno__"): - A_roundtrip = pickle_depickle(A, protocol=self.protocol) + A_roundtrip = self.pickle_depickle(A) assert hasattr(A_roundtrip, "__firstlineno__") assert A_roundtrip.__firstlineno__ == A.__firstlineno__ @@ -369,7 +381,7 @@ def method(self): self.assertEqual(Derived().method(), 2) # Pickle and unpickle the class. - UnpickledDerived = pickle_depickle(Derived, protocol=self.protocol) + UnpickledDerived = self.pickle_depickle(Derived) self.assertEqual(UnpickledDerived().method(), 2) # We have special logic for handling __doc__ because it's a readonly @@ -378,7 +390,7 @@ def method(self): # Pickle and unpickle an instance. orig_d = Derived() - d = pickle_depickle(orig_d, protocol=self.protocol) + d = self.pickle_depickle(orig_d) self.assertEqual(d.method(), 2) def test_cycle_in_classdict_globals(self): @@ -389,8 +401,8 @@ def it_works(self): C.C_again = C C.instance_of_C = C() - depickled_C = pickle_depickle(C, protocol=self.protocol) - depickled_instance = pickle_depickle(C()) + depickled_C = self.pickle_depickle(C) + depickled_instance = self.pickle_depickle(C()) # Test instance of depickled class. 
self.assertEqual(depickled_C().it_works(), "woohoo!") @@ -407,9 +419,9 @@ def some_function(x, y): return (x + y) / LOCAL_CONSTANT # pickle the function definition - result = pickle_depickle(some_function, protocol=self.protocol)(41, 1) + result = self.pickle_depickle(some_function)(41, 1) assert result == 1 - result = pickle_depickle(some_function, protocol=self.protocol)(81, 3) + result = self.pickle_depickle(some_function)(81, 3) assert result == 2 hidden_constant = lambda: LOCAL_CONSTANT # noqa: E731 @@ -427,29 +439,29 @@ def some_method(self, x): return self.one() + some_function(x, 1) + self.value # pickle the class definition - clone_class = pickle_depickle(SomeClass, protocol=self.protocol) + clone_class = self.pickle_depickle(SomeClass) self.assertEqual(clone_class(1).one(), 1) self.assertEqual(clone_class(5).some_method(41), 7) - clone_class = subprocess_pickle_echo(SomeClass, protocol=self.protocol) + clone_class = subprocess_pickle_echo(SomeClass, self.protocol, config=self.config) self.assertEqual(clone_class(5).some_method(41), 7) # pickle the class instances - self.assertEqual(pickle_depickle(SomeClass(1)).one(), 1) - self.assertEqual(pickle_depickle(SomeClass(5)).some_method(41), 7) - new_instance = subprocess_pickle_echo(SomeClass(5), protocol=self.protocol) + self.assertEqual(self.pickle_depickle(SomeClass(1)).one(), 1) + self.assertEqual(self.pickle_depickle(SomeClass(5)).some_method(41), 7) + new_instance = subprocess_pickle_echo(SomeClass(5), self.protocol, config=self.config) self.assertEqual(new_instance.some_method(41), 7) # pickle the method instances - self.assertEqual(pickle_depickle(SomeClass(1).one)(), 1) - self.assertEqual(pickle_depickle(SomeClass(5).some_method)(41), 7) + self.assertEqual(self.pickle_depickle(SomeClass(1).one)(), 1) + self.assertEqual(self.pickle_depickle(SomeClass(5).some_method)(41), 7) new_method = subprocess_pickle_echo( - SomeClass(5).some_method, protocol=self.protocol + SomeClass(5).some_method, 
protocol=self.protocol, config=self.config ) self.assertEqual(new_method(41), 7) def test_partial(self): partial_obj = functools.partial(min, 1) - partial_clone = pickle_depickle(partial_obj, protocol=self.protocol) + partial_clone = self.pickle_depickle(partial_obj) self.assertEqual(partial_clone(4), 1) @pytest.mark.skipif( @@ -462,25 +474,25 @@ def test_ufunc(self): if np: # simple ufunc: np.add - self.assertEqual(pickle_depickle(np.add, protocol=self.protocol), np.add) + self.assertEqual(self.pickle_depickle(np.add), np.add) else: # skip if numpy is not available pass if spp: # custom ufunc: scipy.special.iv - self.assertEqual(pickle_depickle(spp.iv, protocol=self.protocol), spp.iv) + self.assertEqual(self.pickle_depickle(spp.iv), spp.iv) else: # skip if scipy is not available pass def test_loads_namespace(self): obj = 1, 2, 3, 4 - returned_obj = cloudpickle.loads(cloudpickle.dumps(obj, protocol=self.protocol)) + returned_obj = cloudpickle.loads(self.dumps(obj)) self.assertEqual(obj, returned_obj) def test_load_namespace(self): obj = 1, 2, 3, 4 bio = io.BytesIO() - cloudpickle.dump(obj, bio) + cloudpickle.dump(obj, bio, config=get_config(self.config)) bio.seek(0) returned_obj = cloudpickle.load(bio) self.assertEqual(obj, returned_obj) @@ -489,7 +501,7 @@ def test_generator(self): def some_generator(cnt): yield from range(cnt) - gen2 = pickle_depickle(some_generator, protocol=self.protocol) + gen2 = self.pickle_depickle(some_generator) assert isinstance(gen2(3), type(some_generator(3))) assert list(gen2(3)) == list(range(3)) @@ -507,8 +519,8 @@ def test_cm(cls): sm = A.__dict__["test_sm"] cm = A.__dict__["test_cm"] - A.test_sm = pickle_depickle(sm, protocol=self.protocol) - A.test_cm = pickle_depickle(cm, protocol=self.protocol) + A.test_sm = self.pickle_depickle(sm) + A.test_cm = self.pickle_depickle(cm) self.assertEqual(A.test_sm(), "sm") self.assertEqual(A.test_cm(), "cm") @@ -519,11 +531,11 @@ class A: def test_cm(cls): return "cm" - A.test_cm = 
pickle_depickle(A.test_cm, protocol=self.protocol) + A.test_cm = self.pickle_depickle(A.test_cm) self.assertEqual(A.test_cm(), "cm") def test_method_descriptors(self): - f = pickle_depickle(str.upper) + f = self.pickle_depickle(str.upper) self.assertEqual(f("abc"), "ABC") def test_instancemethods_without_self(self): @@ -531,12 +543,12 @@ class F: def f(self, x): return x + 1 - g = pickle_depickle(F.f, protocol=self.protocol) + g = self.pickle_depickle(F.f) self.assertEqual(g.__name__, F.f.__name__) # self.assertEqual(g(F(), 1), 2) # still fails def test_module(self): - pickle_clone = pickle_depickle(pickle, protocol=self.protocol) + pickle_clone = self.pickle_depickle(pickle) self.assertEqual(pickle, pickle_clone) def _check_dynamic_module(self, mod): @@ -551,27 +563,27 @@ def method(self, x): return f(x) """ exec(textwrap.dedent(code), mod.__dict__) - mod2 = pickle_depickle(mod, protocol=self.protocol) + mod2 = self.pickle_depickle(mod) self.assertEqual(mod.x, mod2.x) self.assertEqual(mod.f(5), mod2.f(5)) self.assertEqual(mod.Foo().method(5), mod2.Foo().method(5)) if platform.python_implementation() != "PyPy": # XXX: this fails with excessive recursion on PyPy. 
- mod3 = subprocess_pickle_echo(mod, protocol=self.protocol) + mod3 = subprocess_pickle_echo(mod, self.protocol, self.config) self.assertEqual(mod.x, mod3.x) self.assertEqual(mod.f(5), mod3.f(5)) self.assertEqual(mod.Foo().method(5), mod3.Foo().method(5)) # Test dynamic modules when imported back are singletons - mod1, mod2 = pickle_depickle([mod, mod]) + mod1, mod2 = self.pickle_depickle([mod, mod]) self.assertEqual(id(mod1), id(mod2)) # Ensure proper pickling of mod's functions when module "looks" like a # file-backed module even though it is not: try: sys.modules["mod"] = mod - depickled_f = pickle_depickle(mod.f, protocol=self.protocol) + depickled_f = self.pickle_depickle(mod.f) self.assertEqual(mod.f(5), depickled_f(5)) finally: sys.modules.pop("mod", None) @@ -613,7 +625,7 @@ def test_module_locals_behavior(self): g = make_local_function() with open(pickled_func_path, "wb") as f: - cloudpickle.dump(g, f, protocol=self.protocol) + cloudpickle.dump(g, f, config=get_config(self.config)) assert_run_python_script(textwrap.dedent(child_process_script)) @@ -638,7 +650,7 @@ def __reduce__(self): unpicklable_obj = UnpickleableObject() with pytest.raises(ValueError): - cloudpickle.dumps(unpicklable_obj) + self.dumps(unpicklable_obj) # Emulate the behavior of scipy by injecting an unpickleable object # into mod's builtins. 
@@ -650,7 +662,7 @@ def __reduce__(self): elif isinstance(mod.__dict__["__builtins__"], types.ModuleType): mod.__dict__["__builtins__"].unpickleable_obj = unpicklable_obj - depickled_mod = pickle_depickle(mod, protocol=self.protocol) + depickled_mod = self.pickle_depickle(mod) assert "__builtins__" in depickled_mod.__dict__ if isinstance(depickled_mod.__dict__["__builtins__"], dict): @@ -693,6 +705,7 @@ def test_load_dynamic_module_in_grandchild_process(self): import cloudpickle from testutils import assert_run_python_script + from testutils import get_config child_of_child_process_script = {child_of_child_process_script} @@ -701,7 +714,7 @@ def test_load_dynamic_module_in_grandchild_process(self): mod = pickle.load(f) with open('{child_process_module_file}', 'wb') as f: - cloudpickle.dump(mod, f, protocol={protocol}) + cloudpickle.dump(mod, f, protocol={protocol}, config=get_config('{config}')) assert_run_python_script(textwrap.dedent(child_of_child_process_script)) """ @@ -725,11 +738,12 @@ def test_load_dynamic_module_in_grandchild_process(self): child_process_module_file=_escape(child_process_module_file), child_of_child_process_script=_escape(child_of_child_process_script), protocol=self.protocol, + config=self.config ) try: with open(parent_process_module_file, "wb") as fid: - cloudpickle.dump(mod, fid, protocol=self.protocol) + cloudpickle.dump(mod, fid, config=get_config(self.config)) assert_run_python_script(textwrap.dedent(child_process_script)) @@ -750,7 +764,7 @@ def unwanted_function(x): def my_small_function(x, y): return nested_function(x) + y - b = cloudpickle.dumps(my_small_function, protocol=self.protocol) + b = self.dumps(my_small_function) # Make sure that the pickle byte string only includes the definition # of my_small_function and its dependency nested_function while @@ -791,14 +805,14 @@ def test_module_importability(self): "_cloudpickle_testpkg.mod.dynamic_submodule" ) # noqa F841 assert _should_pickle_by_reference(m) - assert 
pickle_depickle(m, protocol=self.protocol) is m + assert self.pickle_depickle(m) is m # Check for similar behavior for a module that cannot be imported by # attribute lookup. from _cloudpickle_testpkg.mod import dynamic_submodule_two as m2 assert _should_pickle_by_reference(m2) - assert pickle_depickle(m2, protocol=self.protocol) is m2 + assert self.pickle_depickle(m2) is m2 # Submodule_three is a dynamic module only importable via module lookup with pytest.raises(ImportError): @@ -810,7 +824,7 @@ def test_module_importability(self): # This module cannot be pickled using attribute lookup (as it does not # have a `__module__` attribute like classes and functions. assert not hasattr(m3, "__module__") - depickled_m3 = pickle_depickle(m3, protocol=self.protocol) + depickled_m3 = self.pickle_depickle(m3) assert depickled_m3 is not m3 assert m3.f(1) == depickled_m3.f(1) @@ -819,29 +833,29 @@ def test_module_importability(self): import _cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule as sm # noqa assert _should_pickle_by_reference(sm) - assert pickle_depickle(sm, protocol=self.protocol) is sm + assert self.pickle_depickle(sm) is sm expected = "cannot check importability of object instances" with pytest.raises(TypeError, match=expected): _should_pickle_by_reference(object()) def test_Ellipsis(self): - self.assertEqual(Ellipsis, pickle_depickle(Ellipsis, protocol=self.protocol)) + self.assertEqual(Ellipsis, self.pickle_depickle(Ellipsis)) def test_NotImplemented(self): - ExcClone = pickle_depickle(NotImplemented, protocol=self.protocol) + ExcClone = self.pickle_depickle(NotImplemented) self.assertEqual(NotImplemented, ExcClone) def test_NoneType(self): - res = pickle_depickle(type(None), protocol=self.protocol) + res = self.pickle_depickle(type(None)) self.assertEqual(type(None), res) def test_EllipsisType(self): - res = pickle_depickle(type(Ellipsis), protocol=self.protocol) + res = self.pickle_depickle(type(Ellipsis)) self.assertEqual(type(Ellipsis), res) 
def test_NotImplementedType(self): - res = pickle_depickle(type(NotImplemented), protocol=self.protocol) + res = self.pickle_depickle(type(NotImplemented)) self.assertEqual(type(NotImplemented), res) def test_builtin_function(self): @@ -849,20 +863,20 @@ def test_builtin_function(self): # only in python2. # builtin function from the __builtin__ module - assert pickle_depickle(zip, protocol=self.protocol) is zip + assert self.pickle_depickle(zip) is zip from os import mkdir # builtin function from a "regular" module - assert pickle_depickle(mkdir, protocol=self.protocol) is mkdir + assert self.pickle_depickle(mkdir) is mkdir def test_builtin_type_constructor(self): # This test makes sure that cloudpickling builtin-type # constructors works for all python versions/implementation. - # pickle_depickle some builtin methods of the __builtin__ module + # self.pickle_depickle some builtin methods of the __builtin__ module for t in list, tuple, set, frozenset, dict, object: - cloned_new = pickle_depickle(t.__new__, protocol=self.protocol) + cloned_new = self.pickle_depickle(t.__new__) assert isinstance(cloned_new(t), t) # The next 4 tests cover all cases into which builtin python methods can @@ -886,14 +900,14 @@ def test_builtin_classicmethod(self): assert unbound_classicmethod is clsdict_classicmethod - depickled_bound_meth = pickle_depickle( - bound_classicmethod, protocol=self.protocol + depickled_bound_meth = self.pickle_depickle( + bound_classicmethod ) - depickled_unbound_meth = pickle_depickle( - unbound_classicmethod, protocol=self.protocol + depickled_unbound_meth = self.pickle_depickle( + unbound_classicmethod ) - depickled_clsdict_meth = pickle_depickle( - clsdict_classicmethod, protocol=self.protocol + depickled_clsdict_meth = self.pickle_depickle( + clsdict_classicmethod ) # No identity on the bound methods they are bound to different float @@ -908,9 +922,9 @@ def test_builtin_classmethod(self): bound_clsmethod = obj.fromhex # builtin_function_or_method 
unbound_clsmethod = type(obj).fromhex # builtin_function_or_method - depickled_bound_meth = pickle_depickle(bound_clsmethod, protocol=self.protocol) - depickled_unbound_meth = pickle_depickle( - unbound_clsmethod, protocol=self.protocol + depickled_bound_meth = self.pickle_depickle(bound_clsmethod) + depickled_unbound_meth = self.pickle_depickle( + unbound_clsmethod ) # float.fromhex takes a string as input. @@ -948,8 +962,8 @@ def test_builtin_classmethod_descriptor(self): clsdict_clsmethod = type(obj).__dict__["fromhex"] # classmethod_descriptor - depickled_clsdict_meth = pickle_depickle( - clsdict_clsmethod, protocol=self.protocol + depickled_clsdict_meth = self.pickle_depickle( + clsdict_clsmethod ) # float.fromhex takes a string as input. @@ -976,12 +990,12 @@ def test_builtin_slotmethod(self): unbound_slotmethod = type(obj).__repr__ # wrapper_descriptor clsdict_slotmethod = type(obj).__dict__["__repr__"] # ditto - depickled_bound_meth = pickle_depickle(bound_slotmethod, protocol=self.protocol) - depickled_unbound_meth = pickle_depickle( - unbound_slotmethod, protocol=self.protocol + depickled_bound_meth = self.pickle_depickle(bound_slotmethod) + depickled_unbound_meth = self.pickle_depickle( + unbound_slotmethod ) - depickled_clsdict_meth = pickle_depickle( - clsdict_slotmethod, protocol=self.protocol + depickled_clsdict_meth = self.pickle_depickle( + clsdict_slotmethod ) # No identity tests on the bound slotmethod are they are bound to @@ -1003,14 +1017,14 @@ def test_builtin_staticmethod(self): assert bound_staticmethod is unbound_staticmethod - depickled_bound_meth = pickle_depickle( - bound_staticmethod, protocol=self.protocol + depickled_bound_meth = self.pickle_depickle( + bound_staticmethod ) - depickled_unbound_meth = pickle_depickle( - unbound_staticmethod, protocol=self.protocol + depickled_unbound_meth = self.pickle_depickle( + unbound_staticmethod ) - depickled_clsdict_meth = pickle_depickle( - clsdict_staticmethod, protocol=self.protocol + 
depickled_clsdict_meth = self.pickle_depickle( + clsdict_staticmethod ) assert depickled_bound_meth is bound_staticmethod @@ -1039,7 +1053,7 @@ def g(y): with pytest.warns(DeprecationWarning): assert cloudpickle.is_tornado_coroutine(g) - data = cloudpickle.dumps([g, g], protocol=self.protocol) + data = self.dumps([g, g]) del f, g g2, g3 = pickle.loads(data) assert g2 is g3 @@ -1071,7 +1085,7 @@ def f(): exec(textwrap.dedent(code), d, d) f = d["f"] res = f() - data = cloudpickle.dumps([f, f], protocol=self.protocol) + data = self.dumps([f, f]) d = f = None f2, f3 = pickle.loads(data) self.assertTrue(f2 is f3) @@ -1091,7 +1105,7 @@ def example(): example() # smoke test - s = cloudpickle.dumps(example, protocol=self.protocol) + s = self.dumps(example) # refresh the environment, i.e., unimport the dependency del xml @@ -1116,7 +1130,7 @@ def example(): example = scope() example() # smoke test - s = cloudpickle.dumps(example, protocol=self.protocol) + s = self.dumps(example) # refresh the environment (unimport dependency) for item in list(sys.modules): @@ -1139,7 +1153,7 @@ def example(): example = scope() - s = cloudpickle.dumps(example, protocol=self.protocol) + s = self.dumps(example) # choose "subprocess" rather than "multiprocessing" because the latter # library uses fork to preserve the parent environment. 
@@ -1167,7 +1181,7 @@ def example(): example = scope() import xml.etree.ElementTree as etree - s = cloudpickle.dumps(example, protocol=self.protocol) + s = self.dumps(example) command = ( "import base64; from pickle import loads; loads(base64.b32decode('" @@ -1181,7 +1195,7 @@ def test_multiprocessing_lock_raises(self): with pytest.raises( RuntimeError, match="only be shared between processes through inheritance" ): - cloudpickle.dumps(lock) + self.dumps(lock) def test_cell_manipulation(self): cell = _make_empty_cell() @@ -1195,10 +1209,10 @@ def test_cell_manipulation(self): def check_logger(self, name): logger = logging.getLogger(name) - pickled = pickle_depickle(logger, protocol=self.protocol) + pickled = self.pickle_depickle(logger) self.assertTrue(pickled is logger, (pickled, logger)) - dumped = cloudpickle.dumps(logger) + dumped = self.dumps(logger) code = """if 1: import base64, cloudpickle, logging @@ -1224,7 +1238,7 @@ def test_logger(self): def test_getset_descriptor(self): assert isinstance(float.real, types.GetSetDescriptorType) - depickled_descriptor = pickle_depickle(float.real) + depickled_descriptor = self.pickle_depickle(float.real) self.assertIs(depickled_descriptor, float.real) def test_abc_cache_not_pickled(self): @@ -1244,14 +1258,17 @@ class MyRelatedClass: assert not issubclass(MyUnrelatedClass, MyClass) assert issubclass(MyRelatedClass, MyClass) - s = cloudpickle.dumps(MyClass) + s = self.dumps(MyClass) assert b"MyUnrelatedClass" not in s assert b"MyRelatedClass" in s depickled_class = cloudpickle.loads(s) assert not issubclass(MyUnrelatedClass, depickled_class) - assert issubclass(MyRelatedClass, depickled_class) + + config = get_config(self.config) + should_maintain_isinstance_semantics = config.id_generator is not None + assert issubclass(MyRelatedClass, depickled_class) == should_maintain_isinstance_semantics def test_abc(self): class AbstractClass(abc.ABC): @@ -1296,9 +1313,9 @@ def some_property(self): AbstractClass.register(tuple) 
concrete_instance = ConcreteClass() - depickled_base = pickle_depickle(AbstractClass, protocol=self.protocol) - depickled_class = pickle_depickle(ConcreteClass, protocol=self.protocol) - depickled_instance = pickle_depickle(concrete_instance) + depickled_base = self.pickle_depickle(AbstractClass) + depickled_class = self.pickle_depickle(ConcreteClass) + depickled_instance = self.pickle_depickle(concrete_instance) assert issubclass(tuple, AbstractClass) assert issubclass(tuple, depickled_base) @@ -1384,9 +1401,9 @@ def some_property(self): AbstractClass.register(tuple) concrete_instance = ConcreteClass() - depickled_base = pickle_depickle(AbstractClass, protocol=self.protocol) - depickled_class = pickle_depickle(ConcreteClass, protocol=self.protocol) - depickled_instance = pickle_depickle(concrete_instance) + depickled_base = self.pickle_depickle(AbstractClass) + depickled_class = self.pickle_depickle(ConcreteClass) + depickled_instance = self.pickle_depickle(concrete_instance) assert issubclass(tuple, AbstractClass) assert issubclass(tuple, depickled_base) @@ -1441,7 +1458,7 @@ def __init__(self, x): obj1, obj2, obj3 = SomeClass(1), SomeClass(2), SomeClass(3) things = [weakref.WeakSet([obj1, obj2]), obj1, obj2, obj3] - result = pickle_depickle(things, protocol=self.protocol) + result = self.pickle_depickle(things) weakset, depickled1, depickled2, depickled3 = result @@ -1498,7 +1515,7 @@ def __getattr__(self, name): assert func_module_name != "NonModuleObject" assert func_module_name is None - depickled_func = pickle_depickle(func, protocol=self.protocol) + depickled_func = self.pickle_depickle(func) assert depickled_func(2) == 4 finally: @@ -1564,10 +1581,10 @@ def foo(): try: # Test whichmodule in save_global. - self.assertEqual(pickle_depickle(Foo()).foo(), "it works!") + self.assertEqual(self.pickle_depickle(Foo()).foo(), "it works!") # Test whichmodule in save_function. 
- cloned = pickle_depickle(foo, protocol=self.protocol) + cloned = self.pickle_depickle(foo) self.assertEqual(cloned(), "it works!") finally: sys.modules.pop("_faulty_module", None) @@ -1577,7 +1594,7 @@ def local_func(x): return x for func in [local_func, lambda x: x]: - cloned = pickle_depickle(func, protocol=self.protocol) + cloned = self.pickle_depickle(func) self.assertEqual(cloned.__module__, func.__module__) def test_function_qualname(self): @@ -1586,12 +1603,12 @@ def func(x): # Default __qualname__ attribute (Python 3 only) if hasattr(func, "__qualname__"): - cloned = pickle_depickle(func, protocol=self.protocol) + cloned = self.pickle_depickle(func) self.assertEqual(cloned.__qualname__, func.__qualname__) # Mutated __qualname__ attribute func.__qualname__ = "" - cloned = pickle_depickle(func, protocol=self.protocol) + cloned = self.pickle_depickle(func) self.assertEqual(cloned.__qualname__, func.__qualname__) def test_property(self): @@ -1623,7 +1640,7 @@ def read_write_value(self, value): my_object.read_only_value = 2 my_object.read_write_value = 2 - depickled_obj = pickle_depickle(my_object) + depickled_obj = self.pickle_depickle(my_object) assert depickled_obj.read_only_value == 1 assert depickled_obj.read_write_value == 2 @@ -1642,14 +1659,16 @@ def test_namedtuple(self): t1 = MyTuple(1, 2, 3) t2 = MyTuple(3, 2, 1) - depickled_t1, depickled_MyTuple, depickled_t2 = pickle_depickle( - [t1, MyTuple, t2], protocol=self.protocol + depickled_t1, depickled_MyTuple, depickled_t2 = self.pickle_depickle( + [t1, MyTuple, t2] ) - assert isinstance(depickled_t1, MyTuple) + config = get_config(self.config) + should_maintain_isinstance_semantics = config.id_generator is not None + assert isinstance(depickled_t1, MyTuple) == should_maintain_isinstance_semantics assert depickled_t1 == t1 - assert depickled_MyTuple is MyTuple - assert isinstance(depickled_t2, MyTuple) + assert (depickled_MyTuple is MyTuple) == should_maintain_isinstance_semantics + assert 
isinstance(depickled_t2, MyTuple) == should_maintain_isinstance_semantics assert depickled_t2 == t2 def test_NamedTuple(self): @@ -1661,14 +1680,16 @@ class MyTuple(typing.NamedTuple): t1 = MyTuple(1, 2, 3) t2 = MyTuple(3, 2, 1) - depickled_t1, depickled_MyTuple, depickled_t2 = pickle_depickle( - [t1, MyTuple, t2], protocol=self.protocol + depickled_t1, depickled_MyTuple, depickled_t2 = self.pickle_depickle( + [t1, MyTuple, t2] ) - assert isinstance(depickled_t1, MyTuple) + config = get_config(self.config) + should_maintain_isinstance_semantics = config.id_generator is not None + assert isinstance(depickled_t1, MyTuple) == should_maintain_isinstance_semantics assert depickled_t1 == t1 - assert depickled_MyTuple is MyTuple - assert isinstance(depickled_t2, MyTuple) + assert (depickled_MyTuple is MyTuple) == should_maintain_isinstance_semantics + assert isinstance(depickled_t2, MyTuple) == should_maintain_isinstance_semantics assert depickled_t2 == t2 def test_interactively_defined_function(self): @@ -1707,33 +1728,33 @@ def f5(x): return f4(x) return f5(x - 1) + 1 - cloned = subprocess_pickle_echo(lambda x: x**2, protocol={protocol}) + cloned = subprocess_pickle_echo(lambda x: x**2, protocol={protocol}, config='{config}') assert cloned(3) == 9 - cloned = subprocess_pickle_echo(f0, protocol={protocol}) + cloned = subprocess_pickle_echo(f0, protocol={protocol}, config='{config}') assert cloned(3) == 9 - cloned = subprocess_pickle_echo(Foo, protocol={protocol}) + cloned = subprocess_pickle_echo(Foo, protocol={protocol}, config='{config}') assert cloned().method(2) == Foo().method(2) - cloned = subprocess_pickle_echo(Foo(), protocol={protocol}) + cloned = subprocess_pickle_echo(Foo(), protocol={protocol}, config='{config}') assert cloned.method(2) == Foo().method(2) - cloned = subprocess_pickle_echo(f1, protocol={protocol}) + cloned = subprocess_pickle_echo(f1, protocol={protocol}, config='{config}') assert cloned()().method('a') == f1()().method('a') - cloned = 
subprocess_pickle_echo(f2, protocol={protocol}) + cloned = subprocess_pickle_echo(f2, protocol={protocol}, config='{config}') assert cloned(2) == f2(2) - cloned = subprocess_pickle_echo(f3, protocol={protocol}) + cloned = subprocess_pickle_echo(f3, protocol={protocol}, config='{config}') assert cloned() == f3() - cloned = subprocess_pickle_echo(f4, protocol={protocol}) + cloned = subprocess_pickle_echo(f4, protocol={protocol}, config='{config}') assert cloned(2) == f4(2) - cloned = subprocess_pickle_echo(f5, protocol={protocol}) + cloned = subprocess_pickle_echo(f5, protocol={protocol}, config='{config}') assert cloned(7) == f5(7) == 7 - """.format(protocol=self.protocol) + """.format(protocol=self.protocol, config=self.config) assert_run_python_script(textwrap.dedent(code)) def test_interactively_defined_global_variable(self): @@ -1741,10 +1762,11 @@ def test_interactively_defined_global_variable(self): # script (or jupyter kernel) correctly retrieve global variables. code_template = """\ from testutils import subprocess_pickle_echo + from testutils import get_config from cloudpickle import dumps, loads - def local_clone(obj, protocol=None): - return loads(dumps(obj, protocol=protocol)) + def local_clone(obj, protocol=None, config=get_config('{config}')): + return loads(dumps(obj, protocol=protocol, config=config)) VARIABLE = "default_value" @@ -1758,7 +1780,7 @@ def f1(): assert f0.__globals__ is f1.__globals__ # pickle f0 and f1 inside the same pickle_string - cloned_f0, cloned_f1 = {clone_func}([f0, f1], protocol={protocol}) + cloned_f0, cloned_f1 = {clone_func}([f0, f1], protocol={protocol}, config='{config}') # cloned_f0 and cloned_f1 now share a global namespace that is isolated # from any previously existing namespace @@ -1766,7 +1788,7 @@ def f1(): assert cloned_f0.__globals__ is not f0.__globals__ # pickle f1 another time, but in a new pickle string - pickled_f1 = dumps(f1, protocol={protocol}) + pickled_f1 = dumps(f1, protocol={protocol}, 
config=get_config('{config}')) # Change the value of the global variable in f0's new global namespace cloned_f0() @@ -1793,7 +1815,7 @@ def f1(): assert new_global_var == "default_value", new_global_var """ for clone_func in ["local_clone", "subprocess_pickle_echo"]: - code = code_template.format(protocol=self.protocol, clone_func=clone_func) + code = code_template.format(protocol=self.protocol, config=self.config, clone_func=clone_func) assert_run_python_script(textwrap.dedent(code)) def test_closure_interacting_with_a_global_variable(self): @@ -1810,7 +1832,7 @@ def f1(): return _TEST_GLOBAL_VARIABLE # pickle f0 and f1 inside the same pickle_string - cloned_f0, cloned_f1 = pickle_depickle([f0, f1], protocol=self.protocol) + cloned_f0, cloned_f1 = self.pickle_depickle([f0, f1]) # cloned_f0 and cloned_f1 now share a global namespace that is # isolated from any previously existing namespace @@ -1818,7 +1840,7 @@ def f1(): assert cloned_f0.__globals__ is not f0.__globals__ # pickle f1 another time, but in a new pickle string - pickled_f1 = cloudpickle.dumps(f1, protocol=self.protocol) + pickled_f1 = self.dumps(f1) # Change the global variable's value in f0's new global namespace cloned_f0() @@ -1853,7 +1875,7 @@ def test_interactive_remote_function_calls(self): def interactive_function(x): return x + 1 - with subprocess_worker(protocol={protocol}) as w: + with subprocess_worker(protocol={protocol}, config='{config}') as w: assert w.run(interactive_function, 41) == 42 @@ -1872,7 +1894,7 @@ def interactive_function(x): # previous definition of `interactive_function`: assert w.run(wrapper_func, 41) == 40 - """.format(protocol=self.protocol) + """.format(protocol=self.protocol, config=self.config) assert_run_python_script(code) def test_interactive_remote_function_calls_no_side_effect(self): @@ -1880,7 +1902,7 @@ def test_interactive_remote_function_calls_no_side_effect(self): from testutils import subprocess_worker import sys - with 
subprocess_worker(protocol={protocol}) as w: + with subprocess_worker(protocol={protocol}, config='{config}') as w: GLOBAL_VARIABLE = 0 @@ -1916,14 +1938,14 @@ def is_in_main(name): assert is_in_main("GLOBAL_VARIABLE") assert not w.run(is_in_main, "GLOBAL_VARIABLE") - """.format(protocol=self.protocol) + """.format(protocol=self.protocol, config=self.config) assert_run_python_script(code) def test_interactive_dynamic_type_and_remote_instances(self): code = """if __name__ == "__main__": from testutils import subprocess_worker - with subprocess_worker(protocol={protocol}) as w: + with subprocess_worker(protocol={protocol}, config='{config}') as w: class CustomCounter: def __init__(self): @@ -1940,8 +1962,8 @@ def increment(self): # Check that the class definition of the returned instance was # matched back to the original class definition living in __main__. - - assert isinstance(returned_counter, CustomCounter) + should_maintain_isinstance_semantics = {should_maintain_isinstance_semantics} + assert isinstance(returned_counter, CustomCounter) == should_maintain_isinstance_semantics # Check that memoization does not break provenance tracking: @@ -1950,15 +1972,23 @@ def echo(*args): C1, C2, c1, c2 = w.run(echo, CustomCounter, CustomCounter, CustomCounter(), returned_counter) - assert C1 is CustomCounter - assert C2 is CustomCounter - assert isinstance(c1, CustomCounter) - assert isinstance(c2, CustomCounter) + assert (C1 is CustomCounter) == should_maintain_isinstance_semantics + assert (C2 is CustomCounter) == should_maintain_isinstance_semantics + assert isinstance(c1, CustomCounter) == should_maintain_isinstance_semantics + assert isinstance(c2, CustomCounter) == should_maintain_isinstance_semantics - """.format(protocol=self.protocol) + """.format( + protocol=self.protocol, + config=self.config, + should_maintain_isinstance_semantics=self.should_maintain_isinstance_semantics() + ) assert_run_python_script(code) def 
test_interactive_dynamic_type_and_stored_remote_instances(self): + if self.config == "no_tracking": + pytest.skip("Not dynamic types isinstance semantics") + if self.config == "skip_reset": + pytest.skip("Updating types does not reset cached state") """Simulate objects stored on workers to check isinstance semantics Such instances stored in the memory of running worker processes are @@ -1968,7 +1998,7 @@ def test_interactive_dynamic_type_and_stored_remote_instances(self): import cloudpickle, uuid from testutils import subprocess_worker - with subprocess_worker(protocol={protocol}) as w: + with subprocess_worker(protocol={protocol}, config='{config}') as w: class A: '''Original class definition''' @@ -2032,22 +2062,22 @@ class A: # method: assert w.run(lambda obj_id: lookup(obj_id).echo(43), id2) == 43 - """.format(protocol=self.protocol) + """.format(protocol=self.protocol, config=self.config) assert_run_python_script(code) def test_dynamic_func_deterministic_roundtrip(self): # Check that the pickle serialization for a dynamic func is the same # in two processes. 
- def get_dynamic_func_pickle(): + def get_dynamic_func_pickle(protocol, config): def test_method(arg_1, arg_2): pass - return cloudpickle.dumps(test_method) + return cloudpickle.dumps(test_method, protocol=protocol, config=get_config(config)) - with subprocess_worker(protocol=self.protocol) as w: - A_dump = w.run(get_dynamic_func_pickle) - check_deterministic_pickle(A_dump, get_dynamic_func_pickle()) + with subprocess_worker(protocol=self.protocol, config=self.config) as w: + A_dump = w.run(get_dynamic_func_pickle, self.protocol, self.config) + check_deterministic_pickle(A_dump, get_dynamic_func_pickle(self.protocol, self.config)) def test_dynamic_class_deterministic_roundtrip(self): # Check that the pickle serialization for a dynamic class is the same @@ -2066,9 +2096,9 @@ def join(self): def test_method(self, arg_1, join): pass - return cloudpickle.dumps(A) + return self.dumps(A) - with subprocess_worker(protocol=self.protocol) as w: + with subprocess_worker(protocol=self.protocol, config=self.config) as w: A_dump = w.run(get_dynamic_class_pickle) check_deterministic_pickle(A_dump, get_dynamic_class_pickle()) @@ -2079,15 +2109,15 @@ def test_deterministic_dynamic_class_attr_ordering_for_chained_pickling(self): # In particular, this test checks that the order of the class attributes is # deterministic. 
- with subprocess_worker(protocol=self.protocol) as w: + with subprocess_worker(protocol=self.protocol, config=self.config) as w: class A: """Simple class definition""" pass - A_dump = w.run(cloudpickle.dumps, A) - check_deterministic_pickle(A_dump, cloudpickle.dumps(A)) + A_dump = w.run(cloudpickle.dumps, A, protocol=self.protocol, config=get_config(self.config)) + check_deterministic_pickle(A_dump, self.dumps(A)) # If the `__doc__` attribute is defined after some other class # attribute, this can cause class attribute ordering changes due to @@ -2098,8 +2128,8 @@ class A: name = "A" __doc__ = "Updated class definition" - A_dump = w.run(cloudpickle.dumps, A) - check_deterministic_pickle(A_dump, cloudpickle.dumps(A)) + A_dump = w.run(cloudpickle.dumps, A, protocol=self.protocol, config=get_config(self.config)) + check_deterministic_pickle(A_dump, self.dumps(A)) # If a `__doc__` is defined on the `__init__` method, this can # cause ordering changes due to the way we reconstruct the class @@ -2109,8 +2139,8 @@ def __init__(self): """Class definition with explicit __init__""" pass - A_dump = w.run(cloudpickle.dumps, A) - check_deterministic_pickle(A_dump, cloudpickle.dumps(A)) + A_dump = w.run(cloudpickle.dumps, A, protocol=self.protocol, config=get_config(self.config)) + check_deterministic_pickle(A_dump, self.dumps(A)) def test_deterministic_str_interning_for_chained_dynamic_class_pickling(self): # Check that the pickle produced by the unpickled instance is the same. @@ -2118,7 +2148,7 @@ def test_deterministic_str_interning_for_chained_dynamic_class_pickling(self): # the names of attributes of class definitions and names of attributes # of the `__code__` objects of the methods. - with subprocess_worker(protocol=self.protocol) as w: + with subprocess_worker(protocol=self.protocol, config=self.config) as w: # Due to interning of class attributes, check that this does not # create issues with dynamic function definition. 
class A: @@ -2132,8 +2162,8 @@ def join(self): def test_method(self, arg_1, join): pass - A_dump = w.run(cloudpickle.dumps, A) - check_deterministic_pickle(A_dump, cloudpickle.dumps(A)) + A_dump = w.run(cloudpickle.dumps, A, protocol=self.protocol, config=get_config(self.config)) + check_deterministic_pickle(A_dump, self.dumps(A)) # Also check that memoization of string value inside the class does # not cause non-deterministic pickle with interned method names. @@ -2150,14 +2180,14 @@ def join(self, arg_1): # the string used for the attribute name. A.join.arg_1 = "join" - A_dump = w.run(cloudpickle.dumps, A) - check_deterministic_pickle(A_dump, cloudpickle.dumps(A)) + A_dump = w.run(cloudpickle.dumps, A, protocol=self.protocol, config=get_config(self.config)) + check_deterministic_pickle(A_dump, self.dumps(A)) def test_dynamic_class_determinist_subworker_tuple_memoization(self): # Check that the pickle produced by the unpickled instance is the same. # This highlights some issues with tuple memoization. - with subprocess_worker(protocol=self.protocol) as w: + with subprocess_worker(protocol=self.protocol, config=self.config) as w: # Arguments' tuple is memoized in the main process but not in the # subprocess as the tuples do not share the same id in the loaded # class. 
@@ -2170,8 +2200,8 @@ def func1(self): def func2(self): pass - A_dump = w.run(cloudpickle.dumps, A) - check_deterministic_pickle(A_dump, cloudpickle.dumps(A)) + A_dump = w.run(cloudpickle.dumps, A, protocol=self.protocol, config=get_config(self.config)) + check_deterministic_pickle(A_dump, self.dumps(A)) @pytest.mark.skipif( platform.python_implementation() == "PyPy", @@ -2182,7 +2212,7 @@ def test_interactive_remote_function_calls_no_memory_leak(self): from testutils import subprocess_worker import struct - with subprocess_worker(protocol={protocol}) as w: + with subprocess_worker(protocol={protocol}, config='{config}') as w: reference_size = w.memsize() assert reference_size > 0 @@ -2220,18 +2250,18 @@ def process_data(): # iterations instead of 100 as used now (100x more data) assert growth < 5e7, growth - """.format(protocol=self.protocol) + """.format(protocol=self.protocol, config=self.config) assert_run_python_script(code) def test_pickle_reraise(self): for exc_type in [Exception, ValueError, TypeError, RuntimeError]: obj = RaiserOnPickle(exc_type("foo")) with pytest.raises((exc_type, pickle.PicklingError)): - cloudpickle.dumps(obj, protocol=self.protocol) + self.dumps(obj) def test_unhashable_function(self): d = {"a": 1} - depickled_method = pickle_depickle(d.get, protocol=self.protocol) + depickled_method = self.pickle_depickle(d.get) self.assertEqual(depickled_method("a"), 1) self.assertEqual(depickled_method("b"), None) @@ -2246,7 +2276,7 @@ def test_itertools_count(self): next(counter) next(counter) - new_counter = pickle_depickle(counter, protocol=self.protocol) + new_counter = self.pickle_depickle(counter) self.assertTrue(counter is not new_counter) @@ -2263,7 +2293,7 @@ def f(): def g(): f() - f2 = pickle_depickle(g, protocol=self.protocol) + f2 = self.pickle_depickle(g) self.assertEqual(f2.__name__, f.__name__) @@ -2278,7 +2308,7 @@ def f(): def g(): f() - f2 = pickle_depickle(g, protocol=self.protocol) + f2 = self.pickle_depickle(g) 
self.assertEqual(f2.__doc__, f.__doc__) @@ -2290,15 +2320,17 @@ def f(x: int) -> float: def g(x): f(x) - f2 = pickle_depickle(g, protocol=self.protocol) + f2 = self.pickle_depickle(g) self.assertEqual(f2.__annotations__, f.__annotations__) def test_type_hint(self): t = typing.Union[list, int] - assert pickle_depickle(t) == t + assert self.pickle_depickle(t) == t def test_instance_with_slots(self): + config = get_config(self.config) + should_maintain_isinstance_semantics = config.id_generator is not None for slots in [["registered_attribute"], "registered_attribute"]: class ClassWithSlots: @@ -2308,21 +2340,23 @@ def __init__(self): self.registered_attribute = 42 initial_obj = ClassWithSlots() - depickled_obj = pickle_depickle(initial_obj, protocol=self.protocol) + depickled_obj = self.pickle_depickle(initial_obj) assert depickled_obj.__class__.__slots__ == slots for obj in [initial_obj, depickled_obj]: self.assertEqual(obj.registered_attribute, 42) - with pytest.raises(AttributeError): - obj.non_registered_attribute = 1 + # I think this only throws if the original type is still defined + if should_maintain_isinstance_semantics: + with pytest.raises(AttributeError): + obj.non_registered_attribute = 1 class SubclassWithSlots(ClassWithSlots): def __init__(self): self.unregistered_attribute = 1 obj = SubclassWithSlots() - s = cloudpickle.dumps(obj, protocol=self.protocol) + s = self.dumps(obj) del SubclassWithSlots depickled_obj = cloudpickle.loads(s) assert depickled_obj.unregistered_attribute == 1 @@ -2333,7 +2367,7 @@ def __init__(self): ) def test_mappingproxy(self): mp = types.MappingProxyType({"some_key": "some value"}) - assert mp == pickle_depickle(mp, protocol=self.protocol) + assert mp == self.pickle_depickle(mp) def test_dataclass(self): dataclasses = pytest.importorskip("dataclasses") @@ -2341,8 +2375,8 @@ def test_dataclass(self): DataClass = dataclasses.make_dataclass("DataClass", [("x", int)]) data = DataClass(x=42) - pickle_depickle(DataClass, 
protocol=self.protocol) - assert data.x == pickle_depickle(data, protocol=self.protocol).x == 42 + self.pickle_depickle(DataClass) + assert data.x == self.pickle_depickle(data).x == 42 def test_locally_defined_enum(self): class StringEnum(str, enum.Enum): @@ -2358,8 +2392,8 @@ class Color(StringEnum): def is_green(self): return self is Color.GREEN - green1, green2, ClonedColor = pickle_depickle( - [Color.GREEN, Color.GREEN, Color], protocol=self.protocol + green1, green2, ClonedColor = self.pickle_depickle( + [Color.GREEN, Color.GREEN, Color] ) assert green1 is green2 assert green1 is ClonedColor.GREEN @@ -2369,32 +2403,31 @@ def is_green(self): # cloudpickle systematically tracks provenance of class definitions # and ensure reconciliation in case of round trips: - assert green1 is Color.GREEN - assert ClonedColor is Color + assert (green1 is Color.GREEN) == self.should_maintain_isinstance_semantics() + assert (ClonedColor is Color) == self.should_maintain_isinstance_semantics() - green3 = pickle_depickle(Color.GREEN, protocol=self.protocol) - assert green3 is Color.GREEN + green3 = self.pickle_depickle(Color.GREEN) + assert (green3 is Color.GREEN) == self.should_maintain_isinstance_semantics() def test_locally_defined_intenum(self): # Try again with a IntEnum defined with the functional API DynamicColor = enum.IntEnum("Color", {"RED": 1, "GREEN": 2, "BLUE": 3}) - green1, green2, ClonedDynamicColor = pickle_depickle( - [DynamicColor.GREEN, DynamicColor.GREEN, DynamicColor], - protocol=self.protocol, + green1, green2, ClonedDynamicColor = self.pickle_depickle( + [DynamicColor.GREEN, DynamicColor.GREEN, DynamicColor] ) assert green1 is green2 assert green1 is ClonedDynamicColor.GREEN assert green1 is not ClonedDynamicColor.BLUE - assert ClonedDynamicColor is DynamicColor + assert (ClonedDynamicColor is DynamicColor) == self.should_maintain_isinstance_semantics() def test_interactively_defined_enum(self): code = """if __name__ == "__main__": from enum import Enum from 
testutils import subprocess_worker - with subprocess_worker(protocol={protocol}) as w: + with subprocess_worker(protocol={protocol}, config='{config}') as w: class Color(Enum): RED = 1 @@ -2408,7 +2441,8 @@ def check_positive(x): # Check that the returned enum instance is reconciled with the # locally defined Color enum type definition: - assert result is Color.GREEN + should_maintain_isinstance_semantics = {should_maintain_isinstance_semantics} + assert (result is Color.GREEN) == should_maintain_isinstance_semantics # Check that changing the definition of the Enum class is taken # into account on the worker for subsequent calls: @@ -2421,8 +2455,12 @@ def check_positive(x): return Color.BLUE if x >= 0 else Color.RED result = w.run(check_positive, 1) - assert result is Color.BLUE - """.format(protocol=self.protocol) + assert (result is Color.BLUE) == should_maintain_isinstance_semantics + """.format( + protocol=self.protocol, + config=self.config, + should_maintain_isinstance_semantics=self.should_maintain_isinstance_semantics() + ) assert_run_python_script(code) def test_relative_import_inside_function(self): @@ -2437,7 +2475,7 @@ def test_relative_import_inside_function(self): assert func() == f"hello from a {source}!" # Make sure relative imports still work after round-tripping - cloned_func = pickle_depickle(func, protocol=self.protocol) + cloned_func = self.pickle_depickle(func) assert cloned_func() == f"hello from a {source}!" 
def test_interactively_defined_func_with_keyword_only_argument(self): @@ -2445,7 +2483,7 @@ def test_interactively_defined_func_with_keyword_only_argument(self): def f(a, *, b=1): return a + b - depickled_f = pickle_depickle(f, protocol=self.protocol) + depickled_f = self.pickle_depickle(f) for func in (f, depickled_f): assert func(2) == 3 @@ -2476,7 +2514,7 @@ def f(a, /, b=1): with pytest.raises(TypeError): func(a=2) - """.format(protocol=self.protocol) + """.format(protocol=self.protocol, config=self.config) assert_run_python_script(textwrap.dedent(code)) def test___reduce___returns_string(self): @@ -2485,7 +2523,7 @@ def test___reduce___returns_string(self): _cloudpickle_testpkg = pytest.importorskip("_cloudpickle_testpkg") some_singleton = _cloudpickle_testpkg.some_singleton assert some_singleton.__reduce__() == "some_singleton" - depickled_singleton = pickle_depickle(some_singleton, protocol=self.protocol) + depickled_singleton = self.pickle_depickle(some_singleton) assert depickled_singleton is some_singleton def test_cloudpickle_extract_nested_globals(self): @@ -2503,7 +2541,7 @@ def inner_function(): ) assert globals_ == {"_TEST_GLOBAL_VARIABLE"} - depickled_factory = pickle_depickle(function_factory, protocol=self.protocol) + depickled_factory = self.pickle_depickle(function_factory) inner_func = depickled_factory() assert inner_func() == _TEST_GLOBAL_VARIABLE @@ -2520,7 +2558,7 @@ def __getattribute__(self, name): a = A() with pytest.raises(pickle.PicklingError, match="deep recursion"): - cloudpickle.dumps(a) + self.dumps(a) def test_out_of_band_buffers(self): if self.protocol < 5: @@ -2532,8 +2570,8 @@ class LocallyDefinedClass: data_instance = LocallyDefinedClass() buffers = [] - pickle_bytes = cloudpickle.dumps( - data_instance, protocol=self.protocol, buffer_callback=buffers.append + pickle_bytes = self.dumps( + data_instance, buffer_callback=buffers.append ) assert len(buffers) == 1 reconstructed = pickle.loads(pickle_bytes, buffers=buffers) @@ 
-2541,7 +2579,7 @@ class LocallyDefinedClass: def test_pickle_dynamic_typevar(self): T = typing.TypeVar("T") - depickled_T = pickle_depickle(T, protocol=self.protocol) + depickled_T = self.pickle_depickle(T) attr_list = [ "__name__", "__bound__", @@ -2554,37 +2592,39 @@ def test_pickle_dynamic_typevar(self): def test_pickle_dynamic_typevar_tracking(self): T = typing.TypeVar("T") - T2 = subprocess_pickle_echo(T, protocol=self.protocol) - assert T is T2 + T2 = subprocess_pickle_echo(T, self.protocol, self.config) + assert (T is T2) == self.should_maintain_isinstance_semantics() def test_pickle_dynamic_typevar_memoization(self): T = typing.TypeVar("T") - depickled_T1, depickled_T2 = pickle_depickle((T, T), protocol=self.protocol) + depickled_T1, depickled_T2 = self.pickle_depickle((T, T)) assert depickled_T1 is depickled_T2 def test_pickle_importable_typevar(self): _cloudpickle_testpkg = pytest.importorskip("_cloudpickle_testpkg") - T1 = pickle_depickle(_cloudpickle_testpkg.T, protocol=self.protocol) + T1 = self.pickle_depickle(_cloudpickle_testpkg.T) assert T1 is _cloudpickle_testpkg.T # Standard Library TypeVar from typing import AnyStr - assert AnyStr is pickle_depickle(AnyStr, protocol=self.protocol) + assert AnyStr is self.pickle_depickle(AnyStr) def test_generic_type(self): T = typing.TypeVar("T") class C(typing.Generic[T]): pass - - assert pickle_depickle(C, protocol=self.protocol) is C + + config = get_config(self.config) + should_maintain_isinstance_semantics = config.id_generator is not None + assert (self.pickle_depickle(C) is C) == should_maintain_isinstance_semantics # Identity is not part of the typing contract: only test for # equality instead. 
- assert pickle_depickle(C[int], protocol=self.protocol) == C[int] + assert (self.pickle_depickle(C[int]) == C[int]) == should_maintain_isinstance_semantics - with subprocess_worker(protocol=self.protocol) as worker: + with subprocess_worker(protocol=self.protocol, config=self.config) as worker: def check_generic(generic, origin, type_value): assert generic.__origin__ is origin @@ -2627,10 +2667,12 @@ class LeafT(DerivedT[T]): pass klasses = [Base, DerivedAny, LeafAny, DerivedInt, LeafInt, DerivedT, LeafT] + config = get_config(self.config) + should_maintain_isinstance_semantics = config.id_generator is not None for klass in klasses: - assert pickle_depickle(klass, protocol=self.protocol) is klass + assert (self.pickle_depickle(klass) is klass) == should_maintain_isinstance_semantics - with subprocess_worker(protocol=self.protocol) as worker: + with subprocess_worker(protocol=self.protocol, config=self.config) as worker: def check_mro(klass, expected_mro): assert klass.mro() == expected_mro @@ -2642,7 +2684,7 @@ def check_mro(klass, expected_mro): assert worker.run(check_mro, klass, mro) == "ok" def test_locally_defined_class_with_type_hints(self): - with subprocess_worker(protocol=self.protocol) as worker: + with subprocess_worker(protocol=self.protocol, config=self.config) as worker: for type_ in _all_types_to_test(): class MyClass: @@ -2669,14 +2711,14 @@ class C: C.__annotations__ = {"a": int} - C1 = pickle_depickle(C, protocol=self.protocol) + C1 = self.pickle_depickle(C) assert C1.__annotations__ == C.__annotations__ def test_function_annotations(self): def f(a: int) -> str: pass - f1 = pickle_depickle(f, protocol=self.protocol) + f1 = self.pickle_depickle(f) assert f1.__annotations__ == f.__annotations__ def test_always_use_up_to_date_copyreg(self): @@ -2694,7 +2736,7 @@ def reduce_myclass(x): copyreg.dispatch_table[MyClass] = reduce_myclass my_obj = MyClass() - depickled_myobj = pickle_depickle(my_obj, protocol=self.protocol) + depickled_myobj = 
self.pickle_depickle(my_obj) assert hasattr(depickled_myobj, "custom_reduce") finally: copyreg.dispatch_table.pop(MyClass) @@ -2707,7 +2749,7 @@ def __values__(self): return () o = MyClass() - pickle_depickle(o, protocol=self.protocol) + self.pickle_depickle(o) def test_final_or_classvar_misdetection(self): # see https://github.com/cloudpipe/cloudpickle/issues/403 @@ -2717,7 +2759,7 @@ def __type__(self): return int o = MyClass() - pickle_depickle(o, protocol=self.protocol) + self.pickle_depickle(o) def test_pickle_constructs_from_module_registered_for_pickling_by_value( self, @@ -2742,7 +2784,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value( # Add the desired session working directory sys.path.insert(0, _mock_interactive_session_cwd) - with subprocess_worker(protocol=self.protocol) as w: + with subprocess_worker(protocol=self.protocol, config=self.config) as w: # Make the module unavailable in the remote worker w.run(lambda p: sys.path.remove(p), _mock_interactive_session_cwd) # Import the actual file after starting the module since the @@ -2878,7 +2920,7 @@ def test_pickle_constructs_from_installed_packages_registered_for_pickling_by_va f = m.module_function_with_global _original_global = m.global_variable try: - with subprocess_worker(protocol=self.protocol) as w: + with subprocess_worker(protocol=self.protocol, config=self.config) as w: assert w.run(lambda: f()) == _original_global # Test that f is pickled by value by modifying a global @@ -2920,7 +2962,7 @@ def _call_from_registry(k): return _main._cloudpickle_registry[k]() try: - with subprocess_worker(protocol=self.protocol) as w: + with subprocess_worker(protocol=self.protocol, config=self.config) as w: w.run(_create_registry) w.run(_add_to_registry, f, "f_by_ref") @@ -2950,6 +2992,7 @@ def func_with_globals(): subprocess_pickle_string( func_with_globals, protocol=self.protocol, + config=self.config, add_env={"PYTHONHASHSEED": str(i)}, ) ) @@ -2966,8 +3009,8 @@ class 
SampleDataclass: y: dataclasses.InitVar[int] z: typing.ClassVar[int] - PickledSampleDataclass = pickle_depickle( - SampleDataclass, protocol=self.protocol + PickledSampleDataclass = self.pickle_depickle( + SampleDataclass ) found_fields = list(PickledSampleDataclass.__dataclass_fields__.values()) @@ -2988,7 +3031,7 @@ def test_interactively_defined_dataclass_with_initvar_and_classvar(self): from testutils import subprocess_worker import typing - with subprocess_worker(protocol={protocol}) as w: + with subprocess_worker(protocol={protocol}, config='{config}') as w: @dataclasses.dataclass class SampleDataclass: @@ -3031,15 +3074,29 @@ def echo(*args): return args cloned_value, cloned_type = w.run(echo, value, SampleDataclass) - assert cloned_type is SampleDataclass - assert isinstance(cloned_value, SampleDataclass) - """.format(protocol=self.protocol) + + should_maintain_isinstance_semantics = {should_maintain_isinstance_semantics} + assert (cloned_type is SampleDataclass) == should_maintain_isinstance_semantics + assert isinstance(cloned_value, SampleDataclass) == should_maintain_isinstance_semantics + """.format( + protocol=self.protocol, + config=self.config, + should_maintain_isinstance_semantics=self.should_maintain_isinstance_semantics() + ) assert_run_python_script(code) class Protocol2CloudPickleTest(CloudPickleTest): protocol = 2 +class SequentialConfigCloudPickleTest(CloudPickleTest): + config = 'sequential' + +class NoTrackingConfigCloudPickleTest(CloudPickleTest): + config = 'no_tracking' + +class SkipResetConfigCloudPickleTest(CloudPickleTest): + config = 'skip_reset' def test_lookup_module_and_qualname_dynamic_typevar(): T = typing.TypeVar("T") diff --git a/tests/testutils.py b/tests/testutils.py index bf2d3bca..367dc752 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -12,13 +12,40 @@ import psutil from cloudpickle import dumps +from cloudpickle import CloudPickleConfig +from cloudpickle import DEFAULT_CONFIG from subprocess import 
TimeoutExpired +import logging loads = pickle.loads TIMEOUT = 60 TEST_GLOBALS = "a test value" +_NEXT_DYNAMIC_CLASS_TRACKER_ID = 1 + +def sequential_id_generator(_): + # logging.warning('CLAUDE HELLO ') + # raise ValueError('hello') + global _NEXT_DYNAMIC_CLASS_TRACKER_ID + _NEXT_DYNAMIC_CLASS_TRACKER_ID += 1 + return str(_NEXT_DYNAMIC_CLASS_TRACKER_ID) + +_SEQUENTIAL_CONFIG = CloudPickleConfig(id_generator=sequential_id_generator) +_NO_TRACKING_CONFIG = CloudPickleConfig(id_generator=None) +_SKIP_RESET_CONFIG = CloudPickleConfig(skip_reset_dynamic_type_state=True) + + +CONFIG_REGISTRY = { + "default": DEFAULT_CONFIG, + "sequential": _SEQUENTIAL_CONFIG, + "no_tracking": _NO_TRACKING_CONFIG, + "skip_reset": _SKIP_RESET_CONFIG +} + +def get_config(config_key): + return CONFIG_REGISTRY[config_key] + def make_local_function(): def g(x): # this function checks that the globals are correctly handled and that @@ -40,7 +67,7 @@ def _make_cwd_env(): return cloudpickle_repo_folder, env -def subprocess_pickle_string(input_data, protocol=None, timeout=TIMEOUT, add_env=None): +def subprocess_pickle_string(input_data, protocol=None, config=None, timeout=TIMEOUT, add_env=None): """Retrieve pickle string of an object generated by a child Python process Pickle the input data into a buffer, send it to a subprocess via @@ -56,14 +83,14 @@ def subprocess_pickle_string(input_data, protocol=None, timeout=TIMEOUT, add_env # Protect stderr from any warning, as we will assume an error will happen # if it is not empty. 
A concrete example is pytest using the imp module, # which is deprecated in python 3.8 - cmd = [sys.executable, "-W ignore", __file__, "--protocol", str(protocol)] + cmd = [sys.executable, "-W ignore", __file__, "--protocol", str(protocol), "--config", config] cwd, env = _make_cwd_env() if add_env: env.update(add_env) proc = Popen( cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd, env=env, bufsize=4096 ) - pickle_string = dumps(input_data, protocol=protocol) + pickle_string = dumps(input_data, protocol=protocol, config=get_config(config)) try: comm_kwargs = {} comm_kwargs["timeout"] = timeout @@ -80,7 +107,7 @@ def subprocess_pickle_string(input_data, protocol=None, timeout=TIMEOUT, add_env raise RuntimeError(message) from e -def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT, add_env=None): +def subprocess_pickle_echo(input_data, protocol=None, config=None, timeout=TIMEOUT, add_env=None): """Echo function with a child Python process Pickle the input data into a buffer, send it to a subprocess via stdin, expect the subprocess to unpickle, re-pickle that data back @@ -89,7 +116,7 @@ def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT, add_env=N [1, 'a', None] """ out = subprocess_pickle_string( - input_data, protocol=protocol, timeout=timeout, add_env=add_env + input_data, protocol=protocol, config=config, timeout=timeout, add_env=add_env ) return loads(out) @@ -104,7 +131,7 @@ def _read_all_bytes(stream_in, chunk_size=4096): return all_data -def pickle_echo(stream_in=None, stream_out=None, protocol=None): +def pickle_echo(stream_in=None, stream_out=None, protocol=None, config=None): """Read a pickle from stdin and pickle it back to stdout""" if stream_in is None: stream_in = sys.stdin @@ -120,33 +147,36 @@ def pickle_echo(stream_in=None, stream_out=None, protocol=None): input_bytes = _read_all_bytes(stream_in) stream_in.close() obj = loads(input_bytes) - repickled_bytes = dumps(obj, protocol=protocol) + # 
logging.warning(f"CLAUDE {config} {get_config(config)}") + repickled_bytes = dumps(obj, protocol=protocol, config=get_config(config)) stream_out.write(repickled_bytes) stream_out.close() -def call_func(payload, protocol): +def call_func(payload, protocol, config): + """Remote function call that uses cloudpickle to transport everything""" func, args, kwargs = loads(payload) try: result = func(*args, **kwargs) except BaseException as e: result = e - return dumps(result, protocol=protocol) + return dumps(result, protocol=protocol, config=get_config(config)) class _Worker: - def __init__(self, protocol=None): + def __init__(self, protocol=None, config=None): self.protocol = protocol + self.config = config self.pool = ProcessPoolExecutor(max_workers=1) self.pool.submit(id, 42).result() # start the worker process def run(self, func, *args, **kwargs): """Synchronous remote function call""" - input_payload = dumps((func, args, kwargs), protocol=self.protocol) + input_payload = dumps((func, args, kwargs), protocol=self.protocol, config=get_config(self.config)) result_payload = self.pool.submit( - call_func, input_payload, self.protocol + call_func, input_payload, self.protocol, self.config ).result() result = loads(result_payload) @@ -170,8 +200,8 @@ def close(self): @contextmanager -def subprocess_worker(protocol=None): - worker = _Worker(protocol=protocol) +def subprocess_worker(protocol=None, config=None): + worker = _Worker(protocol=protocol, config=config) yield worker worker.close() @@ -251,4 +281,5 @@ def check_deterministic_pickle(a, b): if __name__ == "__main__": protocol = int(sys.argv[sys.argv.index("--protocol") + 1]) - pickle_echo(protocol=protocol) + config = sys.argv[sys.argv.index("--config") + 1] + pickle_echo(protocol=protocol, config=config) From c753da695002e5d69106744352238ab77ec6e8fa Mon Sep 17 00:00:00 2001 From: claudevdm Date: Tue, 24 Jun 2025 12:38:37 -0400 Subject: [PATCH 02/13] Don't pickle config. 
--- cloudpickle/cloudpickle.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 81b963c6..8a423247 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -118,11 +118,11 @@ class CloudPickleConfig: _extract_code_globals_cache = weakref.WeakKeyDictionary() -def _get_or_create_tracker_id(class_def, config): +def _get_or_create_tracker_id(class_def, id_generator): with _DYNAMIC_CLASS_TRACKER_LOCK: class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def) - if class_tracker_id is None and config.id_generator is not None: - class_tracker_id = config.id_generator(class_def) + if class_tracker_id is None and id_generator is not None: + class_tracker_id = id_generator(class_def) _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def return class_tracker_id @@ -619,7 +619,7 @@ def _decompose_typevar(obj, config): obj.__constraints__, obj.__covariant__, obj.__contravariant__, - _get_or_create_tracker_id(obj, config), + _get_or_create_tracker_id(obj, config.id_generator), ) @@ -689,7 +689,7 @@ def _class_getnewargs(obj, config): obj.__name__, _get_bases(obj), type_kwargs, - _get_or_create_tracker_id(obj, config), + _get_or_create_tracker_id(obj, config.id_generator), None, ) @@ -702,7 +702,7 @@ def _enum_getnewargs(obj, config): obj.__qualname__, members, obj.__module__, - _get_or_create_tracker_id(obj, config), + _get_or_create_tracker_id(obj, config.id_generator), None, ) @@ -1060,7 +1060,9 @@ def _dynamic_class_reduce(obj, config): _enum_getstate(obj), None, None, - functools.partial(_class_setstate, config=config), + functools.partial( + _class_setstate, + skip_reset_dynamic_type_state=config.skip_reset_dynamic_type_state), ) else: return ( @@ -1069,7 +1071,9 @@ def _dynamic_class_reduce(obj, config): _class_getstate(obj), None, None, - functools.partial(_class_setstate, 
config=config), + functools.partial( + _class_setstate, + skip_reset_dynamic_type_state=config.skip_reset_dynamic_type_state), ) @@ -1167,10 +1171,10 @@ def _function_setstate(obj, state): for k, v in slotstate.items(): setattr(obj, k, v) -def _class_setstate(obj, state, config): +def _class_setstate(obj, state, skip_reset_dynamic_type_state): # Lock while potentially modifying class state. with _DYNAMIC_CLASS_TRACKER_LOCK: - if config.skip_reset_dynamic_type_state and obj in _DYNAMIC_CLASS_STATE_TRACKER_BY_CLASS: + if skip_reset_dynamic_type_state and obj in _DYNAMIC_CLASS_STATE_TRACKER_BY_CLASS: return obj _DYNAMIC_CLASS_STATE_TRACKER_BY_CLASS[obj] = True state, slotstate = state From 6fb8eca2f959aae82e98f2649d4f6986219ca254 Mon Sep 17 00:00:00 2001 From: claudevdm Date: Fri, 26 Sep 2025 12:05:17 -0400 Subject: [PATCH 03/13] Fix pypi typevar reduce. --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 8a423247..1d57825d 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1479,7 +1479,7 @@ def save_global(self, obj, name=None, pack=struct.pack): def save_typevar(self, obj, name=None): """Handle TypeVar objects with access to config.""" - return self._save_reduce_pickle5(*_typevar_reduce(obj, self.config), obj=obj) + return self.save_reduce(*_typevar_reduce(obj, self.config), obj=obj) dispatch[typing.TypeVar] = save_typevar From de182983e5a6521bc64e08df274a09d8b5669e35 Mon Sep 17 00:00:00 2001 From: claudevdm Date: Fri, 26 Sep 2025 11:06:00 -0400 Subject: [PATCH 04/13] Add option to use relative filepaths for pickling code objects. 
--- cloudpickle/cloudpickle.py | 45 +++++++++++++++++++++++++++++++++----- tests/cloudpickle_test.py | 7 ++++-- tests/testutils.py | 4 +++- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 1d57825d..d35c5dbc 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -63,6 +63,7 @@ import itertools import logging import opcode +import os import pickle from pickle import _getattribute as _pickle_getattribute import platform @@ -107,6 +108,7 @@ class CloudPickleConfig: """Configuration for cloudpickle behavior.""" id_generator: typing.Optional[callable] = uuid_generator skip_reset_dynamic_type_state: bool = False + use_relative_filepaths: bool = False DEFAULT_CONFIG = CloudPickleConfig() @@ -397,6 +399,26 @@ def func(): return subimports +def _get_relative_path(path): + """Returns the path of a filename relative to the longest matching directory + in sys.path. + Args: + path: The path to the file. + """ + abs_path = os.path.abspath(path) + longest_match = "" + + for dir_path in sys.path: + if not dir_path.endswith(os.path.sep): + dir_path += os.path.sep + + if abs_path.startswith(dir_path) and len(dir_path) > len(longest_match): + longest_match = dir_path + + if not longest_match: + return path + return os.path.relpath(abs_path, longest_match) + # relevant opcodes STORE_GLOBAL = opcode.opmap["STORE_GLOBAL"] DELETE_GLOBAL = opcode.opmap["DELETE_GLOBAL"] @@ -831,7 +853,7 @@ def _enum_getstate(obj): # these holes". 
-def _code_reduce(obj): +def _code_reduce(obj, config): """code object reducer.""" # If you are not sure about the order of arguments, take a look at help # of the specific type from types, for example: @@ -850,6 +872,11 @@ def _code_reduce(obj): co_varnames = tuple(name for name in obj.co_varnames) co_freevars = tuple(name for name in obj.co_freevars) co_cellvars = tuple(name for name in obj.co_cellvars) + + co_filename = obj.co_filename + if (config and config.use_relative_filepaths): + co_filename = _get_relative_path(co_filename) + if hasattr(obj, "co_exceptiontable"): # Python 3.11 and later: there are some new attributes # related to the enhanced exceptions. @@ -864,7 +891,7 @@ def _code_reduce(obj): obj.co_consts, co_names, co_varnames, - obj.co_filename, + co_filename, co_name, obj.co_qualname, obj.co_firstlineno, @@ -887,7 +914,7 @@ def _code_reduce(obj): obj.co_consts, co_names, co_varnames, - obj.co_filename, + co_filename, co_name, obj.co_firstlineno, obj.co_linetable, @@ -908,7 +935,7 @@ def _code_reduce(obj): obj.co_code, obj.co_consts, co_varnames, - obj.co_filename, + co_filename, co_name, obj.co_firstlineno, obj.co_lnotab, @@ -932,7 +959,7 @@ def _code_reduce(obj): obj.co_consts, co_names, co_varnames, - obj.co_filename, + co_filename, co_name, obj.co_firstlineno, obj.co_lnotab, @@ -1243,7 +1270,6 @@ class Pickler(pickle.Pickler): _dispatch_table[property] = _property_reduce _dispatch_table[staticmethod] = _classmethod_reduce _dispatch_table[CellType] = _cell_reduce - _dispatch_table[types.CodeType] = _code_reduce _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce _dispatch_table[types.ModuleType] = _module_reduce _dispatch_table[types.MethodType] = _method_reduce @@ -1404,6 +1430,8 @@ def reducer_override(self, obj): return _class_reduce(obj, self.config) elif isinstance(obj, typing.TypeVar): # Add this check return _typevar_reduce(obj, self.config) + elif isinstance(obj, types.CodeType): + return _code_reduce(obj, 
self.config) elif isinstance(obj, types.FunctionType): return self._function_reduce(obj) else: @@ -1483,6 +1511,11 @@ def save_typevar(self, obj, name=None): dispatch[typing.TypeVar] = save_typevar + def save_code(self, obj, name=None): + return self.save_reduce(*_code_reduce(obj, self.config), obj=obj) + + dispatch[types.CodeType] = save_code + def save_function(self, obj, name=None): """Registered with the dispatch to handle all function types. diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 17c00867..5c6aa400 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1765,8 +1765,8 @@ def test_interactively_defined_global_variable(self): from testutils import get_config from cloudpickle import dumps, loads - def local_clone(obj, protocol=None, config=get_config('{config}')): - return loads(dumps(obj, protocol=protocol, config=config)) + def local_clone(obj, protocol=None, config='{config}'): + return loads(dumps(obj, protocol=protocol, config=get_config('{config}'))) VARIABLE = "default_value" @@ -3098,6 +3098,9 @@ class NoTrackingConfigCloudPickleTest(CloudPickleTest): class SkipResetConfigCloudPickleTest(CloudPickleTest): config = 'skip_reset' +class UseRelativeFilepathsCloudPickleTest(CloudPickleTest): + config = 'use_relative_filepaths' + def test_lookup_module_and_qualname_dynamic_typevar(): T = typing.TypeVar("T") module_and_name = _lookup_module_and_qualname(T, name=T.__name__) diff --git a/tests/testutils.py b/tests/testutils.py index 367dc752..2d65f96e 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -34,13 +34,15 @@ def sequential_id_generator(_): _SEQUENTIAL_CONFIG = CloudPickleConfig(id_generator=sequential_id_generator) _NO_TRACKING_CONFIG = CloudPickleConfig(id_generator=None) _SKIP_RESET_CONFIG = CloudPickleConfig(skip_reset_dynamic_type_state=True) +_USE_RELATIVE_FILEPATHS = CloudPickleConfig(use_relative_filepaths=True) CONFIG_REGISTRY = { "default": DEFAULT_CONFIG, "sequential": 
_SEQUENTIAL_CONFIG, "no_tracking": _NO_TRACKING_CONFIG, - "skip_reset": _SKIP_RESET_CONFIG + "skip_reset": _SKIP_RESET_CONFIG, + "use_relative_filepaths": _USE_RELATIVE_FILEPATHS } def get_config(config_key): From 4afe58599b2802e7765f3e7d74e4264a7fcc523f Mon Sep 17 00:00:00 2001 From: claudevdm Date: Fri, 26 Sep 2025 12:33:13 -0400 Subject: [PATCH 05/13] Add test. --- cloudpickle/cloudpickle.py | 6 ++++++ tests/cloudpickle_test.py | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index d35c5dbc..45958823 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1329,6 +1329,12 @@ def _function_getnewargs(self, func): base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) if base_globals == {}: + if "__file__" in func.__globals__: + # Apply normalization ONLY to the __file__ attribute + file_path = func.__globals__["__file__"] + if self.config.use_relative_filepaths: + file_path = _get_relative_path(file_path) + base_globals["__file__"] = file_path # Add module attributes used to resolve relative imports # instructions inside func. 
for k in ["__package__", "__name__", "__path__", "__file__"]: diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 5c6aa400..0a923624 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -3025,6 +3025,33 @@ class SampleDataclass: for f in found_fields: assert f._field_type is expected_ftypes[f.name] + def test_relative_filepaths_with_dynamic_types(self): + """Test relative filepath conversion using dynamically created types.""" + import os + import collections + + # Dynamic namedtuple (creates code objects with __file__) + DynamicTuple = collections.namedtuple('DynamicTuple', ['field1', 'field2']) + + original_file = DynamicTuple._make.__code__.co_filename + self.assertTrue(os.path.isabs(original_file), + f"Original co_filename should be absolute: {original_file}") + + pickled_tuple_class = self.pickle_depickle(DynamicTuple) + pickled_file = pickled_tuple_class._make.__code__.co_filename + + if self.config == 'use_relative_filepaths': + self.assertNotEqual(original_file, pickled_file, + "With relative config, co_filename should be converted") + self.assertTrue(not os.path.isabs(pickled_file), + f"Should be relative path: {pickled_file}") + else: + self.assertEqual(original_file, pickled_file, + "With default config, co_filename should be preserved") + self.assertTrue(os.path.isabs(pickled_file), + f"Should remain absolute: {pickled_file}") + + def test_interactively_defined_dataclass_with_initvar_and_classvar(self): code = """if __name__ == "__main__": import dataclasses From 3887f1eee6d3e395bc870766696b445346a82dce Mon Sep 17 00:00:00 2001 From: claudevdm Date: Fri, 26 Sep 2025 15:15:30 -0400 Subject: [PATCH 06/13] Update test. 
--- cloudpickle/cloudpickle.py | 2 +- tests/cloudpickle_test.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 45958823..05a05e0a 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1337,7 +1337,7 @@ def _function_getnewargs(self, func): base_globals["__file__"] = file_path # Add module attributes used to resolve relative imports # instructions inside func. - for k in ["__package__", "__name__", "__path__", "__file__"]: + for k in ["__package__", "__name__", "__path__"]: if k in func.__globals__: base_globals[k] = func.__globals__[k] diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 0a923624..f95f4183 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -3038,18 +3038,20 @@ def test_relative_filepaths_with_dynamic_types(self): f"Original co_filename should be absolute: {original_file}") pickled_tuple_class = self.pickle_depickle(DynamicTuple) - pickled_file = pickled_tuple_class._make.__code__.co_filename + pickled_co_filename = pickled_tuple_class._make.__code__.co_filename + pickled_file_path = pickled_tuple_class.__getnewargs__.__globals__['__file__'] if self.config == 'use_relative_filepaths': - self.assertNotEqual(original_file, pickled_file, + self.assertEqual(pickled_file_path, pickled_co_filename) + self.assertNotEqual(original_file, pickled_co_filename, "With relative config, co_filename should be converted") - self.assertTrue(not os.path.isabs(pickled_file), - f"Should be relative path: {pickled_file}") + self.assertTrue(not os.path.isabs(pickled_co_filename), + f"Should be relative path: {pickled_co_filename}") else: - self.assertEqual(original_file, pickled_file, + self.assertEqual(original_file, pickled_co_filename, "With default config, co_filename should be preserved") - self.assertTrue(os.path.isabs(pickled_file), - f"Should remain absolute: {pickled_file}") + 
self.assertTrue(os.path.isabs(pickled_co_filename), + f"Should remain absolute: {pickled_co_filename}") def test_interactively_defined_dataclass_with_initvar_and_classvar(self): From 9195562ded8534dfca64c8f5542f56f2fc660d2a Mon Sep 17 00:00:00 2001 From: claudevdm Date: Sun, 28 Sep 2025 19:18:50 -0400 Subject: [PATCH 07/13] Docs and type hints. --- cloudpickle/cloudpickle.py | 42 ++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 05a05e0a..c6d0c951 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -105,7 +105,27 @@ def uuid_generator(_): @dataclasses.dataclass class CloudPickleConfig: - """Configuration for cloudpickle behavior.""" + """Configuration for cloudpickle behavior. + + This class controls various aspects of how cloudpickle serializes objects. + + Attributes: + id_generator: Callable that generates unique identifiers for dynamic + types. Controls isinstance semantics preservation. If None, + disables type tracking and isinstance relationships are not + preserved across pickle/unpickle cycles. If callable, generates + unique IDs to maintain object identity. + Default: uuid_generator (generates UUID hex strings). + + skip_reset_dynamic_type_state: Whether to skip resetting state when + reconstructing dynamic types. If True, skips state reset for + already-reconstructed types. + + use_relative_filepaths: Whether to convert absolute file paths to + relative paths in code objects and function globals. If True, + converts paths relative to sys.path for better determinism across + environments. If False, preserves original absolute paths. 
+ """ id_generator: typing.Optional[callable] = uuid_generator skip_reset_dynamic_type_state: bool = False use_relative_filepaths: bool = False @@ -634,7 +654,7 @@ def _make_typevar(name, bound, constraints, covariant, contravariant, class_trac return _lookup_class_or_track(class_tracker_id, tv) -def _decompose_typevar(obj, config): +def _decompose_typevar(obj, config: CloudPickleConfig): return ( obj.__name__, obj.__bound__, @@ -645,7 +665,7 @@ def _decompose_typevar(obj, config): ) -def _typevar_reduce(obj, config): +def _typevar_reduce(obj, config: CloudPickleConfig): # TypeVar instances require the module information hence why we # are not using the _should_pickle_by_reference directly module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) @@ -697,7 +717,7 @@ def _make_dict_items(obj, is_ordered=False): # ------------------------------------------------- -def _class_getnewargs(obj, config): +def _class_getnewargs(obj, config: CloudPickleConfig): type_kwargs = {} if "__module__" in obj.__dict__: type_kwargs["__module__"] = obj.__module__ @@ -716,7 +736,7 @@ def _class_getnewargs(obj, config): ) -def _enum_getnewargs(obj, config): +def _enum_getnewargs(obj, config: CloudPickleConfig): members = {e.name: e.value for e in obj} return ( obj.__bases__, @@ -853,7 +873,7 @@ def _enum_getstate(obj): # these holes". -def _code_reduce(obj, config): +def _code_reduce(obj, config: CloudPickleConfig): """code object reducer.""" # If you are not sure about the order of arguments, take a look at help # of the specific type from types, for example: @@ -1073,7 +1093,7 @@ def _weakset_reduce(obj): return weakref.WeakSet, (list(obj),) -def _dynamic_class_reduce(obj, config): +def _dynamic_class_reduce(obj, config: CloudPickleConfig): """Save a class that can't be referenced as a module attribute. 
This method is used to serialize classes that are defined inside @@ -1104,7 +1124,7 @@ def _dynamic_class_reduce(obj, config): ) -def _class_reduce(obj, config): +def _class_reduce(obj, config: CloudPickleConfig): """Select the reducer depending on the dynamic nature of the class obj.""" if obj is type(None): # noqa return type, (None,) @@ -1357,7 +1377,7 @@ def dump(self, obj): msg = "Could not pickle object as excessively deep recursion required." raise pickle.PicklingError(msg) from e - def __init__(self, file, protocol=None, buffer_callback=None, config=DEFAULT_CONFIG): + def __init__(self, file, protocol=None, buffer_callback=None, config: CloudPickleConfig=DEFAULT_CONFIG): if protocol is None: protocol = DEFAULT_PROTOCOL super().__init__(file, protocol=protocol, buffer_callback=buffer_callback) @@ -1568,7 +1588,7 @@ def save_pypy_builtin_func(self, obj): # Shorthands similar to pickle.dump/pickle.dumps -def dump(obj, file, protocol=None, buffer_callback=None, config=DEFAULT_CONFIG): +def dump(obj, file, protocol=None, buffer_callback=None, config: CloudPickleConfig=DEFAULT_CONFIG): """Serialize obj as bytes streamed into file protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to @@ -1584,7 +1604,7 @@ def dump(obj, file, protocol=None, buffer_callback=None, config=DEFAULT_CONFIG): Pickler(file, protocol=protocol, buffer_callback=buffer_callback, config=config).dump(obj) -def dumps(obj, protocol=None, buffer_callback=None, config=DEFAULT_CONFIG): +def dumps(obj, protocol=None, buffer_callback=None, config: CloudPickleConfig=DEFAULT_CONFIG): """Serialize obj as a string of bytes allocated in memory protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to From 98ce043f167693efa0d922448760bd12be9b8c8d Mon Sep 17 00:00:00 2001 From: claudevdm Date: Sun, 28 Sep 2025 20:04:56 -0400 Subject: [PATCH 08/13] Fix backward compat error. 
--- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index c6d0c951..b1eeff8f 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1218,7 +1218,7 @@ def _function_setstate(obj, state): for k, v in slotstate.items(): setattr(obj, k, v) -def _class_setstate(obj, state, skip_reset_dynamic_type_state): +def _class_setstate(obj, state, skip_reset_dynamic_type_state=False): # Lock while potentially modifying class state. with _DYNAMIC_CLASS_TRACKER_LOCK: if skip_reset_dynamic_type_state and obj in _DYNAMIC_CLASS_STATE_TRACKER_BY_CLASS: From d0c54fe88ce4c531cc3d117f026889435ae1ae73 Mon Sep 17 00:00:00 2001 From: claudevdm Date: Sun, 28 Sep 2025 22:02:59 -0400 Subject: [PATCH 09/13] Use filepath interceptor. --- cloudpickle/cloudpickle.py | 18 ++++++++---------- tests/testutils.py | 7 ++----- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index b1eeff8f..e60cd64d 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -121,14 +121,12 @@ class CloudPickleConfig: reconstructing dynamic types. If True, skips state reset for already-reconstructed types. - use_relative_filepaths: Whether to convert absolute file paths to - relative paths in code objects and function globals. If True, - converts paths relative to sys.path for better determinism across - environments. If False, preserves original absolute paths. + filepath_interceptor: Used to modify filepaths in `co_filename` and + function.__globals__['__file__']. 
""" id_generator: typing.Optional[callable] = uuid_generator skip_reset_dynamic_type_state: bool = False - use_relative_filepaths: bool = False + filepath_interceptor: typing.Optional[callable] = None DEFAULT_CONFIG = CloudPickleConfig() @@ -419,7 +417,7 @@ def func(): return subimports -def _get_relative_path(path): +def get_relative_path(path): """Returns the path of a filename relative to the longest matching directory in sys.path. Args: @@ -894,8 +892,8 @@ def _code_reduce(obj, config: CloudPickleConfig): co_cellvars = tuple(name for name in obj.co_cellvars) co_filename = obj.co_filename - if (config and config.use_relative_filepaths): - co_filename = _get_relative_path(co_filename) + if (config and config.filepath_interceptor): + co_filename = config.filepath_interceptor(co_filename) if hasattr(obj, "co_exceptiontable"): # Python 3.11 and later: there are some new attributes @@ -1352,8 +1350,8 @@ def _function_getnewargs(self, func): if "__file__" in func.__globals__: # Apply normalization ONLY to the __file__ attribute file_path = func.__globals__["__file__"] - if self.config.use_relative_filepaths: - file_path = _get_relative_path(file_path) + if self.config.filepath_interceptor: + file_path = self.config.filepath_interceptor(file_path) base_globals["__file__"] = file_path # Add module attributes used to resolve relative imports # instructions inside func. 
diff --git a/tests/testutils.py b/tests/testutils.py index 2d65f96e..c20eb0f7 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -11,11 +11,11 @@ from concurrent.futures import ProcessPoolExecutor import psutil +from cloudpickle import get_relative_path from cloudpickle import dumps from cloudpickle import CloudPickleConfig from cloudpickle import DEFAULT_CONFIG from subprocess import TimeoutExpired -import logging loads = pickle.loads TIMEOUT = 60 @@ -25,8 +25,6 @@ _NEXT_DYNAMIC_CLASS_TRACKER_ID = 1 def sequential_id_generator(_): - # logging.warning('CLAUDE HELLO ') - # raise ValueError('hello') global _NEXT_DYNAMIC_CLASS_TRACKER_ID _NEXT_DYNAMIC_CLASS_TRACKER_ID += 1 return str(_NEXT_DYNAMIC_CLASS_TRACKER_ID) @@ -34,7 +32,7 @@ def sequential_id_generator(_): _SEQUENTIAL_CONFIG = CloudPickleConfig(id_generator=sequential_id_generator) _NO_TRACKING_CONFIG = CloudPickleConfig(id_generator=None) _SKIP_RESET_CONFIG = CloudPickleConfig(skip_reset_dynamic_type_state=True) -_USE_RELATIVE_FILEPATHS = CloudPickleConfig(use_relative_filepaths=True) +_USE_RELATIVE_FILEPATHS = CloudPickleConfig(filepath_interceptor=get_relative_path) CONFIG_REGISTRY = { @@ -149,7 +147,6 @@ def pickle_echo(stream_in=None, stream_out=None, protocol=None, config=None): input_bytes = _read_all_bytes(stream_in) stream_in.close() obj = loads(input_bytes) - # logging.warning(f"CLAUDE {config} {get_config(config)}") repickled_bytes = dumps(obj, protocol=protocol, config=get_config(config)) stream_out.write(repickled_bytes) stream_out.close() From 68b2a8dbf4c12e582ac6d0c2e7dc661eeb325367 Mon Sep 17 00:00:00 2001 From: kristynsmith Date: Tue, 7 Oct 2025 13:58:50 -0400 Subject: [PATCH 10/13] add stable code identifier changes to cloudpickle.py --- cloudpickle/cloudpickle.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index e60cd64d..e43bcccf 100644 --- a/cloudpickle/cloudpickle.py +++ 
b/cloudpickle/cloudpickle.py @@ -123,10 +123,18 @@ class CloudPickleConfig: filepath_interceptor: Used to modify filepaths in `co_filename` and function.__globals__['__file__']. + + get_code_object_identifier: Use identifiers derived from code + location when pickling dynamic functions (e.g. lambdas). Enabling + this setting results in pickled payloads becoming more stable to + code changes: when a particular lambda function is slightly + modified but the location of the function in the codebase has not + changed, the pickled representation might stay the same. """ id_generator: typing.Optional[callable] = uuid_generator skip_reset_dynamic_type_state: bool = False filepath_interceptor: typing.Optional[callable] = None + get_code_object_identifier: typing.Optional[callable] = None DEFAULT_CONFIG = CloudPickleConfig() @@ -569,6 +577,12 @@ def _make_function(code, globals, name, argdefs, closure): return types.FunctionType(code, globals, name, argdefs, closure) +def _make_function_from_identifier( + get_code_from_identifier, code_path, globals, name, argdefs, closure): + fcode = get_code_from_identifier(code_path) + return _make_function(fcode, globals, name, argdefs, closure) + + def _make_empty_cell(): if False: # trick the compiler into creating an empty cell in our lambda @@ -1307,6 +1321,20 @@ class Pickler(pickle.Pickler): dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) + def _stable_identifier_function_reduce(self, func): + code_path = self.config.get_code_object_identifier(func) + if not code_path: + return self._dynamic_function_reduce(func) + newargs = (code_path, ) + state = _function_getstate(func) + return ( + _make_function_from_identifier, + newargs, + state, + None, + None, + _function_setstate) + # function reducers are defined as instance methods of cloudpickle.Pickler # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref) def _dynamic_function_reduce(self, func): @@ -1326,6 +1354,9 @@ def _function_reduce(self, 
obj): """ if _should_pickle_by_reference(obj): return NotImplemented + elif (self.config.get_code_object_identifier is not None and + self.config.get_code_object_identifier(obj)): + return self._stable_identifier_function_reduce(obj) else: return self._dynamic_function_reduce(obj) From bc77a93b93ecbe2b3658beff93534482b0c25821 Mon Sep 17 00:00:00 2001 From: kristynsmith Date: Tue, 7 Oct 2025 14:51:07 -0400 Subject: [PATCH 11/13] fix indentation --- cloudpickle/cloudpickle.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index e43bcccf..17caa51f 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1322,18 +1322,18 @@ class Pickler(pickle.Pickler): dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) def _stable_identifier_function_reduce(self, func): - code_path = self.config.get_code_object_identifier(func) - if not code_path: - return self._dynamic_function_reduce(func) - newargs = (code_path, ) - state = _function_getstate(func) - return ( - _make_function_from_identifier, - newargs, - state, - None, - None, - _function_setstate) + code_path = self.config.get_code_object_identifier(func) + if not code_path: + return self._dynamic_function_reduce(func) + newargs = (code_path, ) + state = _function_getstate(func) + return ( + _make_function_from_identifier, + newargs, + state, + None, + None, + _function_setstate) # function reducers are defined as instance methods of cloudpickle.Pickler # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref) From 28af303d261a4bb80891d4e0524bd5beaa15cb91 Mon Sep 17 00:00:00 2001 From: kristynsmith Date: Wed, 8 Oct 2025 10:47:32 -0400 Subject: [PATCH 12/13] fix conditional --- cloudpickle/cloudpickle.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 17caa51f..d5ae330a 100644 --- 
a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1354,8 +1354,7 @@ def _function_reduce(self, obj): """ if _should_pickle_by_reference(obj): return NotImplemented - elif (self.config.get_code_object_identifier is not None and - self.config.get_code_object_identifier(obj)): + elif self.config.get_code_object_identifier is not None: return self._stable_identifier_function_reduce(obj) else: return self._dynamic_function_reduce(obj) From f181abaef1697026f7b68cec3aa6d89abc5ff4a9 Mon Sep 17 00:00:00 2001 From: praneetnadella Date: Thu, 5 Feb 2026 00:47:09 +0000 Subject: [PATCH 13/13] Add pickle_main_by_ref option to CloudPickleConfig This option allows pickling objects defined in __main__ by reference. This is critical for distributed systems like Apache Beam where workers import the main module but want to avoid serializing the entire main session state by value. Defaults to False to preserve existing behavior. --- cloudpickle/cloudpickle.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index d5ae330a..951f6192 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -135,6 +135,7 @@ class CloudPickleConfig: skip_reset_dynamic_type_state: bool = False filepath_interceptor: typing.Optional[callable] = None get_code_object_identifier: typing.Optional[callable] = None + pickle_main_by_ref: bool = False DEFAULT_CONFIG = CloudPickleConfig() @@ -275,7 +276,7 @@ def _whichmodule(obj, name): return None -def _should_pickle_by_reference(obj, name=None): +def _should_pickle_by_reference(obj, name=None, config=DEFAULT_CONFIG): """Test whether an function or a class should be pickled by reference Pickling by reference means by that the object (typically a function or a @@ -290,7 +291,7 @@ def _should_pickle_by_reference(obj, name=None): explicitly registered to be pickled by value. 
""" if isinstance(obj, types.FunctionType) or issubclass(type(obj), type): - module_and_name = _lookup_module_and_qualname(obj, name=name) + module_and_name = _lookup_module_and_qualname(obj, name=name, config=config) if module_and_name is None: return False module, name = module_and_name @@ -311,7 +312,7 @@ def _should_pickle_by_reference(obj, name=None): ) -def _lookup_module_and_qualname(obj, name=None): +def _lookup_module_and_qualname(obj, name=None, config=DEFAULT_CONFIG): if name is None: name = getattr(obj, "__qualname__", None) if name is None: # pragma: no cover @@ -327,7 +328,7 @@ def _lookup_module_and_qualname(obj, name=None): # imported module. obj is thus treated as dynamic. return None - if module_name == "__main__": + if module_name == "__main__" and not config.pickle_main_by_ref: return None # Note: if module_name is in sys.modules, the corresponding module is @@ -680,7 +681,7 @@ def _decompose_typevar(obj, config: CloudPickleConfig): def _typevar_reduce(obj, config: CloudPickleConfig): # TypeVar instances require the module information hence why we # are not using the _should_pickle_by_reference directly - module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) + module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__, config=config) if module_and_name is None: return (_make_typevar, _decompose_typevar(obj, config)) @@ -1072,8 +1073,8 @@ def _memoryview_reduce(obj): return bytes, (obj.tobytes(),) -def _module_reduce(obj): - if _should_pickle_by_reference(obj): +def _module_reduce(obj, config: CloudPickleConfig): + if _should_pickle_by_reference(obj, config=config): return subimport, (obj.__name__,) else: # Some external libraries can populate the "__builtins__" entry of a @@ -1146,7 +1147,7 @@ def _class_reduce(obj, config: CloudPickleConfig): return type, (NotImplemented,) elif obj in _BUILTIN_TYPE_NAMES: return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) - elif not _should_pickle_by_reference(obj): + elif not 
_should_pickle_by_reference(obj, config=config): return _dynamic_class_reduce(obj, config) return NotImplemented @@ -1303,7 +1304,6 @@ class Pickler(pickle.Pickler): _dispatch_table[staticmethod] = _classmethod_reduce _dispatch_table[CellType] = _cell_reduce _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce - _dispatch_table[types.ModuleType] = _module_reduce _dispatch_table[types.MethodType] = _method_reduce _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce _dispatch_table[weakref.WeakSet] = _weakset_reduce @@ -1352,7 +1352,7 @@ def _function_reduce(self, obj): obj using a custom cloudpickle reducer designed specifically to handle dynamic functions. """ - if _should_pickle_by_reference(obj): + if _should_pickle_by_reference(obj, config=self.config): return NotImplemented elif self.config.get_code_object_identifier is not None: return self._stable_identifier_function_reduce(obj) @@ -1488,6 +1488,8 @@ def reducer_override(self, obj): return _code_reduce(obj, self.config) elif isinstance(obj, types.FunctionType): return self._function_reduce(obj) + elif isinstance(obj, types.ModuleType): + return _module_reduce(obj, self.config) else: # fallback to save_global, including the Pickler's # dispatch_table @@ -1552,7 +1554,7 @@ def save_global(self, obj, name=None, pack=struct.pack): if name is not None: super().save_global(obj, name=name) - elif not _should_pickle_by_reference(obj, name=name): + elif not _should_pickle_by_reference(obj, name=name, config=self.config): self._save_reduce_pickle5(*_dynamic_class_reduce(obj, self.config), obj=obj) else: super().save_global(obj, name=name) @@ -1576,7 +1578,7 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. 
""" - if _should_pickle_by_reference(obj, name=name): + if _should_pickle_by_reference(obj, name=name, config=self.config): return super().save_global(obj, name=name) elif PYPY and isinstance(obj.__code__, builtin_code_type): return self.save_pypy_builtin_func(obj)