Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 12 additions & 43 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ to:
- hash is computed once for lookup and reused to store the copy
- keepalive is a lightweight vector of pointers instead of a `list`
- memo object is not tracked in GC, unless stolen in custom `__deepcopy__`
- #### Native `__reduce__` handling
When a type's `__reduce__` strictly follows the protocol, `copium` handles the returned values natively,
without interpreter overhead, the same way CPython's pickle implementation does.

[What if there's type mismatch?](#pickle-protocol)
- #### Cached memo
Rather than creating a new memo object for each `deepcopy` and discarding it after, copium stores
one per thread and reuses it. Referenced objects are cleared, but some amount of memory stays
Expand All @@ -118,50 +123,14 @@ still there are minor deviations from stdlib you should be aware of.

### Pickle protocol

`copium` is stricter than `copy` for some malformed `__reduce__` implementations.
stdlib's `copy` tolerates some deviations from the pickle protocol that `pickle` itself rejects
(see https://github.com/python/cpython/issues/141757).

stdlib's `copy` tolerates some deviations from the pickle protocol that `pickle` (and `copium`) reject (see https://github.com/python/cpython/issues/141757).

<details>
<summary>Example</summary>

```python-repl
>>> import copy
... import pickle
...
... import copium
...
... class BadReduce:
... def __reduce__(self):
... return BadReduce, []
...
>>> copy.deepcopy(BadReduce()) # copy doesn't require exact types in __reduce__
<__main__.BadReduce object at 0x1026d7b10>
>>> copium.deepcopy(BadReduce()) # copium is stricter
Traceback (most recent call last):
File "<python-input-2>", line 1, in <module>
copium.deepcopy(BadReduce())
~~~~~~~~~~~~~~~^^^^^^^^^^^^^
TypeError: second item of the tuple returned by __reduce__ must be a tuple, not list

>>> pickle.dumps(BadReduce()) # so is pickle
Traceback (most recent call last):
File "<python-input-3>", line 1, in <module>
pickle.dumps(BadReduce())
~~~~~~~~~~~~^^^^^^^^^^^^^
_pickle.PicklingError: second item of the tuple returned by __reduce__ must be a tuple, not list
when serializing BadReduce object
```

</details>

If `copium` raises `TypeError` while `copy` does not, see if `pickle.dumps(obj)` works.
If it doesn't, the fix is easy: make your object comply with pickle protocol.

[Tracking issue](https://github.com/Bobronium/copium/issues/32)

> [!NOTE]
> If this becomes a real blocker for adoption, `copium` might mimic stdlib's behavior in future releases while still being fast.
`copium` strictly follows stdlib semantics: if `__reduce__`
returns a list instead of a tuple for args, or a mapping instead of a dict for kwargs,
`copium` will coerce them the same way stdlib would
(via `*args` unpacking, `**kwargs` merging, `.items()` iteration, etc.).
Errors from malformed `__reduce__` results match what `copy.deepcopy` produces.

### Memo handling

Expand Down
2 changes: 1 addition & 1 deletion datamodelzoo
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ typecheck = [
test = [
"pytest>=8",
"pytest-assert-type>=0.1.5",
"indifference>=0.1.0",
"indifference>=0.2.0",
"typing-extensions; python_version < '3.12'",
"datamodelzoo",
"pytest-codspeed>=4.2.0",
Expand Down
142 changes: 109 additions & 33 deletions src/_deepcopy.c
Original file line number Diff line number Diff line change
Expand Up @@ -481,28 +481,39 @@ static PyObject* reconstruct_newobj_ex(PyObject* argtup, PyMemoObject* memo) {
);
return NULL;
}

PyObject* coerced_args = NULL;
PyObject* coerced_kwargs = NULL;

if (!PyTuple_Check(args)) {
PyErr_Format(
PyExc_TypeError,
"__newobj_ex__ arg 2 must be a tuple, not %.200s",
Py_TYPE(args)->tp_name
);
return NULL;
coerced_args = PySequence_Tuple(args);
if (!coerced_args)
return NULL;
args = coerced_args;
}
if (!PyDict_Check(kwargs)) {
PyErr_Format(
PyExc_TypeError,
"__newobj_ex__ arg 3 must be a dict, not %.200s",
Py_TYPE(kwargs)->tp_name
);
return NULL;
coerced_kwargs = PyDict_New();
if (!coerced_kwargs) {
Py_XDECREF(coerced_args);
return NULL;
}
if (PyDict_Merge(coerced_kwargs, kwargs, 1) < 0) {
Py_XDECREF(coerced_args);
Py_DECREF(coerced_kwargs);
return NULL;
}
kwargs = coerced_kwargs;
}

PyObject* copied_args = deepcopy(args, memo);
if (!copied_args)
Py_XDECREF(coerced_args);
if (!copied_args) {
Py_XDECREF(coerced_kwargs);
return NULL;
}

PyObject* copied_kwargs = deepcopy(kwargs, memo);
Py_XDECREF(coerced_kwargs);
if (!copied_kwargs) {
Py_DECREF(copied_args);
return NULL;
Expand Down Expand Up @@ -569,15 +580,22 @@ static int apply_dict_state(PyObject* instance, PyObject* dict_state, PyMemoObje
if (!dict_state || dict_state == Py_None)
return 0;

if (!PyDict_Check(dict_state)) {
PyErr_SetString(PyExc_TypeError, "state must be a dict");
return -1;
}

PyObject* copied = deepcopy(dict_state, memo);
if (!copied)
return -1;

if (UNLIKELY(!PyDict_Check(copied))) {
PyObject* instance_dict = PyObject_GetAttr(instance, module_state.s__dict__);
if (!instance_dict) {
Py_DECREF(copied);
return -1;
}
int ret = PyDict_Merge(instance_dict, copied, 1);
Py_DECREF(instance_dict);
Py_DECREF(copied);
return ret;
}

PyObject* instance_dict = PyObject_GetAttr(instance, module_state.s__dict__);
if (!instance_dict) {
Py_DECREF(copied);
Expand All @@ -604,15 +622,48 @@ static int apply_slot_state(PyObject* instance, PyObject* slotstate, PyMemoObjec
if (!slotstate || slotstate == Py_None)
return 0;

if (!PyDict_Check(slotstate)) {
PyErr_SetString(PyExc_TypeError, "slot state is not a dictionary");
return -1;
}

PyObject* copied = deepcopy(slotstate, memo);
if (!copied)
return -1;

if (UNLIKELY(!PyDict_Check(copied))) {
PyObject* items = PyObject_CallMethod(copied, "items", NULL);
Py_DECREF(copied);
if (!items)
return -1;

PyObject* iterator = PyObject_GetIter(items);
Py_DECREF(items);
if (!iterator)
return -1;

int ret = 0;
PyObject* pair;
while ((pair = PyIter_Next(iterator))) {
PyObject* seq = PySequence_Fast(pair, "items() must return pairs");
Py_DECREF(pair);
if (!seq || PySequence_Fast_GET_SIZE(seq) != 2) {
Py_XDECREF(seq);
if (!PyErr_Occurred())
PyErr_SetString(PyExc_ValueError, "not enough values to unpack");
ret = -1;
break;
}
int set_ret = PyObject_SetAttr(
instance, PySequence_Fast_GET_ITEM(seq, 0), PySequence_Fast_GET_ITEM(seq, 1)
);
Py_DECREF(seq);
if (set_ret < 0) {
ret = -1;
break;
}
}
if (ret == 0 && PyErr_Occurred())
ret = -1;
Py_DECREF(iterator);
return ret;
}

PyObject *key, *value;
Py_ssize_t pos = 0;
int ret = 0;
Expand Down Expand Up @@ -704,17 +755,42 @@ static int apply_dictitems(PyObject* instance, PyObject* dictitems, PyMemoObject
PyObject* pair;

while ((pair = PyIter_Next(iterator))) {
if (!PyTuple_Check(pair) || PyTuple_GET_SIZE(pair) != 2) {
Py_DECREF(pair);
PyErr_SetString(PyExc_ValueError, "dictiter must yield (key, value) pairs");
ret = -1;
break;
}
PyObject* key;
PyObject* value;

PyObject* key = PyTuple_GET_ITEM(pair, 0);
PyObject* value = PyTuple_GET_ITEM(pair, 1);
Py_INCREF(key);
Py_INCREF(value);
if (LIKELY(PyTuple_Check(pair) && PyTuple_GET_SIZE(pair) == 2)) {
key = Py_NewRef(PyTuple_GET_ITEM(pair, 0));
value = Py_NewRef(PyTuple_GET_ITEM(pair, 1));
} else {
PyObject* seq = PySequence_Fast(pair, "cannot unpack non-sequence");
if (!seq) {
Py_DECREF(pair);
ret = -1;
break;
}
Py_ssize_t n = PySequence_Fast_GET_SIZE(seq);
if (n != 2) {
Py_DECREF(seq);
Py_DECREF(pair);
if (n < 2)
PyErr_Format(
PyExc_ValueError,
"not enough values to unpack (expected 2, got %zd)",
n
);
else
PyErr_Format(
PyExc_ValueError,
"too many values to unpack (expected 2, got %zd)",
n
);
ret = -1;
break;
}
key = Py_NewRef(PySequence_Fast_GET_ITEM(seq, 0));
value = Py_NewRef(PySequence_Fast_GET_ITEM(seq, 1));
Py_DECREF(seq);
}
Py_DECREF(pair);

Py_SETREF(key, deepcopy(key, memo));
Expand Down
Loading