Skip to content
7 changes: 6 additions & 1 deletion README
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
a port of the serialize and unserialize functions of php to python. This module
A port of the serialize and unserialize functions of php to python. This module
implements the Python serialization interface (e.g. it provides dumps, loads
and similar functions).


Unserializing PHP sessions is also supported, provided they were saved using
PHP's internal serializer and without encryption (see
http://www.hardened-php.net/suhosin/configuration.html#suhosin.session.encrypt).
72 changes: 66 additions & 6 deletions phpserialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ class WP_User extends WP_UserBase {
Changelog
=========

1.4
- added support for PHP sessions
1.3
- added support for Python 3

Expand Down Expand Up @@ -283,7 +285,7 @@ class WP_User extends WP_UserBase {
xrange = range

__author__ = 'Armin Ronacher <armin.ronacher@active-4.com>'
__version__ = '1.3'
__version__ = '1.4'
# Names exported by ``from phpserialize import *``.  full_dict_to_list is
# listed next to the other dict helpers so that it is part of the public API.
__all__ = ('phpobject', 'convert_member_dict', 'dict_to_list', 'dict_to_tuple',
           'full_dict_to_list', 'load', 'loads', 'dump', 'dumps', 'serialize',
           'unserialize')

Expand Down Expand Up @@ -412,7 +414,7 @@ def _serialize(obj, keypos):


def load(fp, charset='utf-8', errors=default_errors, decode_strings=False,
object_hook=None, array_hook=None):
object_hook=None, array_hook=None, return_unicode=False):
"""Read a string from the open file object `fp` and interpret it as a
data stream of PHP-serialized objects, reconstructing and returning
the original object hierarchy.
Expand Down Expand Up @@ -490,6 +492,8 @@ def _unserialize():
_expect(b'"')
if decode_strings:
data = data.decode(charset, errors)
if return_unicode:
data = unicode(data, charset)
_expect(b';')
return data
if type_ == b'a':
Expand All @@ -507,19 +511,46 @@ def _unserialize():
if decode_strings:
name = name.decode(charset, errors)
return object_hook(name, dict(_load_array()))
raise ValueError('unexpected opcode')
if type_ == b'r':
# recursion
_expect(b':')
data = _read_until(b';')
return None
raise ValueError('unexpected opcode - %s' % repr(type_))

fp_position = fp.tell()
chunk = _read_until(':');
fp.seek(fp_position) # Reset pointer
if '|' in chunk:
# We may be dealing with a serialized session, in which case keys
# followed by a pipe are preceding the serialized data.
unserialized_data = {}
while 1:
try:
key = _read_until('|');
except ValueError:
break # end of stream
if return_unicode:
key = unicode(key, charset)
unserialized_data[key] = _unserialize()
else:
unserialized_data = _unserialize()

return _unserialize()
return unserialized_data


def loads(data, charset='utf-8', errors=default_errors, decode_strings=False,
          object_hook=None, array_hook=None, return_unicode=False):
    """Read a PHP-serialized object hierarchy from a string.  Characters in
    the string past the object's representation are ignored.

    `data` may be a bytestring or a unicode string.  A unicode string is
    encoded with `charset` before it is parsed, and `return_unicode` is then
    switched on regardless of the value passed in.

    All remaining arguments are forwarded unchanged to :func:`load`, which
    does the actual parsing on a `BytesIO` wrapper around the encoded data.
    """
    # isinstance instead of an exact type comparison, so that subclasses of
    # the unicode type are encoded as well.
    if isinstance(data, unicode):
        data = data.encode(charset)
        return_unicode = True
    return load(BytesIO(data), charset, errors, decode_strings,
                object_hook, array_hook, return_unicode)


def dump(data, fp, charset='utf-8', errors=default_errors, object_hook=None):
Expand Down Expand Up @@ -549,6 +580,35 @@ def dict_to_list(d):
except KeyError:
raise ValueError('dict is not a sequence')

def full_dict_to_list(d, array_hook=dict):
    """Recursively convert list-like arrays into Python lists.

    An array is an object whose type is exactly `array_hook`.  When its keys
    are ``0..N-1`` in iteration order it is replaced by a list of its
    converted values; otherwise it is rebuilt as a new `array_hook` instance
    with every value converted.  Anything that is not an array is returned
    unchanged.  An empty array has the keys ``0..-1`` and becomes ``[]``.

    :param d: the value to convert.
    :param array_hook: the mapping type that represents PHP arrays in `d`;
                       it is used at every nesting level.
    :return: the converted structure; `d` itself is not modified.

    Examples::

        d1 = {'a': 'b', 'c': {0: '1', 1: '2', 2: {'e': 7}, 3: {2: 8}}}
        full_dict_to_list(d1) returns
        {'a': 'b', 'c': ['1', '2', {'e': 7}, {2: 8}]}

        d2 = OrderedDict([('a', 'b'), ('c', OrderedDict([
            (0, '1'), (1, '2'), (2, OrderedDict([('e', 7)])),
            (3, OrderedDict([(2, 8)]))]))])
        full_dict_to_list(d2, OrderedDict) returns
        OrderedDict([('a', 'b'), ('c', ['1', '2', OrderedDict([('e', 7)]),
                                        OrderedDict([(2, 8)])])])
    """
    # Exact type match on purpose: with the default hook, dict subclasses
    # are not treated as arrays and pass through untouched.
    if type(d) is not array_hook:
        return d

    keys = list(d)

    # Any array with keys 0..N is considered to be a list.  The hook is
    # forwarded so that nested arrays inside the list are converted too.
    if keys == list(range(len(keys))):
        return [full_dict_to_list(d[key], array_hook) for key in keys]

    # Every other array keeps its keys and its container type, including
    # arrays that hold a single entry.
    converted = array_hook()
    for key in keys:
        converted[key] = full_dict_to_list(d[key], array_hook)
    return converted



def dict_to_tuple(d):
"""Converts an ordered dict into a tuple."""
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def get_docs():
name='phpserialize',
author='Armin Ronacher',
author_email='armin.ronacher@active-4.com',
version='1.3',
url='http://github.com/mitsuhiko/phpserialize',
version='1.5',
url='https://github.com/nathanwalsh/phpserialize',
py_modules=['phpserialize'],
description='a port of the serialize and unserialize '
'functions of php to python.',
Expand Down
38 changes: 38 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,44 @@ def test_basic_object_hook(self):
self.assertEqual(user.username, 'admin')
self.assertEqual(user.__name__, 'WP_User')

def test_full_dict_to_list(self):
    """full_dict_to_list replaces arrays keyed 0..N with lists, for plain
    dicts and for OrderedDict used as the array hook."""
    from collections import OrderedDict

    # with normal dict
    d1 = {'a': 'b', 'c': {0: '1', 1: '2', 2: {'e': 7}, 3: {2: 8}}}
    d1_cleaned = {'a': 'b', 'c': ['1', '2', {'e': 7}, {2: 8}]}
    self.assertEqual(phpserialize.full_dict_to_list(d1), d1_cleaned)

    # with OrderedDict as array_hook; the input is built from pair lists so
    # that the key order does not depend on the ordering of a dict literal
    d2 = OrderedDict([
        ('a', 'b'),
        ('c', OrderedDict([
            (0, '1'),
            (1, '2'),
            (2, OrderedDict([('e', 7)])),
            (3, OrderedDict([(2, 8)])),
        ])),
    ])
    d2_cleaned = OrderedDict([
        ('a', 'b'),
        ('c', ['1', '2', OrderedDict([('e', 7)]), OrderedDict([(2, 8)])]),
    ])
    self.assertEqual(phpserialize.full_dict_to_list(d2, OrderedDict),
                     d2_cleaned)


def test_session(self):
    """A byte string of ``key|value`` session entries loads as one dict."""
    raw = b'foo|a:1:{s:1:"a";s:1:"b";}bar|a:1:{s:1:"c";s:1:"d";}'
    expected = {'foo': {'a': 'b'}, 'bar': {'c': 'd'}}
    self.assertEqual(phpserialize.loads(raw), expected)

def test_loads_unicode_strings(self):
    """A serialized string passed as unicode loads back as unicode."""
    serialized = u's:6:"Björk";'
    self.assertEqual(phpserialize.loads(serialized), u'Björk')

def test_loads_unicode_dict(self):
    """A serialized array passed as unicode loads with unicode keys and
    values."""
    serialized = u'a:1:{s:6:"Björk";s:16:"Guðmundsdóttir";}'
    expected = {u'Björk': u'Guðmundsdóttir'}
    self.assertEqual(phpserialize.loads(serialized), expected)

def test_basic_unicode_object_hook(self):
    """An object loaded from unicode input through phpobject exposes its
    string member as unicode."""
    serialized = u'O:8:"stdClass":1:{s:4:"name";s:6:"Björk";}'
    loaded = phpserialize.loads(serialized,
                                object_hook=phpserialize.phpobject)
    self.assertEqual(loaded.name, u'Björk')

def test_session_loads_unicode_strings(self):
    """A session passed as unicode loads with unicode session keys and
    unicode array contents."""
    serialized = u'Björk|a:1:{s:6:"Björk";s:16:"Guðmundsdóttir";}'
    expected = {u'Björk': {u'Björk': u'Guðmundsdóttir'}}
    self.assertEqual(phpserialize.loads(serialized), expected)


if __name__ == '__main__':
unittest.main()