Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
a port of the serialize and unserialize functions of php to python. This module
A port of the serialize and unserialize functions of PHP to Python. This module
implements the Python serialization interface (e.g. it provides dumps, loads and
similar functions).


Unserializing sessions is also supported, provided they were saved using PHP's
internal serializer and without encryption (see
http://www.hardened-php.net/suhosin/configuration.html#suhosin.session.encrypt).
46 changes: 40 additions & 6 deletions phpserialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,10 @@ class WP_User extends WP_UserBase {

Changelog
=========

1.5
- added support for unicode strings
1.4
- added support for PHP sessions
1.3
- added support for Python 3

Expand Down Expand Up @@ -412,7 +415,7 @@ def _serialize(obj, keypos):


def load(fp, charset='utf-8', errors=default_errors, decode_strings=False,
object_hook=None, array_hook=None):
object_hook=None, array_hook=None, return_unicode=False):
"""Read a string from the open file object `fp` and interpret it as a
data stream of PHP-serialized objects, reconstructing and returning
the original object hierarchy.
Expand Down Expand Up @@ -490,6 +493,8 @@ def _unserialize():
_expect(b'"')
if decode_strings:
data = data.decode(charset, errors)
if return_unicode:
data = unicode(data, charset)
_expect(b';')
return data
if type_ == b'a':
Expand All @@ -507,19 +512,48 @@ def _unserialize():
if decode_strings:
name = name.decode(charset, errors)
return object_hook(name, dict(_load_array()))
raise ValueError('unexpected opcode')
if type_ == b'r':
# recursion
_expect(b':')
data = _read_until(b';')
return None
raise ValueError('unexpected opcode - %s' % repr(type_))

fp_position = fp.tell()
chunk = _read_until(b':');
fp.seek(fp_position) # Reset pointer
if b'|' in chunk:
# We may be dealing with a serialized session, in which case keys
# followed by a pipe are preceding the serialized data.
unserialized_data = {}
while 1:
try:
key = _read_until(b'|');
except ValueError:
break # end of stream
if decode_strings:
key = key.decode(charset, errors)
if return_unicode:
key = unicode(key, charset)
unserialized_data[key] = _unserialize()
else:
unserialized_data = _unserialize()

return _unserialize()
return unserialized_data


def loads(data, charset='utf-8', errors=default_errors, decode_strings=False,
object_hook=None, array_hook=None):
object_hook=None, array_hook=None, return_unicode=False):
"""Read a PHP-serialized object hierarchy from a string. Characters in the
string past the object's representation are ignored. On Python 3 the
string must be a bytestring.
"""
# Convert unicode strings to byte strings.
if type(data) == unicode:
data = data.encode(charset)
return_unicode = True
return load(BytesIO(data), charset, errors, decode_strings,
object_hook, array_hook)
object_hook, array_hook, return_unicode)


def dump(data, fp, charset='utf-8', errors=default_errors, object_hook=None):
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def get_docs():
name='phpserialize',
author='Armin Ronacher',
author_email='armin.ronacher@active-4.com',
version='1.3',
url='http://github.com/mitsuhiko/phpserialize',
version='1.5.1',
url='https://github.com/mitsuhiko/phpserialize',
py_modules=['phpserialize'],
description='a port of the serialize and unserialize '
'functions of php to python.',
Expand Down
31 changes: 29 additions & 2 deletions tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import unittest
import phpserialize
from collections import OrderedDict


class PhpSerializeTestCase(unittest.TestCase):
Expand Down Expand Up @@ -32,7 +33,8 @@ def test_dumps_tuple(self):
b'a:3:{i:0;i:7;i:1;i:8;i:2;i:9;}')

def test_dumps_dict(self):
self.assertEqual(phpserialize.dumps({'a': 1, 'b': 2, 'c': 3}),
od = OrderedDict({'a': 1, 'c': 3, 'b': 2})
self.assertEqual(phpserialize.dumps(od),
b'a:3:{s:1:"a";i:1;s:1:"c";i:3;s:1:"b";i:2;}')

def test_loads_dict(self):
Expand Down Expand Up @@ -88,7 +90,7 @@ def dump_object_hook(obj):
x = phpserialize.dumps(user, object_hook=dump_object_hook)
y = phpserialize.loads(x, object_hook=load_object_hook,
decode_strings=True)
self.assert_(b'WP_User' in x)
self.assertTrue(b'WP_User' in x)
self.assertEqual(type(y), type(user))
self.assertEqual(y.username, user.username)

Expand All @@ -99,6 +101,31 @@ def test_basic_object_hook(self):
self.assertEqual(user.username, 'admin')
self.assertEqual(user.__name__, 'WP_User')

def test_session(self):
data = b'foo|a:1:{s:1:"a";s:1:"b";}bar|a:1:{s:1:"c";s:1:"d";}'
session = phpserialize.loads(data, decode_strings=True)
self.assertEqual(session, {'foo': {'a': 'b'}, 'bar': {'c': 'd'}})

def test_loads_unicode_strings(self):
data = u's:6:"Björk";'
result = phpserialize.loads(data)
self.assertEqual(result, u'Björk')

def test_loads_unicode_dict(self):
data = u'a:1:{s:6:"Björk";s:16:"Guðmundsdóttir";}'
result = phpserialize.loads(data)
self.assertEqual(result, {u'Björk': u'Guðmundsdóttir'})

def test_basic_unicode_object_hook(self):
data = u'O:8:"stdClass":1:{s:4:"name";s:6:"Björk";}'
user = phpserialize.loads(data, object_hook=phpserialize.phpobject)
self.assertEqual(user.name, u'Björk')

def test_session_loads_unicode_strings(self):
data = u'Björk|a:1:{s:6:"Björk";s:16:"Guðmundsdóttir";}'
session = phpserialize.loads(data)
self.assertEqual(session, {u'Björk': {u'Björk': u'Guðmundsdóttir'}})


if __name__ == '__main__':
unittest.main()