diff --git a/README b/README index ea49e64..053c799 100644 --- a/README +++ b/README @@ -1,3 +1,8 @@ -a port of the serialize and unserialize functions of php to python. This module +A port of the serialize and unserialize functions of php to python. This module implements the python serialization interface (eg: provides dumps, loads and similar functions). + + +Unserializing sessions is also supported if they were saved using PHP's +internal serializer and without encryption (see +http://www.hardened-php.net/suhosin/configuration.html#suhosin.session.encrypt). \ No newline at end of file diff --git a/phpserialize.py b/phpserialize.py index 036cfb4..1b4c53c 100644 --- a/phpserialize.py +++ b/phpserialize.py @@ -236,6 +236,8 @@ class WP_User extends WP_UserBase { Changelog ========= + 1.4 + - added support for PHP sessions 1.3 - added support for Python 3 @@ -283,7 +285,7 @@ class WP_User extends WP_UserBase { xrange = range __author__ = 'Armin Ronacher ' -__version__ = '1.3' +__version__ = '1.4' __all__ = ('phpobject', 'convert_member_dict', 'dict_to_list', 'dict_to_tuple', 'load', 'loads', 'dump', 'dumps', 'serialize', 'unserialize') @@ -412,7 +414,7 @@ def _serialize(obj, keypos): def load(fp, charset='utf-8', errors=default_errors, decode_strings=False, - object_hook=None, array_hook=None): + object_hook=None, array_hook=None, return_unicode=False): """Read a string from the open file object `fp` and interpret it as a data stream of PHP-serialized objects, reconstructing and returning the original object hierarchy. 
def full_dict_to_list(d, array_hook=dict):
    """Recursively convert list-like arrays inside `d` into Python lists.

    PHP has a single array type, so unserialized data represents both
    lists and mappings as `array_hook` instances.  This helper walks the
    structure and replaces every container whose keys are exactly
    ``0, 1, ..., N-1`` (in that order) with a plain list of its values.
    All other containers are rebuilt as fresh `array_hook` instances with
    their values converted the same way.

    :param d: the value to convert.  Anything whose type is not exactly
              `array_hook` is returned unchanged.
    :param array_hook: the container type that was used when unserializing
                       (``dict`` by default, e.g. ``OrderedDict``).  It is
                       called without arguments to build new containers and
                       is passed down to every nested level.
    :return: a list, a new `array_hook` instance, or `d` itself.  An empty
             container is considered list-like and becomes ``[]``.

    Examples:

        d1 = {'a': 'b', 'c': {0: '1', 1: '2', 2: {'e': 7}, 3: {2: 8}}}
        full_dict_to_list(d1) returns
        {'a': 'b', 'c': ['1', '2', {'e': 7}, {2: 8}]}

        d2 = OrderedDict({'a': 'b', 'c': OrderedDict({0: '1', 1: '2', 2: OrderedDict({'e': 7}), 3: OrderedDict({2: 8})})})
        full_dict_to_list(d2, OrderedDict) returns
        OrderedDict([('a', 'b'), ('c', ['1', '2', OrderedDict([('e', 7)]), OrderedDict([(2, 8)])])])
    """
    # Exact type match rather than isinstance(): values of any other type,
    # including subclasses of `array_hook`, pass through untouched.
    if type(d) is not array_hook:
        return d

    keys = list(d.keys())

    # Any array with keys 0..N-1 is considered to be a list.  `array_hook`
    # must be forwarded here too, otherwise containers nested inside a
    # list would never be converted for a non-default hook.
    if keys == list(range(len(keys))):
        return [full_dict_to_list(d[key], array_hook) for key in keys]

    # Always rebuild with `array_hook` (also for single-key containers) so
    # the result type matches what the caller asked for.
    converted = array_hook()
    for key in keys:
        converted[key] = full_dict_to_list(d[key], array_hook)
    return converted
as array_hook + from collections import OrderedDict + d2 = OrderedDict({'a': 'b', 'c': OrderedDict({0: '1', 1: '2', 2: OrderedDict({'e': 7}), 3: OrderedDict({2: 8})})}) + d2_cleaned = OrderedDict([('a', 'b'), ('c', ['1', '2', OrderedDict([('e', 7)]), OrderedDict([(2, 8)])])]) + self.assertEqual(phpserialize.full_dict_to_list(d2, OrderedDict), d2_cleaned) + + + def test_session(self): + data = b'foo|a:1:{s:1:"a";s:1:"b";}bar|a:1:{s:1:"c";s:1:"d";}' + session = phpserialize.loads(data) + self.assertEqual(session, {'foo': {'a': 'b'}, 'bar': {'c': 'd'}}) + + def test_loads_unicode_strings(self): + data = u's:6:"Björk";' + result = phpserialize.loads(data) + self.assertEqual(result, u'Björk') + + def test_loads_unicode_dict(self): + data = u'a:1:{s:6:"Björk";s:16:"Guðmundsdóttir";}' + result = phpserialize.loads(data) + self.assertEqual(result, {u'Björk': u'Guðmundsdóttir'}) + + def test_basic_unicode_object_hook(self): + data = u'O:8:"stdClass":1:{s:4:"name";s:6:"Björk";}' + user = phpserialize.loads(data, object_hook=phpserialize.phpobject) + self.assertEqual(user.name, u'Björk') + + def test_session_loads_unicode_strings(self): + data = u'Björk|a:1:{s:6:"Björk";s:16:"Guðmundsdóttir";}' + session = phpserialize.loads(data) + self.assertEqual(session, {u'Björk': {u'Björk': u'Guðmundsdóttir'}}) + if __name__ == '__main__': unittest.main()