diff --git a/README b/README index ea49e64..da7ea6a 100644 --- a/README +++ b/README @@ -1,3 +1,13 @@ -a port of the serialize and unserialize functions of php to python. This module +Fork of https://github.com/aioTV/phpserialize based on https://github.com/mitsuhiko/phpserialize + +A port of the serialize and unserialize functions of php to python. This module implements the python serialization interface (eg: provides dumps, loads and similar functions). + +Sessions unserializing is also supported if they were saved using PHP's +internal serializer and without encryption (see +http://www.hardened-php.net/suhosin/configuration.html#suhosin.session.encrypt). + +Support Native Nested Objects by hdbreaker # Last Commit Apr 14, 2016 +Support Python3, PHP Sessions, Unicode chars by aioTV # Last Commit Mar 30, 2016 +Support Serialize and Unserialize by mitsuhiko # Last Commit 22 Jan 2012 diff --git a/phpserialize.py b/phpserialize.py index 036cfb4..4e62656 100644 --- a/phpserialize.py +++ b/phpserialize.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- r""" - phpserialize + phpserialize + nested object and python object direct serialization by hdbreaker ~~~~~~~~~~~~ a port of the ``serialize`` and ``unserialize`` functions of @@ -236,6 +237,8 @@ class WP_User extends WP_UserBase { Changelog ========= + 1.4 + - added support for PHP sessions 1.3 - added support for Python 3 @@ -399,11 +402,15 @@ def _serialize(obj, keypos): str(len(obj)).encode('latin1'), b':{', b''.join(out), - b'}' + b'};' ]) if isinstance(obj, phpobject): return b'O' + _serialize(obj.__name__, True)[1:-1] + \ _serialize(obj.__php_vars__, False)[1:] + else: + if isinstance(obj, object): + return b'O' + _serialize(obj.__class__.__name__, True)[1:-1] + \ + _serialize(obj.__dict__, False)[1:] if object_hook is not None: return _serialize(object_hook(obj), False) raise TypeError('can\'t serialize %r' % type(obj)) @@ -412,7 +419,7 @@ def _serialize(obj, keypos): def load(fp, charset='utf-8', errors=default_errors, 
decode_strings=False, - object_hook=None, array_hook=None): + object_hook=None, array_hook=None, return_unicode=False): """Read a string from the open file object `fp` and interpret it as a data stream of PHP-serialized objects, reconstructing and returning the original object hierarchy. @@ -440,7 +447,7 @@ class data members. The data member names are in PHP format which is def _expect(e): v = fp.read(len(e)) - if v != e: + if v != e and v == '}': raise ValueError('failed expectation, expected %r got %r' % (e, v)) def _read_until(delim): @@ -471,55 +478,85 @@ def _load_array(): def _unserialize(): type_ = fp.read(1).lower() - if type_ == b'n': - _expect(b';') - return None - if type_ in b'idb': - _expect(b':') - data = _read_until(b';') - if type_ == b'i': - return int(data) - if type_ == b'd': - return float(data) - return int(data) != 0 - if type_ == b's': - _expect(b':') - length = int(_read_until(b':')) - _expect(b'"') - data = fp.read(length) - _expect(b'"') - if decode_strings: - data = data.decode(charset, errors) - _expect(b';') - return data - if type_ == b'a': - _expect(b':') - return array_hook(_load_array()) - if type_ == b'o': - if object_hook is None: - raise ValueError('object in serialization dump but ' - 'object_hook not given.') - _expect(b':') - name_length = int(_read_until(b':')) - _expect(b'"') - name = fp.read(name_length) - _expect(b'":') - if decode_strings: - name = name.decode(charset, errors) - return object_hook(name, dict(_load_array())) - raise ValueError('unexpected opcode') - - return _unserialize() + if type_ != ';': + if type_ == b'n': + _expect(b';') + return None + if type_ in b'idb': + _expect(b':') + data = _read_until(b';') + if type_ == b'i': + return int(data) + if type_ == b'd': + return float(data) + return int(data) != 0 + if type_ == b's': + _expect(b':') + length = int(_read_until(b':')) + _expect(b'"') + data = fp.read(length) + _expect(b'"') + if decode_strings: + data = data.decode(charset, errors) + if 
return_unicode: + data = unicode(data, charset) + _expect(b';') + return data + if type_ == b'a': + _expect(b':') + return array_hook(_load_array()) + if type_ == b'o': + if object_hook is None: + raise ValueError('object in serialization dump but ' + 'object_hook not given.') + _expect(b':') + name_length = int(_read_until(b':')) + _expect(b'"') + name = fp.read(name_length) + _expect(b'":') + if decode_strings: + name = name.decode(charset, errors) + return object_hook(name, dict(_load_array())) + if type_ == b'r': + # recursion + _expect(b':') + data = _read_until(b';') + return None + raise ValueError('unexpected opcode - %s' % repr(type_)) + + fp_position = fp.tell() + chunk = _read_until(b':'); + fp.seek(fp_position) # Reset pointer + if b'|' in chunk: + # We may be dealing with a serialized session, in which case keys + # followed by a pipe are preceding the serialized data. + unserialized_data = {} + while 1: + try: + key = _read_until(b'|'); + except ValueError: + break # end of stream + if return_unicode: + key = unicode(key, charset) + unserialized_data[key] = _unserialize() + else: + unserialized_data = _unserialize() + + return unserialized_data def loads(data, charset='utf-8', errors=default_errors, decode_strings=False, - object_hook=None, array_hook=None): + object_hook=None, array_hook=None, return_unicode=False): """Read a PHP-serialized object hierarchy from a string. Characters in the string past the object's representation are ignored. On Python 3 the string must be a bytestring. """ + # Convert unicode strings to byte strings. 
+ if type(data) == unicode: + data = data.encode(charset) + return_unicode = True return load(BytesIO(data), charset, errors, decode_strings, - object_hook, array_hook) + object_hook, array_hook, return_unicode) def dump(data, fp, charset='utf-8', errors=default_errors, object_hook=None): diff --git a/setup.py b/setup.py index 49f17af..f1f5ead 100644 --- a/setup.py +++ b/setup.py @@ -21,8 +21,8 @@ def get_docs(): name='phpserialize', author='Armin Ronacher', author_email='armin.ronacher@active-4.com', - version='1.3', - url='http://github.com/mitsuhiko/phpserialize', + version='1.6', + url='http://github.com/hdbreaker/phpserialize', py_modules=['phpserialize'], description='a port of the serialize and unserialize ' 'functions of php to python.',