From 8f2c96abea6081033af4b863d1c57bf5d3fa9653 Mon Sep 17 00:00:00 2001 From: Nathan Date: Thu, 20 Dec 2012 14:09:31 +0000 Subject: [PATCH 01/13] Added handling for recursion to return None --- phpserialize.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/phpserialize.py b/phpserialize.py index 036cfb4..1044f2f 100644 --- a/phpserialize.py +++ b/phpserialize.py @@ -507,7 +507,12 @@ def _unserialize(): if decode_strings: name = name.decode(charset, errors) return object_hook(name, dict(_load_array())) - raise ValueError('unexpected opcode') + if type_ == b'r': + # recursion + _expect(b':') + data = _read_until(b';') + return None + raise ValueError('unexpected opcode - %s' % repr(type_)) return _unserialize() From c123f596236722ecd54685bff2e8fd431dea26f5 Mon Sep 17 00:00:00 2001 From: Fabien Wald Date: Mon, 11 Feb 2013 18:44:36 +0000 Subject: [PATCH 02/13] Account for serialized sessions. --- phpserialize.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/phpserialize.py b/phpserialize.py index 1044f2f..c468e06 100644 --- a/phpserialize.py +++ b/phpserialize.py @@ -514,7 +514,22 @@ def _unserialize(): return None raise ValueError('unexpected opcode - %s' % repr(type_)) - return _unserialize() + chunk = _read_until(':'); + if '|' in chunk: + # We may be dealing with a serialized session, in which case keys + # followed by a pipe are preceding the serialized data. + fp.seek(0) # Reset pointer + unserialized_data = {} + while 1: + try: + key = _read_until('|'); + except ValueError: + break # end of stream + unserialized_data[key] = _unserialize() + else: + unserialized_data = _unserialize() + + return unserialized_data def loads(data, charset='utf-8', errors=default_errors, decode_strings=False, From 08e8f52618d4bb99c53a1ba613069d02efeb1a2c Mon Sep 17 00:00:00 2001 From: Fabien Wald Date: Tue, 12 Feb 2013 16:01:46 +0000 Subject: [PATCH 03/13] Fix file pointer mess up. --- phpserialize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/phpserialize.py b/phpserialize.py index c468e06..6d8e87e 100644 --- a/phpserialize.py +++ b/phpserialize.py @@ -514,11 +514,12 @@ def _unserialize(): return None raise ValueError('unexpected opcode - %s' % repr(type_)) + fp_position = fp.tell() chunk = _read_until(':'); + fp.seek(fp_position) # Reset pointer if '|' in chunk: # We may be dealing with a serialized session, in which case keys # followed by a pipe are preceding the serialized data. - fp.seek(0) # Reset pointer unserialized_data = {} while 1: try: From 89d402a26990f8f82c02f9779bb4fb514a13480d Mon Sep 17 00:00:00 2001 From: Fabien Wald Date: Tue, 12 Feb 2013 16:10:30 +0000 Subject: [PATCH 04/13] Add test for PHP session. --- tests.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests.py b/tests.py index f6d7066..c89bb14 100644 --- a/tests.py +++ b/tests.py @@ -99,6 +99,11 @@ def test_basic_object_hook(self): self.assertEqual(user.username, 'admin') self.assertEqual(user.__name__, 'WP_User') + def test_session(self): + data = b'session_key|a:1:{s:3:"foo";s:3:"bar";}' + session = phpserialize.loads(data) + self.assertEqual(session, {'session_key': {'foo': 'bar'}}) + if __name__ == '__main__': unittest.main() From 61bb01dd85427c3816ba0258b24c20a4eab1bede Mon Sep 17 00:00:00 2001 From: Fabien Wald Date: Tue, 12 Feb 2013 16:10:50 +0000 Subject: [PATCH 05/13] Bump version number to 1.4. --- phpserialize.py | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/phpserialize.py b/phpserialize.py index 6d8e87e..09d18ff 100644 --- a/phpserialize.py +++ b/phpserialize.py @@ -236,6 +236,8 @@ class WP_User extends WP_UserBase { Changelog ========= + 1.4 + - added support for PHP sessions 1.3 - added support for Python 3 diff --git a/setup.py b/setup.py index 49f17af..6e80398 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_docs(): name='phpserialize', author='Armin Ronacher', author_email='armin.ronacher@active-4.com', - version='1.3', + version='1.4', url='http://github.com/mitsuhiko/phpserialize', py_modules=['phpserialize'], description='a port of the serialize and unserialize ' From 7bfa14078dd19099f82059f4332f8039cb99c6de Mon Sep 17 00:00:00 2001 From: Fabien Wald Date: Tue, 12 Feb 2013 16:21:31 +0000 Subject: [PATCH 06/13] Update URL. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6e80398..59b0001 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ def get_docs(): author='Armin Ronacher', author_email='armin.ronacher@active-4.com', version='1.4', - url='http://github.com/mitsuhiko/phpserialize', + url='https://github.com/nathanwalsh/phpserialize', py_modules=['phpserialize'], description='a port of the serialize and unserialize ' 'functions of php to python.', From 60740676a5464348720ece7914a2a038ed07f09d Mon Sep 17 00:00:00 2001 From: Fabien Wald Date: Tue, 12 Feb 2013 16:25:11 +0000 Subject: [PATCH 07/13] Better test for PHP session. --- tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests.py b/tests.py index c89bb14..29c2eb8 100644 --- a/tests.py +++ b/tests.py @@ -100,9 +100,9 @@ def test_basic_object_hook(self): self.assertEqual(user.__name__, 'WP_User') def test_session(self): - data = b'session_key|a:1:{s:3:"foo";s:3:"bar";}' + data = b'foo|a:1:{s:1:"a";s:1:"b";}bar|a:1:{s:1:"c";s:1:"d";}' session = phpserialize.loads(data) - self.assertEqual(session, {'session_key': {'foo': 'bar'}}) + self.assertEqual(session, {'foo': {'a': 'b'}, 'bar': {'c': 'd'}}) if __name__ == '__main__': From 85f3008c9aa022cecd33d1da800e50ebecaeb14f Mon Sep 17 00:00:00 2001 From: Fabien Wald Date: Wed, 20 Feb 2013 13:51:59 +0000 Subject: [PATCH 08/13] Update readme. --- README | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README b/README index ea49e64..053c799 100644 --- a/README +++ b/README @@ -1,3 +1,8 @@ -a port of the serialize and unserialize functions of php to python. This module +A port of the serialize and unserialize functions of php to python. This module implements the python serialization interface (eg: provides dumps, loads and similar functions). + + +Sessions unserializing is also supported if they were saved using PHP's +internal serializer and without encryption (see +http://www.hardened-php.net/suhosin/configuration.html#suhosin.session.encrypt). \ No newline at end of file From d94e1f30b1d437e8302a848ece9b3888a8a7e619 Mon Sep 17 00:00:00 2001 From: Fabien Wald Date: Mon, 8 Apr 2013 18:03:44 +0100 Subject: [PATCH 09/13] Add support for unicode strings. --- phpserialize.py | 14 +++++++++++--- tests.py | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/phpserialize.py b/phpserialize.py index 09d18ff..2d21041 100644 --- a/phpserialize.py +++ b/phpserialize.py @@ -414,7 +414,7 @@ def _serialize(obj, keypos): def load(fp, charset='utf-8', errors=default_errors, decode_strings=False, - object_hook=None, array_hook=None): + object_hook=None, array_hook=None, return_unicode=False): """Read a string from the open file object `fp` and interpret it as a data stream of PHP-serialized objects, reconstructing and returning the original object hierarchy. @@ -492,6 +492,8 @@ def _unserialize(): _expect(b'"') if decode_strings: data = data.decode(charset, errors) + if return_unicode: + data = unicode(data, charset) _expect(b';') return data if type_ == b'a': @@ -528,6 +530,8 @@ def _unserialize(): key = _read_until('|'); except ValueError: break # end of stream + if return_unicode: + key = unicode(key, charset) unserialized_data[key] = _unserialize() else: unserialized_data = _unserialize() @@ -536,13 +540,17 @@ def _unserialize(): def loads(data, charset='utf-8', errors=default_errors, decode_strings=False, - object_hook=None, array_hook=None): + object_hook=None, array_hook=None, return_unicode=False): """Read a PHP-serialized object hierarchy from a string. Characters in the string past the object's representation are ignored. On Python 3 the string must be a bytestring. """ + # Convert unicode strings to byte strings. + if type(data) == unicode: + data = data.encode(charset) + return_unicode = True return load(BytesIO(data), charset, errors, decode_strings, - object_hook, array_hook) + object_hook, array_hook, return_unicode) def dump(data, fp, charset='utf-8', errors=default_errors, object_hook=None): diff --git a/tests.py b/tests.py index 29c2eb8..e065f92 100644 --- a/tests.py +++ b/tests.py @@ -104,6 +104,26 @@ def test_session(self): session = phpserialize.loads(data) self.assertEqual(session, {'foo': {'a': 'b'}, 'bar': {'c': 'd'}}) + def test_loads_unicode_strings(self): + data = u's:6:"Björk";' + result = phpserialize.loads(data) + self.assertEqual(result, u'Björk') + + def test_loads_unicode_dict(self): + data = u'a:1:{s:6:"Björk";s:16:"Guðmundsdóttir";}' + result = phpserialize.loads(data) + self.assertEqual(result, {u'Björk': u'Guðmundsdóttir'}) + + def test_basic_unicode_object_hook(self): + data = u'O:8:"stdClass":1:{s:4:"name";s:6:"Björk";}' + user = phpserialize.loads(data, object_hook=phpserialize.phpobject) + self.assertEqual(user.name, u'Björk') + + def test_session_loads_unicode_strings(self): + data = u'Björk|a:1:{s:6:"Björk";s:16:"Guðmundsdóttir";}' + session = phpserialize.loads(data) + self.assertEqual(session, {u'Björk': {u'Björk': u'Guðmundsdóttir'}}) + if __name__ == '__main__': unittest.main() From 7dfb3a72ec18b8c3e7094e726525065787ea612f Mon Sep 17 00:00:00 2001 From: Fabien Wald Date: Mon, 8 Apr 2013 18:09:04 +0100 Subject: [PATCH 10/13] Bump version number to 1.5. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 59b0001..3631064 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_docs(): name='phpserialize', author='Armin Ronacher', author_email='armin.ronacher@active-4.com', - version='1.4', + version='1.5', url='https://github.com/nathanwalsh/phpserialize', py_modules=['phpserialize'], description='a port of the serialize and unserialize ' From 8e3cfec1086f260157caa830585d156706488046 Mon Sep 17 00:00:00 2001 From: Vasilis Gerakaris Date: Thu, 20 Feb 2020 13:08:35 +0200 Subject: [PATCH 11/13] update changelog to reflect version changes --- phpserialize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/phpserialize.py b/phpserialize.py index 2d21041..5a9f994 100644 --- a/phpserialize.py +++ b/phpserialize.py @@ -235,7 +235,8 @@ class WP_User extends WP_UserBase { Changelog ========= - + 1.5 + - added support for unicode strings 1.4 - added support for PHP sessions 1.3 From a3edb1392f930ee44acbc3806d39dba3cce907c8 Mon Sep 17 00:00:00 2001 From: Vasilis Gerakaris Date: Thu, 20 Feb 2020 13:12:01 +0200 Subject: [PATCH 12/13] Revert url to mitsuhiko's repo --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3631064..ef50cd5 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ def get_docs(): author='Armin Ronacher', author_email='armin.ronacher@active-4.com', version='1.5', - url='https://github.com/nathanwalsh/phpserialize', + url='https://github.com/mitsuhiko/phpserialize', py_modules=['phpserialize'], description='a port of the serialize and unserialize ' 'functions of php to python.', From 77560563f7a87a7191d4e02ebeebd7606c2bc89e Mon Sep 17 00:00:00 2001 From: Vasilis Gerakaris Date: Thu, 20 Feb 2020 13:12:25 +0200 Subject: [PATCH 13/13] Fix python3 session handling & tests Bump version to 1.5.1 --- phpserialize.py | 8 +++++--- setup.py | 2 +- tests.py | 8 +++++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/phpserialize.py b/phpserialize.py index 5a9f994..8c139ed 100644 --- a/phpserialize.py +++ b/phpserialize.py @@ -520,17 +520,19 @@ def _unserialize(): raise ValueError('unexpected opcode - %s' % repr(type_)) fp_position = fp.tell() - chunk = _read_until(':'); + chunk = _read_until(b':'); fp.seek(fp_position) # Reset pointer - if '|' in chunk: + if b'|' in chunk: # We may be dealing with a serialized session, in which case keys # followed by a pipe are preceding the serialized data. unserialized_data = {} while 1: try: - key = _read_until('|'); + key = _read_until(b'|'); except ValueError: break # end of stream + if decode_strings: + key = key.decode(charset, errors) if return_unicode: key = unicode(key, charset) unserialized_data[key] = _unserialize() diff --git a/setup.py b/setup.py index ef50cd5..6ee0797 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_docs(): name='phpserialize', author='Armin Ronacher', author_email='armin.ronacher@active-4.com', - version='1.5', + version='1.5.1', url='https://github.com/mitsuhiko/phpserialize', py_modules=['phpserialize'], description='a port of the serialize and unserialize ' diff --git a/tests.py b/tests.py index e065f92..097e0b6 100644 --- a/tests.py +++ b/tests.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import unittest import phpserialize +from collections import OrderedDict class PhpSerializeTestCase(unittest.TestCase): @@ -32,7 +33,8 @@ def test_dumps_tuple(self): b'a:3:{i:0;i:7;i:1;i:8;i:2;i:9;}') def test_dumps_dict(self): - self.assertEqual(phpserialize.dumps({'a': 1, 'b': 2, 'c': 3}), + od = OrderedDict({'a': 1, 'c': 3, 'b': 2}) + self.assertEqual(phpserialize.dumps(od), b'a:3:{s:1:"a";i:1;s:1:"c";i:3;s:1:"b";i:2;}') def test_loads_dict(self): @@ -88,7 +90,7 @@ def dump_object_hook(obj): x = phpserialize.dumps(user, object_hook=dump_object_hook) y = phpserialize.loads(x, object_hook=load_object_hook, decode_strings=True) - self.assert_(b'WP_User' in x) + self.assertTrue(b'WP_User' in x) self.assertEqual(type(y), type(user)) self.assertEqual(y.username, user.username) @@ -101,7 +103,7 @@ def test_basic_object_hook(self): def test_session(self): data = b'foo|a:1:{s:1:"a";s:1:"b";}bar|a:1:{s:1:"c";s:1:"d";}' - session = phpserialize.loads(data) + session = phpserialize.loads(data, decode_strings=True) self.assertEqual(session, {'foo': {'a': 'b'}, 'bar': {'c': 'd'}}) def test_loads_unicode_strings(self):