From 75cdb5dcd2770eb4dd06968d2b4c2b47f7f8e1b5 Mon Sep 17 00:00:00 2001 From: "Aoi.Kuiyuyou" Date: Thu, 30 Oct 2014 12:04:20 -0700 Subject: [PATCH 01/16] remove dependency on |pyutil| so that file |zbase62.py| is self-contained thus can be used in a drop-in way. --- zbase62/zbase62.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/zbase62/zbase62.py b/zbase62/zbase62.py index b47dd2e..bf5656c 100644 --- a/zbase62/zbase62.py +++ b/zbase62/zbase62.py @@ -9,8 +9,37 @@ # from the Python Standard Library import string -from pyutil.assertutil import _assert, precondition, postcondition -from pyutil.mathutil import div_ceil, log_ceil, log_floor +#/ Copied from |pyutil|: +## https://github.com/simplegeo/pyutil/blob/bd40e624771c84859045911082480e74ff01fcd4/pyutil/mathutil.py#L44 +## +## |zbase62| (https://github.com/simplegeo/zbase62) +## and |pyutil| (https://github.com/simplegeo/pyutil) have same license options. +## +## ---BEG +def log_ceil(n, b): + """ + The smallest integer k such that b^k >= n. + log_ceil(n, 2) is the number of bits needed to store any of n values, e.g. + the number of bits needed to store any of 128 possible values is 7. + """ + p = 1 + k = 0 + while p < n: + p *= b + k += 1 + return k + +def log_floor(n, b): + """ + The largest integer k such that b^k <= n. + """ + p = 1 + k = 0 + while p <= n: + p *= b + k += 1 + return k - 1 +## ---END chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" From d3dac6373d7834380ad047b6b05526ab1bd6593d Mon Sep 17 00:00:00 2001 From: "Aoi.Kuiyuyou" Date: Thu, 30 Oct 2014 12:11:37 -0700 Subject: [PATCH 02/16] make code work on Python 2.4+ and 3.0+ --- zbase62/zbase62.py | 66 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/zbase62/zbase62.py b/zbase62/zbase62.py index bf5656c..729c073 100644 --- a/zbase62/zbase62.py +++ b/zbase62/zbase62.py @@ -8,6 +8,7 @@ # from the Python Standard Library import string +import sys #/ Copied from |pyutil|: ## https://github.com/simplegeo/pyutil/blob/bd40e624771c84859045911082480e74ff01fcd4/pyutil/mathutil.py#L44 @@ -41,12 +42,27 @@ def log_floor(n, b): return k - 1 ## ---END -chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +IS_PY2 = sys.version_info[0] == 2 + +if sys.version_info[:2] <= (3, 0): + maketrans = string.maketrans +else: + maketrans = bytes.maketrans + +if IS_PY2: + translate = string.translate +else: + translate = bytes.translate +chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +if not IS_PY2: + chars = chars.encode('ascii') vals = ''.join([chr(i) for i in range(62)]) -c2vtranstable = string.maketrans(chars, vals) -v2ctranstable = string.maketrans(vals, chars) -identitytranstable = string.maketrans(chars, chars) +if not IS_PY2: + vals = vals.encode('latin') +c2vtranstable = maketrans(chars, vals) +v2ctranstable = maketrans(vals, chars) +identitytranstable = maketrans(chars, chars) def b2a(os): """ @@ -83,7 +99,10 @@ def b2a_l(os, lengthinbits): @return the contents of os in base-62 encoded form """ - os = [ord(o) for o in reversed(os)] # treat os as big-endian -- and we want to process the least-significant o first + os = reversed(os) # treat os as big-endian -- and we want to process the least-significant o first + + if IS_PY2: + os = [ord(o) for o in os] value = 0 numvalues = 1 # the number of possible values that value could be @@ -97,8 +116,22 @@ def b2a_l(os, lengthinbits): chars.append(value % 62) value //= 62 numvalues //= 62 - - return string.translate(''.join([chr(c) for c in reversed(chars)]), v2ctranstable) # make it big-endian + + schars = ''.join([chr(c) for c in reversed(chars)]) + + if IS_PY2: + bchars = schars + else: + bchars = bytes(schars, 'latin') + + bchars = translate(bchars, v2ctranstable) # make it big-endian + + if IS_PY2: + schars = bchars + else: + schars = str(bchars, 'ascii') + + return schars def num_octets_that_encode_to_this_many_chars(numcs): return log_floor(62**numcs, 256) @@ -127,7 +160,13 @@ def a2b_l(cs, lengthinbits): @return the data encoded in cs """ - cs = [ord(c) for c in reversed(string.translate(cs, c2vtranstable))] # treat cs as big-endian -- and we want to process the least-significant c first + if not IS_PY2: + cs = cs.encode('ascii') + + cs = reversed(translate(cs, c2vtranstable)) # treat cs as big-endian -- and we want to process the least-significant c first + + if IS_PY2: + cs = [ord(c) for c in cs] value = 0 numvalues = 1 # the number of possible values that value could be @@ -142,5 +181,12 @@ def a2b_l(cs, lengthinbits): bytes.append(value % 256) value //= 256 numvalues //= 256 - - return ''.join([chr(b) for b in reversed(bytes)]) # make it big-endian + + schars = ''.join([chr(b) for b in reversed(bytes)]) # make it big-endian + + if IS_PY2: + bchars = schars + else: + bchars = schars.encode('latin') + + return bchars From 41253f397d552cf75476a85d345a8490601d8f40 Mon Sep 17 00:00:00 2001 From: "Aoi.Kuiyuyou" Date: Thu, 30 Oct 2014 13:20:52 -0700 Subject: [PATCH 03/16] add |README.md| --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..0ba683b --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +# zbase62 +A fork of [simplegeo/zbase62](https://github.com/simplegeo/zbase62). + +Changes: + - Remove dependency on [pyutil](https://github.com/simplegeo/pyutil) so that file [zbase62.py](/zbase62/zbase62.py) is self-contained thus can be used in a drop-in way. + - Make code work on Python 3 too. + +Python: 2.4+ and 3.0+ From 0e8ca2fe5c09ad866cfef29cb7df592a6c50c855 Mon Sep 17 00:00:00 2001 From: Thomas Steinacher Date: Thu, 22 Sep 2016 11:49:14 -0700 Subject: [PATCH 04/16] Fix print statement --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 722fbb5..ad1202d 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ if mo: verstr = mo.group(1) else: - print "unable to find version in %s" % (VERSIONFILE,) + print("unable to find version in %s" % (VERSIONFILE,)) raise RuntimeError("if %s.py exists, it must be well-formed" % (VERSIONFILE,)) setup_requires = [] From 8d3f8d802e23d1a8d748d5df95353d4515807296 Mon Sep 17 00:00:00 2001 From: Thomas Steinacher Date: Thu, 22 Sep 2016 11:50:45 -0700 Subject: [PATCH 05/16] py3 exception + whitespace fixes --- ez_setup.py | 43 +------------------------------------------ 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/ez_setup.py b/ez_setup.py index 6d76468..ccc14e2 100644 --- a/ez_setup.py +++ b/ez_setup.py @@ -103,7 +103,7 @@ def do_download(): return do_download() try: pkg_resources.require("setuptools>="+version); return - except pkg_resources.VersionConflict, e: + except pkg_resources.VersionConflict as e: if was_imported: print >>sys.stderr, ( "The required version of setuptools (>=%s) is not available, and\n" @@ -165,41 +165,6 @@ def download_setuptools( if dst: dst.close() return os.path.realpath(saveto) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - def main(argv, version=DEFAULT_VERSION): """Install or upgrade setuptools and EasyInstall""" try: @@ -276,9 +241,3 @@ def update_md5(filenames): update_md5(sys.argv[2:]) else: main(sys.argv[1:]) - - - - - - From 915f9bc40b1803d9c14a61dcd23934e83612cb5b Mon Sep 17 00:00:00 2001 From: Thomas Steinacher Date: Thu, 22 Sep 2016 11:53:11 -0700 Subject: [PATCH 06/16] More print statements --- ez_setup.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ez_setup.py b/ez_setup.py index ccc14e2..0fcdc07 100644 --- a/ez_setup.py +++ b/ez_setup.py @@ -70,9 +70,9 @@ def _validate_md5(egg_name, data): if egg_name in md5_data: digest = md5(data).hexdigest() if digest != md5_data[egg_name]: - print >>sys.stderr, ( + print( "md5 validation of %s failed! (Possible download problem?)" - % egg_name + % egg_name, file=sys.stderr ) sys.exit(2) return data @@ -105,12 +105,11 @@ def do_download(): pkg_resources.require("setuptools>="+version); return except pkg_resources.VersionConflict as e: if was_imported: - print >>sys.stderr, ( + print( "The required version of setuptools (>=%s) is not available, and\n" "can't be installed while this script is running. Please install\n" " a more recent version first, using 'easy_install -U setuptools'." - "\n\n(Currently using %r)" - ) % (version, e.args[0]) + "\n\n(Currently using %r)" % (version, e.args[0]), file=sys.stderr) sys.exit(2) else: del pkg_resources, sys.modules['pkg_resources'] # reload ok @@ -181,9 +180,10 @@ def main(argv, version=DEFAULT_VERSION): os.unlink(egg) else: if setuptools.__version__ == '0.0.1': - print >>sys.stderr, ( + print( "You have an obsolete version of setuptools installed. Please\n" - "remove it from your system entirely before rerunning this script." + "remove it from your system entirely before rerunning this script.", + file=sys.stderr ) sys.exit(2) @@ -203,8 +203,8 @@ def main(argv, version=DEFAULT_VERSION): from setuptools.command.easy_install import main main(argv) else: - print "Setuptools version",version,"or greater has been installed." - print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)' + print("Setuptools version",version,"or greater has been installed.") + print('(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)') def update_md5(filenames): """Update our built-in md5 registry""" @@ -227,7 +227,7 @@ def update_md5(filenames): match = re.search("\nmd5_data = {\n([^}]+)}", src) if not match: - print >>sys.stderr, "Internal error!" + print("Internal error!", file=sys.stderr) sys.exit(2) src = src[:match.start(1)] + repl + src[match.end(1):] From 850decc8927d463ec016b1c18b743d5c4753905a Mon Sep 17 00:00:00 2001 From: Thomas Steinacher Date: Thu, 22 Sep 2016 13:43:31 -0700 Subject: [PATCH 07/16] fix installation process --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ad1202d..b3d68ed 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ # "sdist" or "bdist_egg"), unless there is a zbase62.egg-info/SOURCE.txt file # present which contains a complete list of files that should be included. # http://pypi.python.org/pypi/setuptools_darcs -setup_requires.append('setuptools_darcs >= 1.1.0') +#setup_requires.append('setuptools_darcs >= 1.1.0') data_fnames=[ 'COPYING.GPL', 'COPYING.TGPPL.html', 'COPYING.SPL.txt', 'README.txt' ] From e136db236746db99a60dcccc3df67853621e46d1 Mon Sep 17 00:00:00 2001 From: Thomas Steinacher Date: Thu, 22 Sep 2016 13:43:37 -0700 Subject: [PATCH 08/16] fix tests --- zbase62/test/test_base62.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zbase62/test/test_base62.py b/zbase62/test/test_base62.py index 6f859c1..0231287 100644 --- a/zbase62/test/test_base62.py +++ b/zbase62/test/test_base62.py @@ -23,7 +23,7 @@ def _test_num_octets_that_encode_to_this_many_chars(self, chars, octets): def _test_ende(self, bs): alphas=zbase62.b2a(bs) bs2=zbase62.a2b(alphas) - assert bs2 == bs, "bs2: %s:%s, bs: %s:%s, alphas: %s:%s" % (len(bs2), `bs2`, len(bs), `bs`, len(alphas), `alphas`) + assert bs2 == bs, "bs2: %s:%s, bs: %s:%s, alphas: %s:%s" % (len(bs2), repr(bs2), len(bs), repr(bs), len(alphas), repr(alphas)) def test_num_octets_that_encode_to_this_many_chars(self): return self._test_num_octets_that_encode_to_this_many_chars(2, 1) From e13d2c748ccdb0cafe6465961a0c6a4111ee219f Mon Sep 17 00:00:00 2001 From: Thomas Steinacher Date: Fri, 23 Sep 2016 15:17:17 -0700 Subject: [PATCH 09/16] Fix print function --- ez_setup.py | 3 +++ setup.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/ez_setup.py b/ez_setup.py index 0fcdc07..804d30f 100644 --- a/ez_setup.py +++ b/ez_setup.py @@ -13,6 +13,9 @@ This file can also be run as a script to install or upgrade setuptools. """ + +from __future__ import print_function + import sys DEFAULT_VERSION = "0.6c11" DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3] diff --git a/setup.py b/setup.py index b3d68ed..040297e 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,8 @@ # # See README.txt for licensing information. +from __future__ import print_function + import glob, os, re, sys eggs = glob.glob('darcsver-*.egg') From d328dacd62fe09fe8d4d7d34751ea29d03921307 Mon Sep 17 00:00:00 2001 From: Vyacheslav Tverskoy Date: Tue, 10 Dec 2019 23:17:15 +0500 Subject: [PATCH 10/16] Fix pyutil import in test --- zbase62/test/test_base62.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/zbase62/test/test_base62.py b/zbase62/test/test_base62.py index 0231287..0815c1d 100644 --- a/zbase62/test/test_base62.py +++ b/zbase62/test/test_base62.py @@ -8,8 +8,11 @@ import random, unittest -# http://zooko.com/repos/pyutil -from pyutil import mathutil, randutil +def div_ceil(n, d): + """ + The smallest integer k such that k*d >= n. + """ + return int((n//d) + (n%d != 0)) from zbase62 import zbase62 @@ -55,7 +58,7 @@ def test_ende_longrandstr(self): def test_odd_sizes(self): for j in range(2**6): lib = random.randrange(1, 2**8) - numos = mathutil.div_ceil(lib, 8) + numos = div_ceil(lib, 8) bs = insecurerandstr(numos) # zero-out unused least-sig bits if lib%8: From e901e8abb0e574f531b891c9c66253af632c8068 Mon Sep 17 00:00:00 2001 From: Vyacheslav Tverskoy Date: Tue, 10 Dec 2019 23:25:35 +0500 Subject: [PATCH 11/16] Remove unused param --- zbase62/test/test_base62.py | 2 +- zbase62/zbase62.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/zbase62/test/test_base62.py b/zbase62/test/test_base62.py index 0815c1d..0bbefcb 100644 --- a/zbase62/test/test_base62.py +++ b/zbase62/test/test_base62.py @@ -66,7 +66,7 @@ def test_odd_sizes(self): b = b >> (8 - (lib%8)) b = b << (8 - (lib%8)) bs = bs[:-1] + chr(b) - asl = zbase62.b2a_l(bs, lib) + asl = zbase62.b2a_l(bs) assert len(asl) == zbase62.num_chars_that_this_many_octets_encode_to(numos) # the size of the base-62 encoding must be just right bs2l = zbase62.a2b_l(asl, lib) assert len(bs2l) == numos # the size of the result must be just right diff --git a/zbase62/zbase62.py b/zbase62/zbase62.py index 729c073..023db7b 100644 --- a/zbase62/zbase62.py +++ b/zbase62/zbase62.py @@ -70,14 +70,13 @@ def b2a(os): @return the contents of os in base-62 encoded form """ - cs = b2a_l(os, len(os)*8) + cs = b2a_l(os) assert num_octets_that_encode_to_this_many_chars(len(cs)) == len(os), "%s != %s, numchars: %s" % (num_octets_that_encode_to_this_many_chars(len(cs)), len(os), len(cs)) return cs -def b2a_l(os, lengthinbits): +def b2a_l(os): """ @param os the data to be encoded (a string) - @param lengthinbits the number of bits of data in os to be encoded b2a_l() will generate a base-62 encoded string big enough to encode lengthinbits bits. So for example if os is 3 bytes long and lengthinbits is From eb4943753718dee24b16e2ed88bcafe5c843324a Mon Sep 17 00:00:00 2001 From: Vyacheslav Tverskoy Date: Tue, 10 Dec 2019 23:44:58 +0500 Subject: [PATCH 12/16] Fix tests on py3 --- zbase62/test/test_base62.py | 43 +++++++++++++++++++------------------ zbase62/zbase62.py | 2 ++ 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/zbase62/test/test_base62.py b/zbase62/test/test_base62.py index 0bbefcb..fc88cfc 100644 --- a/zbase62/test/test_base62.py +++ b/zbase62/test/test_base62.py @@ -5,9 +5,11 @@ # Permission is hereby granted, free of charge, to any person obtaining a copy # of this work to deal in this work without restriction (including the rights # to use, modify, distribute, sublicense, and/or sell copies). - +import os, sys import random, unittest +IS_PY2 = sys.version_info[0] == 2 + def div_ceil(n, d): """ The smallest integer k such that k*d >= n. @@ -16,8 +18,8 @@ def div_ceil(n, d): from zbase62 import zbase62 -def insecurerandstr(n): - return ''.join(map(chr, map(random.randrange, [0]*n, [256]*n))) +def random_bytes(n): + return os.urandom(n) class T(unittest.TestCase): def _test_num_octets_that_encode_to_this_many_chars(self, chars, octets): @@ -29,53 +31,52 @@ def _test_ende(self, bs): assert bs2 == bs, "bs2: %s:%s, bs: %s:%s, alphas: %s:%s" % (len(bs2), repr(bs2), len(bs), repr(bs), len(alphas), repr(alphas)) def test_num_octets_that_encode_to_this_many_chars(self): - return self._test_num_octets_that_encode_to_this_many_chars(2, 1) - return self._test_num_octets_that_encode_to_this_many_chars(3, 2) - return self._test_num_octets_that_encode_to_this_many_chars(5, 3) - return self._test_num_octets_that_encode_to_this_many_chars(6, 4) + self._test_num_octets_that_encode_to_this_many_chars(2, 1) + self._test_num_octets_that_encode_to_this_many_chars(3, 2) + self._test_num_octets_that_encode_to_this_many_chars(5, 3) + self._test_num_octets_that_encode_to_this_many_chars(6, 4) + + def test_empty(self): + self._test_ende(b'') def test_ende_0x00(self): - return self._test_ende('\x00') + self._test_ende(b'\x00') def test_ende_0x01(self): - return self._test_ende('\x01') + self._test_ende(b'\x01') def test_ende_0x0100(self): - return self._test_ende('\x01\x00') + self._test_ende(b'\x01\x00') def test_ende_0x000000(self): - return self._test_ende('\x00\x00\x00') + self._test_ende(b'\x00\x00\x00') def test_ende_0x010000(self): - return self._test_ende('\x01\x00\x00') + self._test_ende(b'\x01\x00\x00') def test_ende_randstr(self): - return self._test_ende(insecurerandstr(2**4)) + self._test_ende(random_bytes(2 ** 4)) def test_ende_longrandstr(self): - return self._test_ende(insecurerandstr(random.randrange(0, 2**10))) + self._test_ende(random_bytes(random.randrange(0, 2 ** 10))) def test_odd_sizes(self): for j in range(2**6): lib = random.randrange(1, 2**8) numos = div_ceil(lib, 8) - bs = insecurerandstr(numos) + bs = random_bytes(numos) # zero-out unused least-sig bits if lib%8: - b=ord(bs[-1]) + b=ord(bs[-1]) if IS_PY2 else bs[-1] b = b >> (8 - (lib%8)) b = b << (8 - (lib%8)) - bs = bs[:-1] + chr(b) + bs = bs[:-1] + (chr(b) if IS_PY2 else bytes([b])) asl = zbase62.b2a_l(bs) assert len(asl) == zbase62.num_chars_that_this_many_octets_encode_to(numos) # the size of the base-62 encoding must be just right bs2l = zbase62.a2b_l(asl, lib) assert len(bs2l) == numos # the size of the result must be just right assert bs == bs2l -def suite(): - suite = unittest.makeSuite(T, 'test') - return suite - if __name__ == "__main__": unittest.main() diff --git a/zbase62/zbase62.py b/zbase62/zbase62.py index 023db7b..894e83f 100644 --- a/zbase62/zbase62.py +++ b/zbase62/zbase62.py @@ -70,6 +70,8 @@ def b2a(os): @return the contents of os in base-62 encoded form """ + if not isinstance(os, bytes): + os = os.encode('utf-8') cs = b2a_l(os) assert num_octets_that_encode_to_this_many_chars(len(cs)) == len(os), "%s != %s, numchars: %s" % (num_octets_that_encode_to_this_many_chars(len(cs)), len(os), len(cs)) return cs From ba3cf00d9adb61c477a54f9ba261d5a498b0999f Mon Sep 17 00:00:00 2001 From: Vyacheslav Tverskoy Date: Wed, 11 Dec 2019 00:25:29 +0500 Subject: [PATCH 13/16] Fix types --- zbase62/test/test_base62.py | 12 ++++++++++++ zbase62/zbase62.py | 31 +++++++++++++------------------ 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/zbase62/test/test_base62.py b/zbase62/test/test_base62.py index fc88cfc..15e0b96 100644 --- a/zbase62/test/test_base62.py +++ b/zbase62/test/test_base62.py @@ -9,6 +9,8 @@ import random, unittest IS_PY2 = sys.version_info[0] == 2 +if not IS_PY2: + unicode = str def div_ceil(n, d): """ @@ -77,6 +79,16 @@ def test_odd_sizes(self): assert len(bs2l) == numos # the size of the result must be just right assert bs == bs2l + def test_invalid(self): + print(zbase62.a2b('~!~')) + + def test_types(self): + assert type(zbase62.a2b(u'x')) == bytes + assert type(zbase62.a2b(b'x')) == bytes + + assert type(zbase62.b2a(u'x')) == unicode + assert type(zbase62.b2a(b'x')) == unicode + if __name__ == "__main__": unittest.main() diff --git a/zbase62/zbase62.py b/zbase62/zbase62.py index 894e83f..bc0dfd6 100644 --- a/zbase62/zbase62.py +++ b/zbase62/zbase62.py @@ -126,13 +126,9 @@ def b2a_l(os): bchars = bytes(schars, 'latin') bchars = translate(bchars, v2ctranstable) # make it big-endian - - if IS_PY2: - schars = bchars - else: - schars = str(bchars, 'ascii') - - return schars + assert type(bchars) == bytes + + return bchars.decode('utf-8') def num_octets_that_encode_to_this_many_chars(numcs): return log_floor(62**numcs, 256) @@ -161,8 +157,8 @@ def a2b_l(cs, lengthinbits): @return the data encoded in cs """ - if not IS_PY2: - cs = cs.encode('ascii') + if not isinstance(cs, bytes): + cs = cs.encode('utf-8') cs = reversed(translate(cs, c2vtranstable)) # treat cs as big-endian -- and we want to process the least-significant c first @@ -177,17 +173,16 @@ def a2b_l(cs, lengthinbits): numvalues *= 62 numvalues = 2**lengthinbits - bytes = [] + byte_list = [] while numvalues > 1: - bytes.append(value % 256) + byte_list.append(value % 256) value //= 256 numvalues //= 256 - - schars = ''.join([chr(b) for b in reversed(bytes)]) # make it big-endian - + + # make it big-endian + byte_list = reversed(byte_list) + if IS_PY2: - bchars = schars + return b''.join([chr(b) for b in byte_list]) else: - bchars = schars.encode('latin') - - return bchars + return bytes(byte_list) From a50f77b711fa59a17fb19e8a4909907ba1432d2b Mon Sep 17 00:00:00 2001 From: Vyacheslav Tverskoy Date: Thu, 12 Dec 2019 13:48:40 +0500 Subject: [PATCH 14/16] Remove print in invalid test --- zbase62/test/test_base62.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zbase62/test/test_base62.py b/zbase62/test/test_base62.py index 15e0b96..8d724c3 100644 --- a/zbase62/test/test_base62.py +++ b/zbase62/test/test_base62.py @@ -80,7 +80,8 @@ def test_odd_sizes(self): assert bs == bs2l def test_invalid(self): - print(zbase62.a2b('~!~')) + # doesn't fail + zbase62.a2b('~!~') def test_types(self): assert type(zbase62.a2b(u'x')) == bytes From ab5ec3c1c14d6f8611c2ea284599bb6dc91d4e00 Mon Sep 17 00:00:00 2001 From: Vyacheslav Tverskoy Date: Thu, 12 Dec 2019 14:10:46 +0500 Subject: [PATCH 15/16] Remove b2a_l --- zbase62/test/test_base62.py | 2 +- zbase62/zbase62.py | 37 ++++++++----------------------------- 2 files changed, 9 insertions(+), 30 deletions(-) diff --git a/zbase62/test/test_base62.py b/zbase62/test/test_base62.py index 8d724c3..45b2138 100644 --- a/zbase62/test/test_base62.py +++ b/zbase62/test/test_base62.py @@ -73,7 +73,7 @@ def test_odd_sizes(self): b = b >> (8 - (lib%8)) b = b << (8 - (lib%8)) bs = bs[:-1] + (chr(b) if IS_PY2 else bytes([b])) - asl = zbase62.b2a_l(bs) + asl = zbase62.b2a(bs) assert len(asl) == zbase62.num_chars_that_this_many_octets_encode_to(numos) # the size of the base-62 encoding must be just right bs2l = zbase62.a2b_l(asl, lib) assert len(bs2l) == numos # the size of the result must be just right diff --git a/zbase62/zbase62.py b/zbase62/zbase62.py index bc0dfd6..1558c54 100644 --- a/zbase62/zbase62.py +++ b/zbase62/zbase62.py @@ -70,36 +70,9 @@ def b2a(os): @return the contents of os in base-62 encoded form """ + original_len = len(os) if not isinstance(os, bytes): os = os.encode('utf-8') - cs = b2a_l(os) - assert num_octets_that_encode_to_this_many_chars(len(cs)) == len(os), "%s != %s, numchars: %s" % (num_octets_that_encode_to_this_many_chars(len(cs)), len(os), len(cs)) - return cs - -def b2a_l(os): - """ - @param os the data to be encoded (a string) - - b2a_l() will generate a base-62 encoded string big enough to encode - lengthinbits bits. So for example if os is 3 bytes long and lengthinbits is - 17, then b2a_l() will generate a 3-character- long base-62 encoded string - (since 3 chars is sufficient to encode more than 2^17 values). If os is 3 - bytes long and lengthinbits is 18 (or None), then b2a_l() will generate a - 4-character string (since 4 chars are required to hold 2^18 values). Note - that if os is 3 bytes long and lengthinbits is 17, the least significant 7 - bits of os are ignored. - - Warning: if you generate a base-62 encoded string with b2a_l(), and then someone else tries to - decode it by calling a2b() instead of a2b_l(), then they will (potentially) get a different - string than the one you encoded! So use b2a_l() only when you are sure that the encoding and - decoding sides know exactly which lengthinbits to use. If you do not have a way for the - encoder and the decoder to agree upon the lengthinbits, then it is best to use b2a() and - a2b(). The only drawback to using b2a() over b2a_l() is that when you have a number of - bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-62 encoded - string that is one or two characters longer than necessary. - - @return the contents of os in base-62 encoded form - """ os = reversed(os) # treat os as big-endian -- and we want to process the least-significant o first if IS_PY2: @@ -128,7 +101,13 @@ def b2a_l(os): bchars = translate(bchars, v2ctranstable) # make it big-endian assert type(bchars) == bytes - return bchars.decode('utf-8') + bchars = bchars.decode('utf-8') + + assert num_octets_that_encode_to_this_many_chars(len(bchars)) == original_len, "%s != %s, numchars: %s" % ( + num_octets_that_encode_to_this_many_chars(len(bchars)), original_len(os), len(bchars) + ) + + return bchars def num_octets_that_encode_to_this_many_chars(numcs): return log_floor(62**numcs, 256) From 8ec363b2b781b917b7a4e17c975420f516d4d9a7 Mon Sep 17 00:00:00 2001 From: Vyacheslav Tverskoy Date: Thu, 12 Dec 2019 14:13:31 +0500 Subject: [PATCH 16/16] Clarify that div_ceil is copied from pyutil --- zbase62/test/test_base62.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zbase62/test/test_base62.py b/zbase62/test/test_base62.py index 45b2138..d261508 100644 --- a/zbase62/test/test_base62.py +++ b/zbase62/test/test_base62.py @@ -15,6 +15,8 @@ def div_ceil(n, d): """ The smallest integer k such that k*d >= n. + + Copied from https://pypi.org/project/pyutil/ """ return int((n//d) + (n%d != 0))