Skip to content

Commit 67d6b24

Browse files
authored
[bigint] Optimize parsing of numeric strings (#50)
This pull request makes several changes to the `decimojo` package, focusing on `BigInt` arithmetic operations and on the parsing of numeric strings. The most important changes are the introduction of a method to check whether a `BigInt` is zero, the replacement of string-based coefficients with list-based (`List[UInt8]`) coefficients, and the addition of augmented arithmetic assignment methods.

Improvements to `BigInt` arithmetic operations:
* `src/decimojo/bigint/arithmetics.mojo`: Replaced the explicit word-level zero checks in the `add` function with calls to the `is_zero` method.
* `src/decimojo/bigint/arithmetics.mojo`: Changed the return statements in the `add`, `subtract`, and `negative` functions to `return result^`, which transfers ownership of the result to the caller.
* `src/decimojo/bigint/arithmetics.mojo`: Simplified the `absolute` function by directly returning the negated value if the sign is set.

Parsing and coefficient handling:
* `src/decimojo/str.mojo`: Renamed `parse_string_of_number` to `parse_numeric_string` and changed it to return the coefficient as a list of `UInt8` digits instead of a string, improving efficiency and clarity. An empty input string now raises an error instead of returning an empty coefficient.
* `src/decimojo/bigint/bigint.mojo`: Updated the `from_string` method to use the new list-based coefficients and adjusted its logic to build each word directly from the digits.

New augmented arithmetic assignment methods:
* `src/decimojo/bigint/bigint.mojo`: Added the `__iadd__` and `__isub__` methods for in-place addition and subtraction, respectively.
1 parent b8c9991 commit 67d6b24

File tree

3 files changed

+79
-53
lines changed

3 files changed

+79
-53
lines changed

src/decimojo/bigint/arithmetics.mojo

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ fn add(x1: BigInt, x2: BigInt) raises -> BigInt:
3838
"""
3939

4040
# If one of the numbers is zero, return the other number
41-
if len(x1.words) == 1 and x1.words[0] == 0:
41+
if x1.is_zero():
4242
return x2
43-
if len(x2.words) == 1 and x2.words[0] == 0:
43+
if x2.is_zero():
4444
return x1
4545

4646
# If signs are different, we use `subtract` instead
@@ -81,7 +81,7 @@ fn add(x1: BigInt, x2: BigInt) raises -> BigInt:
8181
if carry > 0:
8282
result.words.append(carry)
8383

84-
return result
84+
return result^
8585

8686

8787
fn subtract(x1: BigInt, x2: BigInt) raises -> BigInt:
@@ -157,7 +157,7 @@ fn subtract(x1: BigInt, x2: BigInt) raises -> BigInt:
157157
while len(result.words) > 1 and result.words[len(result.words) - 1] == 0:
158158
result.words.resize(len(result.words) - 1)
159159

160-
return result
160+
return result^
161161

162162

163163
fn negative(x: BigInt) -> BigInt:
@@ -171,7 +171,7 @@ fn negative(x: BigInt) -> BigInt:
171171
"""
172172
var result = x
173173
result.sign = not result.sign
174-
return result
174+
return result^
175175

176176

177177
fn absolute(x: BigInt) -> BigInt:
@@ -184,8 +184,6 @@ fn absolute(x: BigInt) -> BigInt:
184184
A new BigInt containing the absolute value of x.
185185
"""
186186
if x.sign:
187-
var result = x
188-
result.sign = False
189-
return result
187+
return -x
190188
else:
191189
return x

src/decimojo/bigint/bigint.mojo

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ mathematical methods that do not implement a trait.
2525

2626
from memory import UnsafePointer
2727
import testing
28+
import time
2829

2930
import decimojo.bigint.arithmetics
3031
import decimojo.bigint.comparison
@@ -244,41 +245,40 @@ struct BigInt(Absable, IntableRaising, Writable):
244245
@staticmethod
245246
fn from_string(value: String) raises -> BigInt:
246247
"""Initializes a BigInt from a string representation.
247-
The string is normalized with `deciomojo.str.parse_string_of_number()`.
248+
The string is normalized with `decimojo.str.parse_numeric_string()`.
248249
249250
Args:
250251
value: The string representation of the BigInt.
251252
252253
Returns:
253254
The BigInt representation of the string.
254255
"""
255-
256-
var coef_string: String
256+
var coef: List[UInt8]
257257
var scale: Int
258258
var sign: Bool
259-
coef_string, scale, sign = decimojo.str.parse_string_of_number(value)
259+
coef, scale, sign = decimojo.str.parse_numeric_string(value)
260260

261261
# Check if the number is zero
262-
if coef_string == "0":
263-
return Self()
262+
if len(coef) == 1 and coef[0] == UInt8(0):
263+
return Self.from_raw_words(UInt32(0), sign=sign)
264264

265265
# Check whether the number is an integer
266266
# If the fractional part is not zero, raise an error
267267
# If the fractional part is zero, remove the fractional part
268268
if scale > 0:
269-
if scale >= len(coef_string):
269+
if scale >= len(coef):
270270
raise Error(
271271
"Error in `from_string`: The number is not an integer."
272272
)
273273
for i in range(1, scale + 1):
274-
if coef_string[-i] != "0":
274+
if coef[-i] != 0:
275275
raise Error(
276276
"Error in `from_string`: The number is not an integer."
277277
)
278-
coef_string = coef_string[:-scale]
278+
coef.resize(-scale)
279279
scale = 0
280280

281-
var number_of_digits = len(coef_string) - scale
281+
var number_of_digits = len(coef) - scale
282282
var number_of_words = number_of_digits // 9
283283
if number_of_digits % 9 != 0:
284284
number_of_words += 1
@@ -288,23 +288,24 @@ struct BigInt(Absable, IntableRaising, Writable):
288288

289289
if scale == 0:
290290
# This is a true integer
291-
var number_of_digits = len(coef_string)
291+
var number_of_digits = len(coef)
292292
var number_of_words = number_of_digits // 9
293293
if number_of_digits % 9 != 0:
294294
number_of_words += 1
295295

296-
var result = Self(empty=True, capacity=number_of_words)
297-
result.sign = sign
298-
299296
var end: Int = number_of_digits
300297
var start: Int
301298
while end >= 9:
302299
start = end - 9
303-
var word = UInt32(Int(coef_string[start:end]))
300+
var word: UInt32 = 0
301+
for digit in coef[start:end]:
302+
word = word * 10 + UInt32(digit[])
304303
result.words.append(word)
305304
end = start
306305
if end > 0:
307-
var word = UInt32(Int(coef_string[0:end]))
306+
var word: UInt32 = 0
307+
for digit in coef[0:end]:
308+
word = word * 10 + UInt32(digit[])
308309
result.words.append(word)
309310

310311
return result
@@ -317,17 +318,22 @@ struct BigInt(Absable, IntableRaising, Writable):
317318
for _ in range(number_of_trailing_zero_words):
318319
result.words.append(UInt32(0))
319320

320-
coef_string += "0" * remaining_trailing_zero_digits
321+
for _ in range(remaining_trailing_zero_digits):
322+
coef.append(UInt8(0))
321323

322324
var end: Int = number_of_digits + scale + remaining_trailing_zero_digits
323325
var start: Int
324326
while end >= 9:
325327
start = end - 9
326-
var word = UInt32(Int(coef_string[start:end]))
328+
var word: UInt32 = 0
329+
for digit in coef[start:end]:
330+
word = word * 10 + UInt32(digit[])
327331
result.words.append(word)
328332
end = start
329333
if end > 0:
330-
var word = UInt32(Int(coef_string[0:end]))
334+
var word: UInt32 = 0
335+
for digit in coef[0:end]:
336+
word = word * 10 + UInt32(digit[])
331337
result.words.append(word)
332338

333339
return result
@@ -464,6 +470,21 @@ struct BigInt(Absable, IntableRaising, Writable):
464470
fn __sub__(self, other: Self) raises -> Self:
465471
return decimojo.bigint.arithmetics.subtract(self, other)
466472

473+
# ===------------------------------------------------------------------=== #
474+
# Basic binary augmented arithmetic assignments dunders
475+
# These methods are called to implement the binary augmented arithmetic
476+
# assignments
477+
# (+=, -=, *=, @=, /=, //=, %=, **=, <<=, >>=, &=, ^=, |=)
478+
# ===------------------------------------------------------------------=== #
479+
480+
@always_inline
481+
fn __iadd__(mut self, other: Self) raises:
482+
self = decimojo.bigint.arithmetics.add(self, other)
483+
484+
@always_inline
485+
fn __isub__(mut self, other: Self) raises:
486+
self = decimojo.bigint.arithmetics.subtract(self, other)
487+
467488
# ===------------------------------------------------------------------=== #
468489
# Other methods
469490
# ===------------------------------------------------------------------=== #

src/decimojo/str.mojo

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,20 @@
1616

1717
"""String manipulation functions."""
1818

19+
import time
1920

20-
fn parse_string_of_number(value: String) raises -> Tuple[String, Int, Bool]:
21+
22+
fn parse_numeric_string(
23+
value: String,
24+
) raises -> Tuple[List[UInt8], Int, Bool]:
2125
"""Parse the string of a number into normalized parts.
2226
2327
Args:
2428
value: The string representation of a number.
2529
2630
Returns:
2731
A tuple of:
28-
- Normalized string which represents an integer.
32+
- Normalized coefficient as List[UInt8] which represents an integer.
2933
- Scale of the number.
3034
- Sign of the number.
3135
@@ -45,12 +49,13 @@ fn parse_string_of_number(value: String) raises -> Tuple[String, Int, Bool]:
4549
4650
Examples:
4751
```console
48-
parse_string("123") -> ("123", 0)
49-
parse_string("123.456") -> ("123456", 3)
50-
parse_string("123.456e3") -> ("123456", 0)
51-
parse_string("123.456e-3") -> ("123456", 6)
52-
parse_string("123.456e+10") -> ("123456", -7)
53-
parse_string("0.00123456") -> ("123456", 8)
52+
parse_numeric_string("123") -> (123, 0, False)
53+
parse_numeric_string("123.456") -> (123456, 3, False)
54+
parse_numeric_string("123.456e3") -> (123456, 0, False)
55+
parse_numeric_string("123.456e-3") -> (123456, 6, False)
56+
parse_numeric_string("123.456e+10") -> (123456, -7, False)
57+
parse_numeric_string("0.00123456") -> (123456, 8, False)
58+
parse_numeric_string("-123") -> (123, 0, True)
5459
```
5560
End of examples.
5661
"""
@@ -60,7 +65,7 @@ fn parse_string_of_number(value: String) raises -> Tuple[String, Int, Bool]:
6065
var value_bytes_len = len(value_bytes)
6166

6267
if value_bytes_len == 0:
63-
return Tuple(String(""), 0, False)
68+
raise Error("Error in `parse_numeric_string`: Empty string.")
6469

6570
if value_bytes_len != value_string_slice.char_length():
6671
raise Error(
@@ -81,19 +86,20 @@ fn parse_string_of_number(value: String) raises -> Tuple[String, Int, Bool]:
8186

8287
var mantissa_sign: Bool = False # True if negative
8388
var exponent_sign: Bool = False # True if negative
84-
var coef_string: String = ""
89+
var coef: List[UInt8] = List[UInt8](capacity=value_bytes_len)
8590
var scale: Int = 0
8691
var raw_exponent: Int = 0
8792

88-
for code in value_bytes:
93+
for code_ptr in value_bytes:
94+
var code = code_ptr[]
8995
# If the char is " ", skip it
90-
if code[] == 32:
96+
if code == 32:
9197
pass
9298
# If the char is "," or "_", skip it
93-
elif code[] == 44 or code[] == 95:
99+
elif code == 44 or code == 95:
94100
unexpected_end_char = True
95101
# If the char is "-"
96-
elif code[] == 45:
102+
elif code == 45:
97103
unexpected_end_char = True
98104
if exponent_sign_read:
99105
raise Error("Minus sign cannot appear twice in exponent.")
@@ -106,7 +112,7 @@ fn parse_string_of_number(value: String) raises -> Tuple[String, Int, Bool]:
106112
mantissa_sign = True
107113
mantissa_sign_read = True
108114
# If the char is "+"
109-
elif code[] == 43:
115+
elif code == 43:
110116
unexpected_end_char = True
111117
if exponent_sign_read:
112118
raise Error("Plus sign cannot appear twice in exponent.")
@@ -117,15 +123,15 @@ fn parse_string_of_number(value: String) raises -> Tuple[String, Int, Bool]:
117123
else:
118124
mantissa_sign_read = True
119125
# If the char is "."
120-
elif code[] == 46:
126+
elif code == 46:
121127
unexpected_end_char = False
122128
if decimal_point_read:
123129
raise Error("Decimal point can only appear once.")
124130
else:
125131
decimal_point_read = True
126132
mantissa_sign_read = True
127133
# If the char is "e" or "E"
128-
elif code[] == 101 or code[] == 69:
134+
elif code == 101 or code == 69:
129135
unexpected_end_char = True
130136
if exponent_notation_read:
131137
raise Error("Exponential notation can only appear once.")
@@ -135,7 +141,7 @@ fn parse_string_of_number(value: String) raises -> Tuple[String, Int, Bool]:
135141
exponent_notation_read = True
136142

137143
# If the char is a digit 0
138-
elif code[] == 48:
144+
elif code == 48:
139145
unexpected_end_char = False
140146

141147
# Exponent part
@@ -150,41 +156,42 @@ fn parse_string_of_number(value: String) raises -> Tuple[String, Int, Bool]:
150156
mantissa_start = True
151157

152158
if mantissa_significant_start:
153-
coef_string += "0"
159+
coef.append(0)
154160

155161
if decimal_point_read:
156162
scale += 1
157163

158164
# If the char is a digit 1 - 9
159-
elif code[] >= 49 and code[] <= 57:
165+
elif code >= 49 and code <= 57:
160166
unexpected_end_char = False
161167

162168
# Exponent part
163169
if exponent_notation_read:
164170
exponent_start = True
165-
raw_exponent = raw_exponent * 10 + Int(code[] - 48)
171+
raw_exponent = raw_exponent * 10 + Int(code - 48)
166172

167173
# Mantissa part
168174
else:
169175
mantissa_significant_start = True
170176
mantissa_start = True
171-
coef_string += String(code[] - 48)
177+
coef.append(code - 48)
172178
if decimal_point_read:
173179
scale += 1
174180

175181
else:
176182
raise Error(
177183
"Invalid character in the string of the number: {}".format(
178-
chr(Int(code[]))
184+
chr(Int(code))
179185
)
180186
)
181187

182188
if unexpected_end_char:
183189
raise Error("Unexpected end character in the string of the number.")
184190

185-
if len(coef_string) == 0:
191+
if len(coef) == 0:
192+
# For example, "0000."
186193
if mantissa_start:
187-
coef_string = "0"
194+
coef.append(0)
188195
else:
189196
raise Error("No digits found in the string of the number.")
190197

@@ -200,4 +207,4 @@ fn parse_string_of_number(value: String) raises -> Tuple[String, Int, Bool]:
200207
# 1.234e8 -> 1234e5 -> 1234 and scale = -5
201208
scale -= raw_exponent
202209

203-
return Tuple(coef_string, scale, mantissa_sign)
210+
return Tuple(coef, scale, mantissa_sign)

0 commit comments

Comments
 (0)