@@ -741,72 +741,57 @@ fn parse_e_notation<T: DecimalType>(
741741 let mut exp: i16 = 0 ;
742742 let base = T :: Native :: usize_as ( 10 ) ;
743743
744- let mut exp_start: bool = false ;
745744 // e has a plus sign
746745 let mut pos_shift_direction: bool = true ;
747746
748- // skip to point or exponent index
749- let mut bs;
750- if fractionals > 0 {
751- // it's a fraction, so the point index needs to be skipped, so +1
752- bs = s. as_bytes ( ) . iter ( ) . skip ( index + fractionals as usize + 1 ) ;
753- } else {
754- // it's actually an integer that is already written into the result, so let's skip on to e
755- bs = s. as_bytes ( ) . iter ( ) . skip ( index) ;
756- }
747+ // skip to the exponent index directly or just after any processed fractionals
748+ let mut bs = s. as_bytes ( ) . iter ( ) . skip ( index + fractionals as usize ) ;
757749
758- while let Some ( b) = bs. next ( ) {
750+ // This function is only called from `parse_decimal`, in which we skip parsing any fractionals
751+ // after we reach `scale` digits, not knowing ahead of time whether the decimal contains an
752+ // e-notation or not.
753+ // So once we do hit into an e-notation, and drop down into this function, we need to parse the
754+ // remaining unprocessed fractionals too, since otherwise we might lose precision.
755+ for b in bs. by_ref ( ) {
759756 match b {
760757 b'0' ..=b'9' => {
761758 result = result. mul_wrapping ( base) ;
762759 result = result. add_wrapping ( T :: Native :: usize_as ( ( b - b'0' ) as usize ) ) ;
763- if fractionals > 0 {
764- fractionals += 1 ;
765- }
760+ fractionals += 1 ;
766761 digits += 1 ;
767762 }
768- & b'e' | & b'E' => {
769- exp_start = true ;
763+ b'e' | b'E' => {
764+ break ;
770765 }
771766 _ => {
772767 return Err ( ArrowError :: ParseError ( format ! (
773768 "can't parse the string value {s} to decimal"
774769 ) ) ) ;
775770 }
776771 } ;
772+ }
777773
778- if exp_start {
779- pos_shift_direction = match bs. next ( ) {
780- Some ( & b'-' ) => false ,
781- Some ( & b'+' ) => true ,
782- Some ( b) => {
783- if !b. is_ascii_digit ( ) {
784- return Err ( ArrowError :: ParseError ( format ! (
785- "can't parse the string value {s} to decimal"
786- ) ) ) ;
787- }
788-
789- exp *= 10 ;
790- exp += ( b - b'0' ) as i16 ;
791-
792- true
793- }
794- None => {
795- return Err ( ArrowError :: ParseError ( format ! (
796- "can't parse the string value {s} to decimal"
797- ) ) ) ;
798- }
799- } ;
800-
801- for b in bs. by_ref ( ) {
802- if !b. is_ascii_digit ( ) {
803- return Err ( ArrowError :: ParseError ( format ! (
804- "can't parse the string value {s} to decimal"
805- ) ) ) ;
806- }
774+ // parse the exponent itself
775+ let mut signed = false ;
776+ for b in bs {
777+ match b {
778+ b'-' if !signed => {
779+ pos_shift_direction = false ;
780+ signed = true ;
781+ }
782+ b'+' if !signed => {
783+ pos_shift_direction = true ;
784+ signed = true ;
785+ }
786+ b if b. is_ascii_digit ( ) => {
807787 exp *= 10 ;
808788 exp += ( b - b'0' ) as i16 ;
809789 }
790+ _ => {
791+ return Err ( ArrowError :: ParseError ( format ! (
792+ "can't parse the string value {s} to decimal"
793+ ) ) ) ;
794+ }
810795 }
811796 }
812797
@@ -850,7 +835,11 @@ fn parse_e_notation<T: DecimalType>(
850835}
851836
852837/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
853- /// The result value can't be out of bounds.
838+ /// Expected behavior:
839+ /// - The result value can't be out of bounds.
840+ /// - When parsing a decimal with scale 0, all fractional digits will be discarded. The final
841+ /// fractional digits may be a subset or a superset of the digits after the decimal point when
842+ /// e-notation is used.
854843pub fn parse_decimal < T : DecimalType > (
855844 s : & str ,
856845 precision : u8 ,
@@ -862,18 +851,24 @@ pub fn parse_decimal<T: DecimalType>(
862851 let base = T :: Native :: usize_as ( 10 ) ;
863852
864853 let bs = s. as_bytes ( ) ;
865- let ( signed, negative) = match bs. first ( ) {
866- Some ( b'-' ) => ( true , true ) ,
867- Some ( b'+' ) => ( true , false ) ,
868- _ => ( false , false ) ,
869- } ;
870854
871- if bs. is_empty ( ) || signed && bs. len ( ) == 1 {
855+ if !bs
856+ . last ( )
857+ . is_some_and ( |b| b. is_ascii_digit ( ) || ( b == & b'.' && s. len ( ) > 1 ) )
858+ {
859+ // If the last character is not a digit (or a decimal point prefixed with some digits), then
860+ // it's not a valid decimal.
872861 return Err ( ArrowError :: ParseError ( format ! (
873862 "can't parse the string value {s} to decimal"
874863 ) ) ) ;
875864 }
876865
866+ let ( signed, negative) = match bs. first ( ) {
867+ Some ( b'-' ) => ( true , true ) ,
868+ Some ( b'+' ) => ( true , false ) ,
869+ _ => ( false , false ) ,
870+ } ;
871+
877872 // Iterate over the raw input bytes, skipping the sign if any
878873 let mut bs = bs. iter ( ) . enumerate ( ) . skip ( signed as usize ) ;
879874
@@ -903,7 +898,7 @@ pub fn parse_decimal<T: DecimalType>(
903898 digits as u16 ,
904899 fractionals as i16 ,
905900 result,
906- point_index,
901+ point_index + 1 ,
907902 precision as u16 ,
908903 scale as i16 ,
909904 ) ?;
@@ -916,7 +911,7 @@ pub fn parse_decimal<T: DecimalType>(
916911 "can't parse the string value {s} to decimal"
917912 ) ) ) ;
918913 }
919- if fractionals == scale && scale != 0 {
914+ if fractionals == scale {
920915 // We have processed all the digits that we need. All that
921916 // is left is to validate that the rest of the string contains
922917 // valid digits.
@@ -931,13 +926,6 @@ pub fn parse_decimal<T: DecimalType>(
931926 if is_e_notation {
932927 break ;
933928 }
934-
935- // Fail on "."
936- if digits == 0 {
937- return Err ( ArrowError :: ParseError ( format ! (
938- "can't parse the string value {s} to decimal"
939- ) ) ) ;
940- }
941929 }
942930 b'e' | b'E' => {
943931 result = parse_e_notation :: < T > (
@@ -2613,6 +2601,9 @@ mod tests {
26132601 "1.1e.12" ,
26142602 "1.23e+3." ,
26152603 "1.23e+3.1" ,
2604+ "1e" ,
2605+ "1e+" ,
2606+ "1e-" ,
26162607 ] ;
26172608 for s in can_not_parse_tests {
26182609 let result_128 = parse_decimal :: < Decimal128Type > ( s, 20 , 3 ) ;
@@ -2636,6 +2627,7 @@ mod tests {
26362627 ( "12345678908765.123456" , 3 ) ,
26372628 ( "123456789087651234.56e-4" , 3 ) ,
26382629 ( "1234560000000" , 0 ) ,
2630+ ( "12345678900.0" , 0 ) ,
26392631 ( "1.23456e12" , 0 ) ,
26402632 ] ;
26412633 for ( s, scale) in overflow_parse_tests {
@@ -2752,6 +2744,45 @@ mod tests {
27522744 let result = parse_decimal :: < Decimal256Type > ( s, 76 , scale) ;
27532745 assert_eq ! ( i, result. unwrap( ) ) ;
27542746 }
2747+
2748+ let zero_scale_tests = [
2749+ ( ".123" , 0 , 3 ) ,
2750+ ( "0.123" , 0 , 3 ) ,
2751+ ( "1.0" , 1 , 3 ) ,
2752+ ( "1.2" , 1 , 3 ) ,
2753+ ( "1.00" , 1 , 3 ) ,
2754+ ( "1.23" , 1 , 3 ) ,
2755+ ( "1.000" , 1 , 3 ) ,
2756+ ( "1.123" , 1 , 3 ) ,
2757+ ( "123.0" , 123 , 3 ) ,
2758+ ( "123.4" , 123 , 3 ) ,
2759+ ( "123.00" , 123 , 3 ) ,
2760+ ( "123.45" , 123 , 3 ) ,
2761+ ( "123.000000000000000000004" , 123 , 3 ) ,
2762+ ( "0.123e2" , 12 , 3 ) ,
2763+ ( "0.123e4" , 1230 , 10 ) ,
2764+ ( "1.23e4" , 12300 , 10 ) ,
2765+ ( "12.3e4" , 123000 , 10 ) ,
2766+ ( "123e4" , 1230000 , 10 ) ,
2767+ (
2768+ "20000000000000000000000000000000000002.0" ,
2769+ 20000000000000000000000000000000000002 ,
2770+ 38 ,
2771+ ) ,
2772+ ] ;
2773+ for ( s, i, precision) in zero_scale_tests {
2774+ let result_128 = parse_decimal :: < Decimal128Type > ( s, precision, 0 ) . unwrap ( ) ;
2775+ assert_eq ! ( i, result_128) ;
2776+ }
2777+
2778+ let can_not_parse_zero_scale = [ "." , "blag" , "" , "+" , "-" , "e" ] ;
2779+ for s in can_not_parse_zero_scale {
2780+ let result_128 = parse_decimal :: < Decimal128Type > ( s, 5 , 0 ) ;
2781+ assert_eq ! (
2782+ format!( "Parser error: can't parse the string value {s} to decimal" ) ,
2783+ result_128. unwrap_err( ) . to_string( ) ,
2784+ ) ;
2785+ }
27552786 }
27562787
27572788 #[ test]
0 commit comments