From 85b59bd3f050228ea861064cd462ec28d4b82591 Mon Sep 17 00:00:00 2001 From: Yin Jifeng Date: Wed, 24 Jan 2024 22:26:22 +0800 Subject: [PATCH] truncate decimal fraction based on scale, instead of failure --- .../src/legacy/compute/decimal.rs | 59 +++++++++---------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/crates/polars-arrow/src/legacy/compute/decimal.rs b/crates/polars-arrow/src/legacy/compute/decimal.rs index e64952d74b995..4afc35f0993d5 100644 --- a/crates/polars-arrow/src/legacy/compute/decimal.rs +++ b/crates/polars-arrow/src/legacy/compute/decimal.rs @@ -62,41 +62,36 @@ pub(crate) fn deserialize_decimal( // at scale 0; there is no scale -2 where it would only need precision 2). let lhs_s = lhs_b.len() as u8 - leading_zeros(lhs_b); + if lhs_s + scale > precision { + // the integer already exceeds the precision + return None; + } + let abs = parse_integer_checked(lhs_b).and_then(|x| match rhs { // A decimal separator was found, so LHS and RHS need to be combined. - Some(rhs) => parse_integer_checked(rhs) - .map(|y| (x, y, rhs)) - .and_then(|(lhs, rhs, rhs_b)| { - // We include all digits on the RHS, including both leading and trailing zeros, - // as significant. This is consistent with standard scientific practice for writing - // numbers. However, an alternative for parsing could truncate trailing zeros that extend - // beyond the scale: we choose not to do this here. - let scale_adjust = scale as i8 - rhs_b.len() as i8; - - if (lhs_s + scale > precision) - || (scale_adjust < 0) - || matches!(rhs_b.first(), Some(b'+' | b'-')) - { - // LHS significant figures and scale exceed precision, - // RHS significant figures (all digits in RHS) exceed scale, or - // RHS starts with a '+'/'-' sign and the number is not well-formed. - None - } else if (rhs_b.len() as u8) == scale { - // RHS has exactly scale significant digits, so no adjustment - // is needed to RHS. - Some((lhs, rhs)) - } else { - // RHS needs adjustment to scale. scale_adjust is known to be - // positive. - Some((lhs, rhs * 10i128.pow(scale_adjust as u32))) - } + Some(mut rhs) => { + if matches!(rhs.first(), Some(b'+' | b'-')) { + // RHS starts with a '+'/'-' sign and the number is not well-formed. + return None; + } + let scale_adjust = if (scale as usize) <= rhs.len() { + // Truncate trailing digits that extend beyond the scale + rhs = &rhs[..scale as usize]; + None + } else { + Some(scale as u32 - rhs.len() as u32) + }; + + parse_integer_checked(rhs).map(|y| { + let lhs = x * 10i128.pow(scale as u32); + let rhs = scale_adjust.map_or(y, |s| y * 10i128.pow(s)); + lhs + rhs }) - .map(|(lhs, rhs)| lhs * 10i128.pow(scale as u32) + rhs), + }, // No decimal separator was found; we have an integer / LHS only. None => { - if (lhs_s + scale > precision) || lhs_b.is_empty() { - // Either the integer itself exceeds the precision, or we simply have - // no number at all / an empty string. + if lhs_b.is_empty() { + // we simply have no number at all / an empty string. return None; } Some(x * 10i128.pow(scale as u32)) @@ -309,10 +304,10 @@ mod test { assert_eq!(deserialize_decimal(val, Some(4), 1), None); let val = b"1200.010"; - assert_eq!(deserialize_decimal(val, None, 0), None); // insufficient scale + assert_eq!(deserialize_decimal(val, None, 0), Some(1200)); // truncate scale assert_eq!(deserialize_decimal(val, None, 3), Some(1200010)); // exact scale assert_eq!(deserialize_decimal(val, None, 6), Some(1200010000)); // excess scale - assert_eq!(deserialize_decimal(val, Some(7), 0), None); // insufficient precision and scale + assert_eq!(deserialize_decimal(val, Some(7), 0), Some(1200)); // sufficient precision and truncate scale assert_eq!(deserialize_decimal(val, Some(7), 3), Some(1200010)); // exact precision and scale assert_eq!(deserialize_decimal(val, Some(10), 6), Some(1200010000)); // exact precision, excess scale assert_eq!(deserialize_decimal(val, Some(5), 6), None); // insufficient precision, excess scale