From 85b59bd3f050228ea861064cd462ec28d4b82591 Mon Sep 17 00:00:00 2001
From: Yin Jifeng <jifeng.yin@gmail.com>
Date: Wed, 24 Jan 2024 22:26:22 +0800
Subject: [PATCH] truncate decimal fraction based on scale, instead of failure

---
 .../src/legacy/compute/decimal.rs             | 59 +++++++++----------
 1 file changed, 27 insertions(+), 32 deletions(-)

diff --git a/crates/polars-arrow/src/legacy/compute/decimal.rs b/crates/polars-arrow/src/legacy/compute/decimal.rs
index e64952d74b995..4afc35f0993d5 100644
--- a/crates/polars-arrow/src/legacy/compute/decimal.rs
+++ b/crates/polars-arrow/src/legacy/compute/decimal.rs
@@ -62,41 +62,36 @@ pub(crate) fn deserialize_decimal(
     // at scale 0; there is no scale -2 where it would only need precision 2).
     let lhs_s = lhs_b.len() as u8 - leading_zeros(lhs_b);
 
+    if lhs_s + scale > precision {
+        // the integer already exceeds the precision
+        return None;
+    }
+
     let abs = parse_integer_checked(lhs_b).and_then(|x| match rhs {
         // A decimal separator was found, so LHS and RHS need to be combined.
-        Some(rhs) => parse_integer_checked(rhs)
-            .map(|y| (x, y, rhs))
-            .and_then(|(lhs, rhs, rhs_b)| {
-                // We include all digits on the RHS, including both leading and trailing zeros,
-                // as significant.  This is consistent with standard scientific practice for writing
-                // numbers.  However, an alternative for parsing could truncate trailing zeros that extend
-                // beyond the scale: we choose not to do this here.
-                let scale_adjust = scale as i8 - rhs_b.len() as i8;
-
-                if (lhs_s + scale > precision)
-                    || (scale_adjust < 0)
-                    || matches!(rhs_b.first(), Some(b'+' | b'-'))
-                {
-                    // LHS significant figures and scale exceed precision,
-                    // RHS significant figures (all digits in RHS) exceed scale, or
-                    // RHS starts with a '+'/'-' sign and the number is not well-formed.
-                    None
-                } else if (rhs_b.len() as u8) == scale {
-                    // RHS has exactly scale significant digits, so no adjustment
-                    // is needed to RHS.
-                    Some((lhs, rhs))
-                } else {
-                    // RHS needs adjustment to scale. scale_adjust is known to be
-                    // positive.
-                    Some((lhs, rhs * 10i128.pow(scale_adjust as u32)))
-                }
+        Some(mut rhs) => {
+            if matches!(rhs.first(), Some(b'+' | b'-')) {
+                // RHS starts with a '+'/'-' sign and the number is not well-formed.
+                return None;
+            }
+            let scale_adjust = if (scale as usize) <= rhs.len() {
+                // Truncate trailing digits that extend beyond the scale
+                rhs = &rhs[..scale as usize];
+                None
+            } else {
+                Some(scale as u32 - rhs.len() as u32)
+            };
+
+            parse_integer_checked(rhs).map(|y| {
+                let lhs = x * 10i128.pow(scale as u32);
+                let rhs = scale_adjust.map_or(y, |s| y * 10i128.pow(s));
+                lhs + rhs
             })
-            .map(|(lhs, rhs)| lhs * 10i128.pow(scale as u32) + rhs),
+        },
         // No decimal separator was found; we have an integer / LHS only.
         None => {
-            if (lhs_s + scale > precision) || lhs_b.is_empty() {
-                // Either the integer itself exceeds the precision, or we simply have
-                // no number at all / an empty string.
+            if lhs_b.is_empty() {
+                // we simply have no number at all / an empty string.
                 return None;
             }
             Some(x * 10i128.pow(scale as u32))
@@ -309,10 +304,10 @@ mod test {
         assert_eq!(deserialize_decimal(val, Some(4), 1), None);
 
         let val = b"1200.010";
-        assert_eq!(deserialize_decimal(val, None, 0), None); // insufficient scale
+        assert_eq!(deserialize_decimal(val, None, 0), Some(1200)); // truncate scale
         assert_eq!(deserialize_decimal(val, None, 3), Some(1200010)); // exact scale
         assert_eq!(deserialize_decimal(val, None, 6), Some(1200010000)); // excess scale
-        assert_eq!(deserialize_decimal(val, Some(7), 0), None); // insufficient precision and scale
+        assert_eq!(deserialize_decimal(val, Some(7), 0), Some(1200)); // sufficient precision and truncate scale
         assert_eq!(deserialize_decimal(val, Some(7), 3), Some(1200010)); // exact precision and scale
         assert_eq!(deserialize_decimal(val, Some(10), 6), Some(1200010000)); // exact precision, excess scale
         assert_eq!(deserialize_decimal(val, Some(5), 6), None); // insufficient precision, excess scale