Skip to content

Commit

Permalink
truncate decimal fraction based on scale, instead of failure
Browse files Browse the repository at this point in the history
  • Loading branch information
flisky committed Jan 24, 2024
1 parent 412053d commit 85b59bd
Showing 1 changed file with 27 additions and 32 deletions.
59 changes: 27 additions & 32 deletions crates/polars-arrow/src/legacy/compute/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,41 +62,36 @@ pub(crate) fn deserialize_decimal(
// at scale 0; there is no scale -2 where it would only need precision 2).
let lhs_s = lhs_b.len() as u8 - leading_zeros(lhs_b);

if lhs_s + scale > precision {
// the integer already exceeds the precision
return None;
}

let abs = parse_integer_checked(lhs_b).and_then(|x| match rhs {
// A decimal separator was found, so LHS and RHS need to be combined.
Some(rhs) => parse_integer_checked(rhs)
.map(|y| (x, y, rhs))
.and_then(|(lhs, rhs, rhs_b)| {
// We include all digits on the RHS, including both leading and trailing zeros,
// as significant. This is consistent with standard scientific practice for writing
// numbers. However, an alternative for parsing could truncate trailing zeros that extend
// beyond the scale: we choose not to do this here.
let scale_adjust = scale as i8 - rhs_b.len() as i8;

if (lhs_s + scale > precision)
|| (scale_adjust < 0)
|| matches!(rhs_b.first(), Some(b'+' | b'-'))
{
// LHS significant figures and scale exceed precision,
// RHS significant figures (all digits in RHS) exceed scale, or
// RHS starts with a '+'/'-' sign and the number is not well-formed.
None
} else if (rhs_b.len() as u8) == scale {
// RHS has exactly scale significant digits, so no adjustment
// is needed to RHS.
Some((lhs, rhs))
} else {
// RHS needs adjustment to scale. scale_adjust is known to be
// positive.
Some((lhs, rhs * 10i128.pow(scale_adjust as u32)))
}
Some(mut rhs) => {
if matches!(rhs.first(), Some(b'+' | b'-')) {
// RHS starts with a '+'/'-' sign and the number is not well-formed.
return None;
}
let scale_adjust = if (scale as usize) <= rhs.len() {
// Truncate trailing digits that extend beyond the scale
rhs = &rhs[..scale as usize];
None
} else {
Some(scale as u32 - rhs.len() as u32)
};

parse_integer_checked(rhs).map(|y| {
let lhs = x * 10i128.pow(scale as u32);
let rhs = scale_adjust.map_or(y, |s| y * 10i128.pow(s));
lhs + rhs
})
.map(|(lhs, rhs)| lhs * 10i128.pow(scale as u32) + rhs),
},
// No decimal separator was found; we have an integer / LHS only.
None => {
if (lhs_s + scale > precision) || lhs_b.is_empty() {
// Either the integer itself exceeds the precision, or we simply have
// no number at all / an empty string.
if lhs_b.is_empty() {
// we simply have no number at all / an empty string.
return None;
}
Some(x * 10i128.pow(scale as u32))
Expand Down Expand Up @@ -309,10 +304,10 @@ mod test {
assert_eq!(deserialize_decimal(val, Some(4), 1), None);

let val = b"1200.010";
assert_eq!(deserialize_decimal(val, None, 0), None); // insufficient scale
assert_eq!(deserialize_decimal(val, None, 0), Some(1200)); // truncate scale
assert_eq!(deserialize_decimal(val, None, 3), Some(1200010)); // exact scale
assert_eq!(deserialize_decimal(val, None, 6), Some(1200010000)); // excess scale
assert_eq!(deserialize_decimal(val, Some(7), 0), None); // insufficient precision and scale
assert_eq!(deserialize_decimal(val, Some(7), 0), Some(1200)); // sufficient precision and truncate scale
assert_eq!(deserialize_decimal(val, Some(7), 3), Some(1200010)); // exact precision and scale
assert_eq!(deserialize_decimal(val, Some(10), 6), Some(1200010000)); // exact precision, excess scale
assert_eq!(deserialize_decimal(val, Some(5), 6), None); // insufficient precision, excess scale
Expand Down

0 comments on commit 85b59bd

Please sign in to comment.