Skip to content

Commit

Permalink
Extract Units Info (quantity and conversion) (#4151)
Browse files Browse the repository at this point in the history
  • Loading branch information
younies authored Nov 16, 2023
1 parent 01a17cb commit 2b06bea
Show file tree
Hide file tree
Showing 9 changed files with 4,777 additions and 619 deletions.
167 changes: 128 additions & 39 deletions experimental/unitsconversion/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
//!
//! Read more about data providers: [`icu_provider`]
use alloc::borrow::Cow;
use icu_provider::prelude::*;
use zerovec::{ZeroMap, ZeroVec};
use zerovec::{VarZeroVec, ZeroMap, ZeroVec};

#[cfg(feature = "datagen")]
/// The latest minimum set of keys required by this component.
pub const KEYS: &[DataKey] = &[UnitsConstantsV1Marker::KEY];
pub const KEYS: &[DataKey] = &[UnitsInfoV1Marker::KEY];

/// This type encapsulates all the constant data required for unit conversions.
///
Expand All @@ -23,7 +24,7 @@ pub const KEYS: &[DataKey] = &[UnitsConstantsV1Marker::KEY];
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[icu_provider::data_struct(marker(UnitsConstantsV1Marker, "units/constants@1", singleton))]
#[icu_provider::data_struct(marker(UnitsInfoV1Marker, "units/info@1", singleton))]
#[derive(Clone, PartialEq, Debug)]
#[cfg_attr(
feature = "datagen",
Expand All @@ -32,34 +33,47 @@ pub const KEYS: &[DataKey] = &[UnitsConstantsV1Marker::KEY];
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[yoke(prove_covariance_manually)]
pub struct UnitsConstantsV1<'data> {
// TODO(#3882): Use a more efficient representation for the values with numerators and denominators.
// Also, the constant types.
/// Maps from constant name (e.g. ft_to_m) to the value of the constant (e.g. 0.3048).
pub struct UnitsInfoV1<'data> {
// TODO(#4313).
/// Maps from unit name (e.g. foot) to the indices of the unit's entries in the `unit_dimensions` and `convert_infos` vectors.
#[cfg_attr(feature = "serde", serde(borrow))]
pub constants_map: ZeroMap<'data, str, ConstantValueULE>,
pub units_info: ZeroMap<'data, str, UnitsInfoIndex>,

/// Contains the dimensions information for the units.
/// For example, the dimension for the unit `foot` is `length`.
#[cfg_attr(feature = "serde", serde(borrow))]
pub unit_dimensions: VarZeroVec<'data, DimensionULE>,

/// Contains the conversion information, such as the conversion rate and the base unit.
/// For example, the conversion information for the unit `foot` is `1 foot = 0.3048 meter`.
#[cfg_attr(feature = "serde", serde(borrow))]
pub convert_infos: VarZeroVec<'data, ConversionInfoULE>,
}

/// This enum is used to represent the type of a constant value.
/// It can be either `ConstantType::Actual` or `ConstantType::Approximate`.
/// If the constant type is `ConstantType::Approximate`, it indicates that the value is not numerically accurate.
#[zerovec::make_ule(ConstantExactnessULE)]
#[zerovec::make_ule(UnitsInfoIndexULE)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
derive(databake::Bake),
databake(path = icu_unitsconversion::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
#[repr(u8)]
pub enum ConstantExactness {
#[default]
Exact = 0,
Approximate = 1,
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Copy, Debug, Clone, Default, PartialEq, PartialOrd, Eq, Ord)]
pub struct UnitsInfoIndex {
// TODO(#4313).
/// Contains the index of the dimension in the `unit_dimensions` vector.
/// If the unit does not have a quantity, this field is `None`.
pub dimension: Option<u16>,

// TODO(#4313).
/// Contains the index of the conversion info in the `convert_infos` vector.
/// If the unit does not have a convert unit, this field is `None`.
pub convert_info: Option<u16>,
}

/// This enum is used to represent the sign of a constant value.
#[zerovec::make_ule(SignULE)]
/// Specifies if the unit is a base unit or a derived unit.
/// If derived, this means each unit is derived from a base unit.
/// For example: "foot-per-second" is derived from "meter" and "second".
#[zerovec::make_ule(DerivationSpecifierULE)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
Expand All @@ -68,15 +82,13 @@ pub enum ConstantExactness {
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
#[repr(u8)]
pub enum Sign {
pub enum DerivationSpecifier {
#[default]
Positive = 0,
Negative = 1,
Base = 0,
Derived = 1,
}

// TODO(#4098): Improve the ULE representation. Consider using a single byte for sign and type representation.
/// This struct encapsulates a constant value, comprising a numerator, denominator, sign, and type.
#[zerovec::make_varule(ConstantValueULE)]
#[zerovec::make_varule(DimensionULE)]
#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
#[cfg_attr(
feature = "datagen",
Expand All @@ -94,20 +106,97 @@ pub enum Sign {
zerovec::derive(Deserialize)
)]
#[zerovec::derive(Debug)]
pub struct ConstantValue<'data> {
// TODO(https://github.com/unicode-org/icu4x/issues/4092).
/// The numerator of the constant value in bytes starting with the least significant byte.
pub struct Dimension<'data> {
/// Contains the quantity name.
// TODO(#4173): Consider using an enum for the quantity name.
#[cfg_attr(feature = "serde", serde(borrow))]
pub numerator: ZeroVec<'data, u8>,
pub quantity: Cow<'data, str>,

/// Specifies whether this is a base quantity or a derived one (see `DerivationSpecifier`).
pub specifier: DerivationSpecifier,
}

// TODO(https://github.com/unicode-org/icu4x/issues/4092).
/// The denominator of the constant value in bytes starting with the least significant byte.
/// Represents the conversion information for a unit,
/// which includes the base unit (the unit which the unit is converted to), the conversion factor, and the offset.
#[zerovec::make_varule(ConversionInfoULE)]
#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
#[cfg_attr(
feature = "datagen",
derive(databake::Bake),
databake(path = icu_unitsconversion::provider),
)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize),
zerovec::derive(Serialize)
)]
#[cfg_attr(
feature = "serde",
derive(serde::Deserialize),
zerovec::derive(Deserialize)
)]
#[zerovec::derive(Debug)]
pub struct ConversionInfo<'data> {
/// Contains the base unit which the unit is converted to.
#[cfg_attr(feature = "serde", serde(borrow))]
pub denominator: ZeroVec<'data, u8>,
pub base_unit: Cow<'data, str>,

/// Determines whether the constant value is positive or negative.
pub sign: Sign,
/// Represents the numerator of the conversion factor.
#[cfg_attr(feature = "serde", serde(borrow))]
pub factor_num: ZeroVec<'data, u8>,

/// Determines whether the constant value is actual or approximate.
pub constant_exactness: ConstantExactness,
/// Represents the denominator of the conversion factor.
#[cfg_attr(feature = "serde", serde(borrow))]
pub factor_den: ZeroVec<'data, u8>,

/// Represents the sign of the conversion factor.
pub factor_sign: Sign,

// TODO(#4311).
/// Represents the numerator of the offset.
#[cfg_attr(feature = "serde", serde(borrow))]
pub offset_num: ZeroVec<'data, u8>,

// TODO(#4311).
/// Represents the denominator of the offset.
#[cfg_attr(feature = "serde", serde(borrow))]
pub offset_den: ZeroVec<'data, u8>,

/// Represents the sign of the offset.
pub offset_sign: Sign,

/// Represents the exactness of the conversion factor.
pub exactness: Exactness,
}

/// Represents the sign of a value.
///
/// `ConversionInfo` uses it for the sign of the conversion factor and the sign of the offset.
#[zerovec::make_ule(SignULE)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_unitsconversion::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
#[repr(u8)]
pub enum Sign {
/// The value is positive. This is the default variant.
#[default]
Positive = 0,
/// The value is negative.
Negative = 1,
}

/// Represents the exactness of a conversion factor.
///
/// `ConversionInfo` stores one of these in its `exactness` field.
#[zerovec::make_ule(ExactnessULE)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_unitsconversion::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
#[repr(u8)]
pub enum Exactness {
/// The factor is exact. This is the default variant.
#[default]
Exact = 0,
/// The factor is approximate, i.e. not numerically accurate.
Approximate = 1,
}
2 changes: 1 addition & 1 deletion provider/datagen/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ macro_rules! registry {
registry!(
#[cfg(test)]
icu_singlenumberformatter::provider::CurrencyEssentialsV1Marker = "currency/essentials@1",
icu_unitsconversion::provider::UnitsConstantsV1Marker = "units/constants@1",
icu_unitsconversion::provider::UnitsInfoV1Marker = "units/info@1",
#[cfg(any(all(), feature = "icu_calendar"))]
icu_calendar::provider::JapaneseErasV1Marker = "calendar/japanese@1",
icu_calendar::provider::JapaneseExtendedErasV1Marker = "calendar/japanext@1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,58 @@ pub struct Constant {
pub description: Option<String>,
}

/// One unit-quantity entry, deserialized from an object with `_`-prefixed keys.
#[derive(PartialEq, Debug, Deserialize)]
pub struct Quantity {
/// Value of the required `_quantity` key.
#[serde(rename = "_quantity")]
pub quantity: String,

/// Value of the optional `_status` key.
#[serde(rename = "_status")]
pub status: Option<String>,

/// Value of the optional `_description` key.
#[serde(rename = "_description")]
pub description: Option<String>,
}

/// One unit-conversion entry, deserialized from an object with `_`-prefixed keys.
#[derive(PartialEq, Debug, Deserialize)]
pub struct ConvertUnit {
/// Value of the required `_baseUnit` key.
#[serde(rename = "_baseUnit")]
pub base_unit: String,

/// Value of the optional `_factor` key, kept as an unparsed string.
#[serde(rename = "_factor")]
pub factor: Option<String>,

/// Value of the optional `_offset` key, kept as an unparsed string.
#[serde(rename = "_offset")]
pub offset: Option<String>,
}

/// Wrapper around the map of unit constants.
#[derive(PartialEq, Debug, Deserialize)]
pub struct UnitConstants {
/// All keys of the deserialized object, flattened into a map from key to `Constant`.
#[serde(flatten)]
pub constants: BTreeMap<String, Constant>,
}

/// Wrapper around the map of unit quantities.
#[derive(PartialEq, Debug, Deserialize)]
pub struct UnitQuantities {
/// All keys of the deserialized object, flattened into a map from key to `Quantity`.
#[serde(flatten)]
pub quantities: BTreeMap<String, Quantity>,
}

/// Wrapper around the map of unit conversions.
#[derive(PartialEq, Debug, Deserialize)]
pub struct ConvertUnits {
/// All keys of the deserialized object, flattened into a map from key to `ConvertUnit`.
#[serde(flatten)]
pub convert_units: BTreeMap<String, ConvertUnit>,
}

/// Groups the unit-related sections read from the supplemental data:
/// constants, quantities and conversions.
#[derive(PartialEq, Debug, Deserialize)]
pub struct Supplemental {
/// Contents of the `unitConstants` key.
#[serde(rename = "unitConstants")]
pub unit_constants: UnitConstants,

/// Contents of the `unitQuantities` key.
#[serde(rename = "unitQuantities")]
pub unit_quantities: UnitQuantities,

/// Contents of the `convertUnits` key.
#[serde(rename = "convertUnits")]
pub convert_units: ConvertUnits,
}

#[derive(Deserialize)]
Expand Down
Loading

0 comments on commit 2b06bea

Please sign in to comment.