Skip to content

Commit

Permalink
Add icu_preferences util (#4996)
Browse files Browse the repository at this point in the history
  • Loading branch information
zbraniecki authored Jun 19, 2024
1 parent 25e9094 commit f3b0573
Show file tree
Hide file tree
Showing 53 changed files with 2,098 additions and 33 deletions.
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ members = [
"utils/ixdtf",
"utils/litemap",
"utils/pattern",
"utils/preferences",
"utils/resb",
"utils/tinystr",
"utils/tzif",
Expand Down
2 changes: 1 addition & 1 deletion components/datetime/src/options/preferences.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl Bag {
const H24: Subtag = subtag!("h24");
let hour_cycle = match data_locale
.get_unicode_ext(&key!("hc"))
.and_then(|v| v.as_single_subtag().copied())
.and_then(|v| v.into_single_subtag())
{
Some(H11) => Some(HourCycle::H11),
Some(H12) => Some(HourCycle::H12),
Expand Down
5 changes: 2 additions & 3 deletions components/datetime/src/provider/date_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,8 @@ where
// Skeleton data for ethioaa is stored under ethiopic
if cal_val == &value!("ethioaa") {
locale.set_unicode_ext(key!("ca"), value!("ethiopic"));
} else if cal_val == &value!("islamic")
|| cal_val == &value!("islamicc")
|| cal_val.as_subtags_slice().first() == Some(&subtag!("islamic"))
} else if cal_val == &value!("islamicc")
|| cal_val.get_subtag(0) == Some(&subtag!("islamic"))
{
// All islamic calendars store skeleton data under islamic, not their individual extension keys
locale.set_unicode_ext(key!("ca"), value!("islamic"));
Expand Down
21 changes: 17 additions & 4 deletions components/locale_core/src/extensions/other/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,11 @@ impl Other {
iter.next();
}

Ok(Self::from_short_slice_unchecked(ext, keys))
if keys.is_empty() {
Err(ParseError::InvalidExtension)
} else {
Ok(Self::from_short_slice_unchecked(ext, keys))
}
}

/// Gets the tag character for this extension as a &str.
Expand Down Expand Up @@ -187,6 +191,9 @@ writeable::impl_display_with_writeable!(Other);

impl writeable::Writeable for Other {
fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
if self.keys.is_empty() {
return Ok(());
}
sink.write_str(self.get_ext_str())?;
for key in self.keys.iter() {
sink.write_char('-')?;
Expand All @@ -197,6 +204,9 @@ impl writeable::Writeable for Other {
}

fn writeable_length_hint(&self) -> writeable::LengthHint {
if self.keys.is_empty() {
return writeable::LengthHint::exact(0);
};
let mut result = writeable::LengthHint::exact(1);
for key in self.keys.iter() {
result += writeable::Writeable::writeable_length_hint(key) + 1;
Expand All @@ -206,7 +216,7 @@ impl writeable::Writeable for Other {

fn write_to_string(&self) -> alloc::borrow::Cow<str> {
if self.keys.is_empty() {
return alloc::borrow::Cow::Borrowed(self.get_ext_str());
return alloc::borrow::Cow::Borrowed("");
}
let mut string =
alloc::string::String::with_capacity(self.writeable_length_hint().capacity());
Expand All @@ -221,7 +231,10 @@ mod tests {

#[test]
fn test_other_extension_fromstr() {
let pe: Other = "o-foo-bar".parse().expect("Failed to parse Other");
assert_eq!(pe.to_string(), "o-foo-bar");
let oe: Other = "o-foo-bar".parse().expect("Failed to parse Other");
assert_eq!(oe.to_string(), "o-foo-bar");

let oe: Result<Other, _> = "o".parse();
assert!(oe.is_err());
}
}
9 changes: 8 additions & 1 deletion components/locale_core/src/extensions/private/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,11 @@ impl Private {
.map(Subtag::try_from_bytes)
.collect::<Result<ShortBoxSlice<_>, _>>()?;

Ok(Self(keys))
if keys.is_empty() {
Err(ParseError::InvalidExtension)
} else {
Ok(Self(keys))
}
}

pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
Expand Down Expand Up @@ -220,5 +224,8 @@ mod tests {
fn test_private_extension_fromstr() {
let pe: Private = "x-foo-bar-l-baz".parse().expect("Failed to parse Private");
assert_eq!(pe.to_string(), "x-foo-bar-l-baz");

let pe: Result<Private, _> = "x".parse();
assert!(pe.is_err());
}
}
15 changes: 11 additions & 4 deletions components/locale_core/src/extensions/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,14 @@ impl Transform {
tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
}

Ok(Self {
lang: tlang,
fields: tfields.into(),
})
if tlang.is_none() && tfields.is_empty() {
Err(ParseError::InvalidExtension)
} else {
Ok(Self {
lang: tlang,
fields: tfields.into(),
})
}
}

pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
Expand Down Expand Up @@ -299,5 +303,8 @@ mod tests {
.parse()
.expect("Failed to parse Transform");
assert_eq!(te.to_string(), "t-en-us-h0-hybrid");

let te: Result<Transform, _> = "t".parse();
assert!(te.is_err());
}
}
5 changes: 5 additions & 0 deletions components/locale_core/src/extensions/unicode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ mod attribute;
mod attributes;
mod key;
mod keywords;
mod subdivision;
mod value;

use core::cmp::Ordering;
Expand All @@ -41,6 +42,7 @@ pub use attributes::Attributes;
#[doc(inline)]
pub use key::{key, Key};
pub use keywords::Keywords;
pub use subdivision::{subdivision_suffix, SubdivisionId, SubdivisionSuffix};
#[doc(inline)]
pub use value::{value, Value};

Expand Down Expand Up @@ -242,5 +244,8 @@ mod tests {
fn test_unicode_extension_fromstr() {
let ue: Unicode = "u-foo-hc-h12".parse().expect("Failed to parse Unicode");
assert_eq!(ue.to_string(), "u-foo-hc-h12");

let ue: Result<Unicode, _> = "u".parse();
assert!(ue.is_err());
}
}
167 changes: 167 additions & 0 deletions components/locale_core/src/extensions/unicode/subdivision.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use core::str::FromStr;

use crate::parser::ParseError;
use crate::subtags::Region;

impl_tinystr_subtag!(
/// A subdivision suffix used in [`SubdivisionId`].
///
/// This suffix represents a specific subdivision code under a given [`Region`].
/// For example, the value of a [`SubdivisionId`] may be `gbsct`, where the
/// [`SubdivisionSuffix`] is `sct` for Scotland.
///
/// Such a value associated with the key `rg` means that the locale should use Unit Preferences
/// (default calendar, currency, week data, time cycle, measurement system) for Scotland, even if the
/// [`LanguageIdentifier`](crate::LanguageIdentifier) is `en-US`.
///
/// A subdivision suffix has to be a sequence of ASCII alphanumeric characters,
/// no shorter than one and no longer than four characters.
///
/// # Examples
///
/// ```
/// use icu::locale::extensions::unicode::{subdivision_suffix, SubdivisionSuffix};
///
/// let ss: SubdivisionSuffix =
/// "sct".parse().expect("Failed to parse a SubdivisionSuffix.");
///
/// assert_eq!(ss, subdivision_suffix!("sct"));
/// ```
// NOTE(review): the positional arguments below presumably follow the
// `impl_tinystr_subtag!` convention (type name, module path, literal macro
// name, internal macro name, length range, binding, validity check,
// normalization, is-normalized check, error variant, valid samples,
// invalid samples) — confirm against the macro definition.
SubdivisionSuffix,
extensions::unicode,
subdivision_suffix,
extensions_unicode_subdivision_suffix,
1..=4,
s,
s.is_ascii_alphanumeric(),
s.to_ascii_lowercase(),
s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
InvalidExtension,
["sct"],
["toolooong"],
);

/// A Subdivision Id as defined in [`Unicode Locale Identifier`].
///
/// Subdivision Id is used in [`Unicode`] extensions:
/// * `rg` - Regional Override
/// * `sd` - Regional Subdivision
///
/// In both cases the subdivision is composed of a [`Region`] and a [`SubdivisionSuffix`];
/// the meaning of the value depends on the key it is associated with.
///
/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/tr35.html#unicode_subdivision_id
/// [`Unicode`]: crate::extensions::unicode::Unicode
///
/// # Examples
///
/// ```
/// use icu::locale::{
/// subtags::region,
/// extensions::unicode::{subdivision_suffix, SubdivisionId}
/// };
///
/// let ss = subdivision_suffix!("zzzz");
/// let region = region!("gb");
///
/// let si = SubdivisionId::new(region, ss);
///
/// assert_eq!(si.to_string(), "gbzzzz");
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
#[non_exhaustive]
pub struct SubdivisionId {
/// A region field of a Subdivision Id.
pub region: Region,
/// A subdivision suffix field of a Subdivision Id.
pub suffix: SubdivisionSuffix,
}

impl SubdivisionId {
    /// Builds a [`SubdivisionId`] from its two components.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu::locale::{
    ///     subtags::region,
    ///     extensions::unicode::{subdivision_suffix, SubdivisionId}
    /// };
    ///
    /// let ss = subdivision_suffix!("zzzz");
    /// let region = region!("gb");
    ///
    /// let si = SubdivisionId::new(region, ss);
    ///
    /// assert_eq!(si.to_string(), "gbzzzz");
    /// ```
    pub const fn new(region: Region, suffix: SubdivisionSuffix) -> Self {
        Self { region, suffix }
    }

    /// Parses a subdivision id from raw bytes.
    ///
    /// The first byte decides how many leading bytes form the region:
    /// two when it is an ASCII letter, three when it is an ASCII digit.
    /// Everything after the region is parsed as the [`SubdivisionSuffix`],
    /// so at least one byte must follow the region.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::InvalidExtension`] when the input is empty,
    /// starts with a byte that is neither an ASCII letter nor an ASCII digit,
    /// has no bytes after the region, or when the region fails to parse.
    /// Errors from parsing the suffix are propagated unchanged.
    pub(crate) fn try_from_bytes(input: &[u8]) -> Result<Self, ParseError> {
        // Letters are checked before digits, mirroring the region shapes
        // accepted here: two leading bytes vs. three leading bytes.
        let region_len = match input.first() {
            Some(lead) if lead.is_ascii_alphabetic() => 2,
            Some(lead) if lead.is_ascii_digit() => 3,
            _ => return Err(ParseError::InvalidExtension),
        };

        // The suffix must be non-empty, so the input has to be strictly
        // longer than the region part; this also keeps `split_at` in bounds.
        if input.len() <= region_len {
            return Err(ParseError::InvalidExtension);
        }

        let (head, tail) = input.split_at(region_len);
        let region = match Region::try_from_bytes(head) {
            Ok(parsed) => parsed,
            Err(_) => return Err(ParseError::InvalidExtension),
        };
        let suffix = SubdivisionSuffix::try_from_bytes(tail)?;

        Ok(Self { region, suffix })
    }
}

impl writeable::Writeable for SubdivisionId {
    /// Writes the region in ASCII lowercase, immediately followed by the suffix.
    #[inline]
    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
        // The region is lowercased explicitly before being emitted.
        let region_lower = self.region.into_tinystr().to_ascii_lowercase();
        sink.write_str(region_lower.as_str())?;
        sink.write_str(self.suffix.as_str())
    }

    /// The hint is the sum of the hints of the region and the suffix.
    #[inline]
    fn writeable_length_hint(&self) -> writeable::LengthHint {
        let region_hint = self.region.writeable_length_hint();
        let suffix_hint = self.suffix.writeable_length_hint();
        region_hint + suffix_hint
    }
}

writeable::impl_display_with_writeable!(SubdivisionId);

impl FromStr for SubdivisionId {
    type Err = ParseError;

    /// Parses a subdivision id from a string by delegating to
    /// `SubdivisionId::try_from_bytes` on the string's bytes.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let bytes = s.as_bytes();
        Self::try_from_bytes(bytes)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_subdivisionid_fromstr() {
        // A well-formed id parses, exposes both parts, and round-trips in lowercase.
        let parsed: SubdivisionId = "gbzzzz".parse().expect("Failed to parse SubdivisionId");
        assert_eq!(parsed.region.to_string(), "GB");
        assert_eq!(parsed.suffix.to_string(), "zzzz");
        assert_eq!(parsed.to_string(), "gbzzzz");

        // Rejected inputs: empty string, a region with no suffix, and a
        // single letter that is too short to hold a region plus a suffix.
        ["", "gb", "o"].iter().for_each(|input| {
            let outcome: Result<SubdivisionId, _> = input.parse();
            assert!(outcome.is_err(), "Should fail: {}", input);
        });
    }
}
Loading

0 comments on commit f3b0573

Please sign in to comment.