diff --git a/components/normalizer/src/lib.rs b/components/normalizer/src/lib.rs index d6062fa48b8..74604238ad9 100644 --- a/components/normalizer/src/lib.rs +++ b/components/normalizer/src/lib.rs @@ -1503,6 +1503,13 @@ macro_rules! normalizer_methods { ret } + /// Return the index a string slice is normalized up to. + pub fn is_normalized_up_to(&self, text: &str) -> usize { + let mut sink = IsNormalizedSinkStr::new(text); + let _ = self.normalize_to(text, &mut sink); + text.len() - sink.remaining_len() + } + /// Check whether a string slice is normalized. pub fn is_normalized(&self, text: &str) -> bool { let mut sink = IsNormalizedSinkStr::new(text); @@ -1522,6 +1529,13 @@ macro_rules! normalizer_methods { ret } + /// Return the index a slice of potentially-invalid UTF-16 is normalized up to. + pub fn is_normalized_utf16_up_to(&self, text: &[u16]) -> usize { + let mut sink = IsNormalizedSinkUtf16::new(text); + let _ = self.normalize_utf16_to(text, &mut sink); + text.len() - sink.remaining_len() + } + /// Checks whether a slice of potentially-invalid UTF-16 is normalized. /// /// Unpaired surrogates are treated as the REPLACEMENT CHARACTER. @@ -1544,6 +1558,13 @@ macro_rules! normalizer_methods { ret } + /// Return the index a slice of potentially-invalid UTF-8 is normalized up to. + pub fn is_normalized_utf8_up_to(&self, text: &[u8]) -> usize { + let mut sink = IsNormalizedSinkUtf8::new(text); + let _ = self.normalize_utf8_to(text, &mut sink); + text.len() - sink.remaining_len() + } + /// Check if a slice of potentially-invalid UTF-8 is normalized. 
/// /// Ill-formed byte sequences are mapped to the REPLACEMENT CHARACTER @@ -2673,6 +2694,9 @@ impl<'a> IsNormalizedSinkUtf16<'a> { pub fn finished(&self) -> bool { self.expect.is_empty() } + pub fn remaining_len(&self) -> usize { + self.expect.len() + } } impl<'a> Write16 for IsNormalizedSinkUtf16<'a> { @@ -2712,6 +2736,9 @@ impl<'a> IsNormalizedSinkUtf8<'a> { pub fn finished(&self) -> bool { self.expect.is_empty() } + pub fn remaining_len(&self) -> usize { + self.expect.len() + } } impl<'a> core::fmt::Write for IsNormalizedSinkUtf8<'a> { @@ -2751,6 +2778,9 @@ impl<'a> IsNormalizedSinkStr<'a> { pub fn finished(&self) -> bool { self.expect.is_empty() } + pub fn remaining_len(&self) -> usize { + self.expect.len() + } } impl<'a> core::fmt::Write for IsNormalizedSinkStr<'a> { diff --git a/components/normalizer/tests/tests.rs b/components/normalizer/tests/tests.rs index f6bad8c21b1..6544a55102c 100644 --- a/components/normalizer/tests/tests.rs +++ b/components/normalizer/tests/tests.rs @@ -1547,3 +1547,166 @@ fn test_is_normalized() { assert!(nfc.is_normalized_utf16(fraction16)); assert!(!nfkc.is_normalized_utf16(fraction16)); } + +#[test] +fn test_is_normalized_up_to() { + let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd(); + let nfkd: DecomposingNormalizer = DecomposingNormalizer::new_nfkd(); + let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc(); + let nfkc: ComposingNormalizer = ComposingNormalizer::new_nfkc(); + + // Check a string slice is normalized up to where is_normalized_up_to reports + let check_str = |input: &str| { + // Check nfd + let up_to = nfd.is_normalized_up_to(input); + let (head, tail) = input.split_at(up_to); + let mut normalized = String::from(head); + let _ = nfd.normalize_to(tail, &mut normalized); + assert!(nfd.is_normalized(&normalized)); + + // Check nfkd + let up_to = nfkd.is_normalized_up_to(input); + let (head, tail) = input.split_at(up_to); + let mut normalized = String::from(head); + let _ = 
nfkd.normalize_to(tail, &mut normalized); + assert!(nfkd.is_normalized(&normalized)); + + // Check nfc + let up_to = nfc.is_normalized_up_to(input); + let (head, tail) = input.split_at(up_to); + let mut normalized = String::from(head); + let _ = nfc.normalize_to(tail, &mut normalized); + assert!(nfc.is_normalized(&normalized)); + + // Check nfkc + let up_to = nfkc.is_normalized_up_to(input); + let (head, tail) = input.split_at(up_to); + let mut normalized = String::from(head); + let _ = nfkc.normalize_to(tail, &mut normalized); + assert!(nfkc.is_normalized(&normalized)); + }; + + // Check a string of UTF8 bytes is normalized up to where is_normalized_up_to reports + // note: from_utf8 can panic with invalid UTF8 input + let check_utf8 = |input: &[u8]| { + // Check nfd + let up_to = nfd.is_normalized_utf8_up_to(input); + let (head, tail) = input.split_at(up_to); + let mut normalized = String::from_utf8(head.to_vec()).unwrap(); + let _ = nfd.normalize_utf8_to(tail, &mut normalized); + assert!(nfd.is_normalized(&normalized)); + + // Check nfkd + let up_to = nfkd.is_normalized_utf8_up_to(input); + let (head, tail) = input.split_at(up_to); + let mut normalized = String::from_utf8(head.to_vec()).unwrap(); + let _ = nfkd.normalize_utf8_to(tail, &mut normalized); + assert!(nfkd.is_normalized(&normalized)); + + // Check nfc + let up_to = nfc.is_normalized_utf8_up_to(input); + let (head, tail) = input.split_at(up_to); + let mut normalized = String::from_utf8(head.to_vec()).unwrap(); + let _ = nfc.normalize_utf8_to(tail, &mut normalized); + assert!(nfc.is_normalized(&normalized)); + + // Check nfkc + let up_to = nfkc.is_normalized_utf8_up_to(input); + let (head, tail) = input.split_at(up_to); + let mut normalized = String::from_utf8(head.to_vec()).unwrap(); + let _ = nfkc.normalize_utf8_to(tail, &mut normalized); + assert!(nfkc.is_normalized(&normalized)); + }; + + // todo: UTF16 tests? 
+ + let aaa = "aaa"; + check_str(aaa); + + let aaa_utf8 = aaa.as_bytes(); + check_utf8(aaa_utf8); + + assert!(nfd.is_normalized_up_to(aaa) == aaa.len()); + assert!(nfkd.is_normalized_up_to(aaa) == aaa.len()); + assert!(nfc.is_normalized_up_to(aaa) == aaa.len()); + assert!(nfkc.is_normalized_up_to(aaa) == aaa.len()); + assert!(nfd.is_normalized_utf8_up_to(aaa_utf8) == aaa_utf8.len()); + assert!(nfkd.is_normalized_utf8_up_to(aaa_utf8) == aaa_utf8.len()); + assert!(nfc.is_normalized_utf8_up_to(aaa_utf8) == aaa_utf8.len()); + assert!(nfkc.is_normalized_utf8_up_to(aaa_utf8) == aaa_utf8.len()); + + let note = "a𝅗\u{1D165}a"; + check_str(note); + + let note_utf8 = note.as_bytes(); + check_utf8(note_utf8); + + assert!(nfd.is_normalized_up_to(note) == note.len()); + assert!(nfkd.is_normalized_up_to(note) == note.len()); + assert!(nfc.is_normalized_up_to(note) == note.len()); + assert!(nfkc.is_normalized_up_to(note) == note.len()); + assert!(nfd.is_normalized_utf8_up_to(note_utf8) == note_utf8.len()); + assert!(nfkd.is_normalized_utf8_up_to(note_utf8) == note_utf8.len()); + assert!(nfc.is_normalized_utf8_up_to(note_utf8) == note_utf8.len()); + assert!(nfkc.is_normalized_utf8_up_to(note_utf8) == note_utf8.len()); + + let umlaut = "aäa"; + check_str(umlaut); + + let umlaut_utf8 = umlaut.as_bytes(); + check_utf8(umlaut_utf8); + + assert_eq!(nfd.is_normalized_up_to(umlaut), 1); + assert_eq!(nfkd.is_normalized_up_to(umlaut), 1); + assert_eq!(nfc.is_normalized_up_to(umlaut), 4); + assert_eq!(nfkc.is_normalized_up_to(umlaut), 4); + assert_eq!(nfd.is_normalized_utf8_up_to(umlaut_utf8), 1); + assert_eq!(nfkd.is_normalized_utf8_up_to(umlaut_utf8), 1); + assert_eq!(nfc.is_normalized_utf8_up_to(umlaut_utf8), 4); + assert_eq!(nfkc.is_normalized_utf8_up_to(umlaut_utf8), 4); + + let fraction = "a½a"; + check_str(fraction); + + let fraction_utf8 = fraction.as_bytes(); + check_utf8(fraction_utf8); + + assert_eq!(nfd.is_normalized_up_to(fraction), 4); + 
assert_eq!(nfkd.is_normalized_up_to(fraction), 1); + assert_eq!(nfc.is_normalized_up_to(fraction), 4); + assert_eq!(nfkc.is_normalized_up_to(fraction), 1); + assert_eq!(nfd.is_normalized_utf8_up_to(fraction_utf8), 4); + assert_eq!(nfkd.is_normalized_utf8_up_to(fraction_utf8), 1); + assert_eq!(nfc.is_normalized_utf8_up_to(fraction_utf8), 4); + assert_eq!(nfkc.is_normalized_utf8_up_to(fraction_utf8), 1); + + let reversed_vietnamese = "e\u{0302}\u{0323}"; + check_str(reversed_vietnamese); + + let reversed_vietnamese_utf8 = reversed_vietnamese.as_bytes(); + check_utf8(reversed_vietnamese_utf8); + + assert_eq!(nfd.is_normalized_up_to(reversed_vietnamese), 1); + assert_eq!(nfkd.is_normalized_up_to(reversed_vietnamese), 1); + assert_eq!(nfc.is_normalized_up_to(reversed_vietnamese), 0); + assert_eq!(nfkc.is_normalized_up_to(reversed_vietnamese), 0); + assert_eq!(nfd.is_normalized_utf8_up_to(reversed_vietnamese_utf8), 1); + assert_eq!(nfkd.is_normalized_utf8_up_to(reversed_vietnamese_utf8), 1); + assert_eq!(nfc.is_normalized_utf8_up_to(reversed_vietnamese_utf8), 0); + assert_eq!(nfkc.is_normalized_utf8_up_to(reversed_vietnamese_utf8), 0); + + let truncated_vietnamese = "e\u{0302}"; + check_str(truncated_vietnamese); + + let truncated_vietnamese_utf8 = truncated_vietnamese.as_bytes(); + check_utf8(truncated_vietnamese_utf8); + + assert_eq!(nfd.is_normalized_up_to(truncated_vietnamese), 3); + assert_eq!(nfkd.is_normalized_up_to(truncated_vietnamese), 3); + assert_eq!(nfc.is_normalized_up_to(truncated_vietnamese), 0); + assert_eq!(nfkc.is_normalized_up_to(truncated_vietnamese), 0); + assert_eq!(nfd.is_normalized_utf8_up_to(truncated_vietnamese_utf8), 3); + assert_eq!(nfkd.is_normalized_utf8_up_to(truncated_vietnamese_utf8), 3); + assert_eq!(nfc.is_normalized_utf8_up_to(truncated_vietnamese_utf8), 0); + assert_eq!(nfkc.is_normalized_utf8_up_to(truncated_vietnamese_utf8), 0); +} diff --git a/ffi/capi/bindings/c/ICU4XComposingNormalizer.h 
b/ffi/capi/bindings/c/ICU4XComposingNormalizer.h index caab1c7fc77..71f1c9826bd 100644 --- a/ffi/capi/bindings/c/ICU4XComposingNormalizer.h +++ b/ffi/capi/bindings/c/ICU4XComposingNormalizer.h @@ -27,6 +27,12 @@ void ICU4XComposingNormalizer_normalize(const ICU4XComposingNormalizer* self, co bool ICU4XComposingNormalizer_is_normalized(const ICU4XComposingNormalizer* self, const char* s_data, size_t s_len); +bool ICU4XComposingNormalizer_is_normalized_utf16(const ICU4XComposingNormalizer* self, const char16_t* s_data, size_t s_len); + +size_t ICU4XComposingNormalizer_is_normalized_up_to(const ICU4XComposingNormalizer* self, const char* s_data, size_t s_len); + +size_t ICU4XComposingNormalizer_is_normalized_utf16_up_to(const ICU4XComposingNormalizer* self, const char16_t* s_data, size_t s_len); + void ICU4XComposingNormalizer_destroy(ICU4XComposingNormalizer* self); diff --git a/ffi/capi/bindings/c/ICU4XDecomposingNormalizer.h b/ffi/capi/bindings/c/ICU4XDecomposingNormalizer.h index de22fdeab9d..7de7a99390c 100644 --- a/ffi/capi/bindings/c/ICU4XDecomposingNormalizer.h +++ b/ffi/capi/bindings/c/ICU4XDecomposingNormalizer.h @@ -27,6 +27,12 @@ void ICU4XDecomposingNormalizer_normalize(const ICU4XDecomposingNormalizer* self bool ICU4XDecomposingNormalizer_is_normalized(const ICU4XDecomposingNormalizer* self, const char* s_data, size_t s_len); +bool ICU4XDecomposingNormalizer_is_normalized_utf16(const ICU4XDecomposingNormalizer* self, const char16_t* s_data, size_t s_len); + +size_t ICU4XDecomposingNormalizer_is_normalized_up_to(const ICU4XDecomposingNormalizer* self, const char* s_data, size_t s_len); + +size_t ICU4XDecomposingNormalizer_is_normalized_utf16_up_to(const ICU4XDecomposingNormalizer* self, const char16_t* s_data, size_t s_len); + void ICU4XDecomposingNormalizer_destroy(ICU4XDecomposingNormalizer* self); diff --git a/ffi/capi/bindings/cpp/ICU4XComposingNormalizer.d.hpp b/ffi/capi/bindings/cpp/ICU4XComposingNormalizer.d.hpp index 96167ff4b67..c3497443f1e 
100644 --- a/ffi/capi/bindings/cpp/ICU4XComposingNormalizer.d.hpp +++ b/ffi/capi/bindings/cpp/ICU4XComposingNormalizer.d.hpp @@ -29,6 +29,12 @@ class ICU4XComposingNormalizer { inline bool is_normalized(std::string_view s) const; + inline bool is_normalized_utf16(std::u16string_view s) const; + + inline size_t is_normalized_up_to(std::string_view s) const; + + inline size_t is_normalized_utf16_up_to(std::u16string_view s) const; + inline const capi::ICU4XComposingNormalizer* AsFFI() const; inline capi::ICU4XComposingNormalizer* AsFFI(); inline static const ICU4XComposingNormalizer* FromFFI(const capi::ICU4XComposingNormalizer* ptr); diff --git a/ffi/capi/bindings/cpp/ICU4XComposingNormalizer.hpp b/ffi/capi/bindings/cpp/ICU4XComposingNormalizer.hpp index 26b3dab1370..7a0f8e9a31e 100644 --- a/ffi/capi/bindings/cpp/ICU4XComposingNormalizer.hpp +++ b/ffi/capi/bindings/cpp/ICU4XComposingNormalizer.hpp @@ -27,6 +27,12 @@ namespace capi { bool ICU4XComposingNormalizer_is_normalized(const ICU4XComposingNormalizer* self, const char* s_data, size_t s_len); + bool ICU4XComposingNormalizer_is_normalized_utf16(const ICU4XComposingNormalizer* self, const char16_t* s_data, size_t s_len); + + size_t ICU4XComposingNormalizer_is_normalized_up_to(const ICU4XComposingNormalizer* self, const char* s_data, size_t s_len); + + size_t ICU4XComposingNormalizer_is_normalized_utf16_up_to(const ICU4XComposingNormalizer* self, const char16_t* s_data, size_t s_len); + void ICU4XComposingNormalizer_destroy(ICU4XComposingNormalizer* self); @@ -60,6 +66,27 @@ inline bool ICU4XComposingNormalizer::is_normalized(std::string_view s) const { return result; } +inline bool ICU4XComposingNormalizer::is_normalized_utf16(std::u16string_view s) const { + auto result = capi::ICU4XComposingNormalizer_is_normalized_utf16(this->AsFFI(), + s.data(), + s.size()); + return result; +} + +inline size_t ICU4XComposingNormalizer::is_normalized_up_to(std::string_view s) const { + auto result = 
capi::ICU4XComposingNormalizer_is_normalized_up_to(this->AsFFI(), + s.data(), + s.size()); + return result; +} + +inline size_t ICU4XComposingNormalizer::is_normalized_utf16_up_to(std::u16string_view s) const { + auto result = capi::ICU4XComposingNormalizer_is_normalized_utf16_up_to(this->AsFFI(), + s.data(), + s.size()); + return result; +} + inline const capi::ICU4XComposingNormalizer* ICU4XComposingNormalizer::AsFFI() const { return reinterpret_cast(this); } diff --git a/ffi/capi/bindings/cpp/ICU4XDecomposingNormalizer.d.hpp b/ffi/capi/bindings/cpp/ICU4XDecomposingNormalizer.d.hpp index 2f54e2b17d3..ce95de8b297 100644 --- a/ffi/capi/bindings/cpp/ICU4XDecomposingNormalizer.d.hpp +++ b/ffi/capi/bindings/cpp/ICU4XDecomposingNormalizer.d.hpp @@ -29,6 +29,12 @@ class ICU4XDecomposingNormalizer { inline bool is_normalized(std::string_view s) const; + inline bool is_normalized_utf16(std::u16string_view s) const; + + inline size_t is_normalized_up_to(std::string_view s) const; + + inline size_t is_normalized_utf16_up_to(std::u16string_view s) const; + inline const capi::ICU4XDecomposingNormalizer* AsFFI() const; inline capi::ICU4XDecomposingNormalizer* AsFFI(); inline static const ICU4XDecomposingNormalizer* FromFFI(const capi::ICU4XDecomposingNormalizer* ptr); diff --git a/ffi/capi/bindings/cpp/ICU4XDecomposingNormalizer.hpp b/ffi/capi/bindings/cpp/ICU4XDecomposingNormalizer.hpp index c7de8e6aeea..b87a4c9a7d3 100644 --- a/ffi/capi/bindings/cpp/ICU4XDecomposingNormalizer.hpp +++ b/ffi/capi/bindings/cpp/ICU4XDecomposingNormalizer.hpp @@ -27,6 +27,12 @@ namespace capi { bool ICU4XDecomposingNormalizer_is_normalized(const ICU4XDecomposingNormalizer* self, const char* s_data, size_t s_len); + bool ICU4XDecomposingNormalizer_is_normalized_utf16(const ICU4XDecomposingNormalizer* self, const char16_t* s_data, size_t s_len); + + size_t ICU4XDecomposingNormalizer_is_normalized_up_to(const ICU4XDecomposingNormalizer* self, const char* s_data, size_t s_len); + + size_t 
ICU4XDecomposingNormalizer_is_normalized_utf16_up_to(const ICU4XDecomposingNormalizer* self, const char16_t* s_data, size_t s_len); + void ICU4XDecomposingNormalizer_destroy(ICU4XDecomposingNormalizer* self); @@ -60,6 +66,27 @@ inline bool ICU4XDecomposingNormalizer::is_normalized(std::string_view s) const return result; } +inline bool ICU4XDecomposingNormalizer::is_normalized_utf16(std::u16string_view s) const { + auto result = capi::ICU4XDecomposingNormalizer_is_normalized_utf16(this->AsFFI(), + s.data(), + s.size()); + return result; +} + +inline size_t ICU4XDecomposingNormalizer::is_normalized_up_to(std::string_view s) const { + auto result = capi::ICU4XDecomposingNormalizer_is_normalized_up_to(this->AsFFI(), + s.data(), + s.size()); + return result; +} + +inline size_t ICU4XDecomposingNormalizer::is_normalized_utf16_up_to(std::u16string_view s) const { + auto result = capi::ICU4XDecomposingNormalizer_is_normalized_utf16_up_to(this->AsFFI(), + s.data(), + s.size()); + return result; +} + inline const capi::ICU4XDecomposingNormalizer* ICU4XDecomposingNormalizer::AsFFI() const { return reinterpret_cast(this); } diff --git a/ffi/capi/bindings/dart/ComposingNormalizer.g.dart b/ffi/capi/bindings/dart/ComposingNormalizer.g.dart index 4ed67781843..8340b004efb 100644 --- a/ffi/capi/bindings/dart/ComposingNormalizer.g.dart +++ b/ffi/capi/bindings/dart/ComposingNormalizer.g.dart @@ -76,6 +76,42 @@ final class ComposingNormalizer implements ffi.Finalizable { temp.releaseAll(); return result; } + + /// Check if a string is normalized + /// + /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according + /// to the WHATWG Encoding Standard. + /// + /// See the [Rust documentation for `is_normalized_utf16`](https://docs.rs/icu/latest/icu/normalizer/struct.ComposingNormalizer.html#method.is_normalized_utf16) for more information. 
+ bool isNormalizedUtf16(String s) { + final temp = ffi2.Arena(); + final sView = s.utf16View; + final result = _ICU4XComposingNormalizer_is_normalized_utf16(_ffi, sView.allocIn(temp), sView.length); + temp.releaseAll(); + return result; + } + + /// Return the index a slice of potentially-invalid UTF-8 is normalized up to + /// + /// See the [Rust documentation for `is_normalized_utf8_up_to`](https://docs.rs/icu/latest/icu/normalizer/struct.ComposingNormalizer.html#method.is_normalized_utf8_up_to) for more information. + int isNormalizedUpTo(String s) { + final temp = ffi2.Arena(); + final sView = s.utf8View; + final result = _ICU4XComposingNormalizer_is_normalized_up_to(_ffi, sView.allocIn(temp), sView.length); + temp.releaseAll(); + return result; + } + + /// Return the index a slice of potentially-invalid UTF-16 is normalized up to + /// + /// See the [Rust documentation for `is_normalized_utf16_up_to`](https://docs.rs/icu/latest/icu/normalizer/struct.ComposingNormalizer.html#method.is_normalized_utf16_up_to) for more information. 
+ int isNormalizedUtf16UpTo(String s) { + final temp = ffi2.Arena(); + final sView = s.utf16View; + final result = _ICU4XComposingNormalizer_is_normalized_utf16_up_to(_ffi, sView.allocIn(temp), sView.length); + temp.releaseAll(); + return result; + } } @meta.ResourceIdentifier('ICU4XComposingNormalizer_destroy') @@ -102,3 +138,18 @@ external void _ICU4XComposingNormalizer_normalize(ffi.Pointer self, @ffi.Native, ffi.Pointer, ffi.Size)>(isLeaf: true, symbol: 'ICU4XComposingNormalizer_is_normalized') // ignore: non_constant_identifier_names external bool _ICU4XComposingNormalizer_is_normalized(ffi.Pointer self, ffi.Pointer sData, int sLength); + +@meta.ResourceIdentifier('ICU4XComposingNormalizer_is_normalized_utf16') +@ffi.Native, ffi.Pointer, ffi.Size)>(isLeaf: true, symbol: 'ICU4XComposingNormalizer_is_normalized_utf16') +// ignore: non_constant_identifier_names +external bool _ICU4XComposingNormalizer_is_normalized_utf16(ffi.Pointer self, ffi.Pointer sData, int sLength); + +@meta.ResourceIdentifier('ICU4XComposingNormalizer_is_normalized_up_to') +@ffi.Native, ffi.Pointer, ffi.Size)>(isLeaf: true, symbol: 'ICU4XComposingNormalizer_is_normalized_up_to') +// ignore: non_constant_identifier_names +external int _ICU4XComposingNormalizer_is_normalized_up_to(ffi.Pointer self, ffi.Pointer sData, int sLength); + +@meta.ResourceIdentifier('ICU4XComposingNormalizer_is_normalized_utf16_up_to') +@ffi.Native, ffi.Pointer, ffi.Size)>(isLeaf: true, symbol: 'ICU4XComposingNormalizer_is_normalized_utf16_up_to') +// ignore: non_constant_identifier_names +external int _ICU4XComposingNormalizer_is_normalized_utf16_up_to(ffi.Pointer self, ffi.Pointer sData, int sLength); diff --git a/ffi/capi/bindings/dart/DecomposingNormalizer.g.dart b/ffi/capi/bindings/dart/DecomposingNormalizer.g.dart index 4af5ec26b47..fa6b3aee81d 100644 --- a/ffi/capi/bindings/dart/DecomposingNormalizer.g.dart +++ b/ffi/capi/bindings/dart/DecomposingNormalizer.g.dart @@ -76,6 +76,42 @@ final class 
DecomposingNormalizer implements ffi.Finalizable { temp.releaseAll(); return result; } + + /// Check if a string is normalized + /// + /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according + /// to the WHATWG Encoding Standard. + /// + /// See the [Rust documentation for `is_normalized_utf16`](https://docs.rs/icu/latest/icu/normalizer/struct.DecomposingNormalizer.html#method.is_normalized_utf16) for more information. + bool isNormalizedUtf16(String s) { + final temp = ffi2.Arena(); + final sView = s.utf16View; + final result = _ICU4XDecomposingNormalizer_is_normalized_utf16(_ffi, sView.allocIn(temp), sView.length); + temp.releaseAll(); + return result; + } + + /// Return the index a slice of potentially-invalid UTF-8 is normalized up to + /// + /// See the [Rust documentation for `is_normalized_utf8_up_to`](https://docs.rs/icu/latest/icu/normalizer/struct.DecomposingNormalizer.html#method.is_normalized_utf8_up_to) for more information. + int isNormalizedUpTo(String s) { + final temp = ffi2.Arena(); + final sView = s.utf8View; + final result = _ICU4XDecomposingNormalizer_is_normalized_up_to(_ffi, sView.allocIn(temp), sView.length); + temp.releaseAll(); + return result; + } + + /// Return the index a slice of potentially-invalid UTF-16 is normalized up to + /// + /// See the [Rust documentation for `is_normalized_utf16_up_to`](https://docs.rs/icu/latest/icu/normalizer/struct.DecomposingNormalizer.html#method.is_normalized_utf16_up_to) for more information. 
+ int isNormalizedUtf16UpTo(String s) { + final temp = ffi2.Arena(); + final sView = s.utf16View; + final result = _ICU4XDecomposingNormalizer_is_normalized_utf16_up_to(_ffi, sView.allocIn(temp), sView.length); + temp.releaseAll(); + return result; + } } @meta.ResourceIdentifier('ICU4XDecomposingNormalizer_destroy') @@ -102,3 +138,18 @@ external void _ICU4XDecomposingNormalizer_normalize(ffi.Pointer self @ffi.Native, ffi.Pointer, ffi.Size)>(isLeaf: true, symbol: 'ICU4XDecomposingNormalizer_is_normalized') // ignore: non_constant_identifier_names external bool _ICU4XDecomposingNormalizer_is_normalized(ffi.Pointer self, ffi.Pointer sData, int sLength); + +@meta.ResourceIdentifier('ICU4XDecomposingNormalizer_is_normalized_utf16') +@ffi.Native, ffi.Pointer, ffi.Size)>(isLeaf: true, symbol: 'ICU4XDecomposingNormalizer_is_normalized_utf16') +// ignore: non_constant_identifier_names +external bool _ICU4XDecomposingNormalizer_is_normalized_utf16(ffi.Pointer self, ffi.Pointer sData, int sLength); + +@meta.ResourceIdentifier('ICU4XDecomposingNormalizer_is_normalized_up_to') +@ffi.Native, ffi.Pointer, ffi.Size)>(isLeaf: true, symbol: 'ICU4XDecomposingNormalizer_is_normalized_up_to') +// ignore: non_constant_identifier_names +external int _ICU4XDecomposingNormalizer_is_normalized_up_to(ffi.Pointer self, ffi.Pointer sData, int sLength); + +@meta.ResourceIdentifier('ICU4XDecomposingNormalizer_is_normalized_utf16_up_to') +@ffi.Native, ffi.Pointer, ffi.Size)>(isLeaf: true, symbol: 'ICU4XDecomposingNormalizer_is_normalized_utf16_up_to') +// ignore: non_constant_identifier_names +external int _ICU4XDecomposingNormalizer_is_normalized_utf16_up_to(ffi.Pointer self, ffi.Pointer sData, int sLength); diff --git a/ffi/capi/bindings/js/ICU4XComposingNormalizer.d.ts b/ffi/capi/bindings/js/ICU4XComposingNormalizer.d.ts index d6bc6a0783a..ec1c7d1a915 100644 --- a/ffi/capi/bindings/js/ICU4XComposingNormalizer.d.ts +++ b/ffi/capi/bindings/js/ICU4XComposingNormalizer.d.ts @@ -1,3 +1,4 @@ +import 
{ usize } from "./diplomat-runtime" import { FFIError } from "./diplomat-runtime" import { ICU4XDataError } from "./ICU4XDataError"; import { ICU4XDataProvider } from "./ICU4XDataProvider"; @@ -45,4 +46,30 @@ export class ICU4XComposingNormalizer { * See the {@link https://docs.rs/icu/latest/icu/normalizer/struct.ComposingNormalizer.html#method.is_normalized_utf8 Rust documentation for `is_normalized_utf8`} for more information. */ is_normalized(s: string): boolean; + + /** + + * Check if a string is normalized + + * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according to the WHATWG Encoding Standard. + + * See the {@link https://docs.rs/icu/latest/icu/normalizer/struct.ComposingNormalizer.html#method.is_normalized_utf16 Rust documentation for `is_normalized_utf16`} for more information. + */ + is_normalized_utf16(s: string): boolean; + + /** + + * Return the index a slice of potentially-invalid UTF-8 is normalized up to + + * See the {@link https://docs.rs/icu/latest/icu/normalizer/struct.ComposingNormalizer.html#method.is_normalized_utf8_up_to Rust documentation for `is_normalized_utf8_up_to`} for more information. + */ + is_normalized_up_to(s: string): usize; + + /** + + * Return the index a slice of potentially-invalid UTF-16 is normalized up to + + * See the {@link https://docs.rs/icu/latest/icu/normalizer/struct.ComposingNormalizer.html#method.is_normalized_utf16_up_to Rust documentation for `is_normalized_utf16_up_to`} for more information. 
+ */ + is_normalized_utf16_up_to(s: string): usize; } diff --git a/ffi/capi/bindings/js/ICU4XComposingNormalizer.mjs b/ffi/capi/bindings/js/ICU4XComposingNormalizer.mjs index ecd5bb50a89..af7be886151 100644 --- a/ffi/capi/bindings/js/ICU4XComposingNormalizer.mjs +++ b/ffi/capi/bindings/js/ICU4XComposingNormalizer.mjs @@ -65,4 +65,25 @@ export class ICU4XComposingNormalizer { buf_arg_s.free(); return diplomat_out; } + + is_normalized_utf16(arg_s) { + const buf_arg_s = diplomatRuntime.DiplomatBuf.str16(wasm, arg_s); + const diplomat_out = wasm.ICU4XComposingNormalizer_is_normalized_utf16(this.underlying, buf_arg_s.ptr, buf_arg_s.size); + buf_arg_s.free(); + return diplomat_out; + } + + is_normalized_up_to(arg_s) { + const buf_arg_s = diplomatRuntime.DiplomatBuf.str8(wasm, arg_s); + const diplomat_out = wasm.ICU4XComposingNormalizer_is_normalized_up_to(this.underlying, buf_arg_s.ptr, buf_arg_s.size); + buf_arg_s.free(); + return diplomat_out; + } + + is_normalized_utf16_up_to(arg_s) { + const buf_arg_s = diplomatRuntime.DiplomatBuf.str16(wasm, arg_s); + const diplomat_out = wasm.ICU4XComposingNormalizer_is_normalized_utf16_up_to(this.underlying, buf_arg_s.ptr, buf_arg_s.size); + buf_arg_s.free(); + return diplomat_out; + } } diff --git a/ffi/capi/bindings/js/ICU4XDecomposingNormalizer.d.ts b/ffi/capi/bindings/js/ICU4XDecomposingNormalizer.d.ts index 1f2d7942b67..2f9b0a0ee8c 100644 --- a/ffi/capi/bindings/js/ICU4XDecomposingNormalizer.d.ts +++ b/ffi/capi/bindings/js/ICU4XDecomposingNormalizer.d.ts @@ -1,3 +1,4 @@ +import { usize } from "./diplomat-runtime" import { FFIError } from "./diplomat-runtime" import { ICU4XDataError } from "./ICU4XDataError"; import { ICU4XDataProvider } from "./ICU4XDataProvider"; @@ -45,4 +46,30 @@ export class ICU4XDecomposingNormalizer { * See the {@link https://docs.rs/icu/latest/icu/normalizer/struct.DecomposingNormalizer.html#method.is_normalized_utf8 Rust documentation for `is_normalized_utf8`} for more information. 
*/ is_normalized(s: string): boolean; + + /** + + * Check if a string is normalized + + * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according to the WHATWG Encoding Standard. + + * See the {@link https://docs.rs/icu/latest/icu/normalizer/struct.DecomposingNormalizer.html#method.is_normalized_utf16 Rust documentation for `is_normalized_utf16`} for more information. + */ + is_normalized_utf16(s: string): boolean; + + /** + + * Return the index a slice of potentially-invalid UTF-8 is normalized up to + + * See the {@link https://docs.rs/icu/latest/icu/normalizer/struct.DecomposingNormalizer.html#method.is_normalized_utf8_up_to Rust documentation for `is_normalized_utf8_up_to`} for more information. + */ + is_normalized_up_to(s: string): usize; + + /** + + * Return the index a slice of potentially-invalid UTF-16 is normalized up to + + * See the {@link https://docs.rs/icu/latest/icu/normalizer/struct.DecomposingNormalizer.html#method.is_normalized_utf16_up_to Rust documentation for `is_normalized_utf16_up_to`} for more information. 
+ */ + is_normalized_utf16_up_to(s: string): usize; } diff --git a/ffi/capi/bindings/js/ICU4XDecomposingNormalizer.mjs b/ffi/capi/bindings/js/ICU4XDecomposingNormalizer.mjs index 210b1000fd9..8fb9b4bb2a1 100644 --- a/ffi/capi/bindings/js/ICU4XDecomposingNormalizer.mjs +++ b/ffi/capi/bindings/js/ICU4XDecomposingNormalizer.mjs @@ -65,4 +65,25 @@ export class ICU4XDecomposingNormalizer { buf_arg_s.free(); return diplomat_out; } + + is_normalized_utf16(arg_s) { + const buf_arg_s = diplomatRuntime.DiplomatBuf.str16(wasm, arg_s); + const diplomat_out = wasm.ICU4XDecomposingNormalizer_is_normalized_utf16(this.underlying, buf_arg_s.ptr, buf_arg_s.size); + buf_arg_s.free(); + return diplomat_out; + } + + is_normalized_up_to(arg_s) { + const buf_arg_s = diplomatRuntime.DiplomatBuf.str8(wasm, arg_s); + const diplomat_out = wasm.ICU4XDecomposingNormalizer_is_normalized_up_to(this.underlying, buf_arg_s.ptr, buf_arg_s.size); + buf_arg_s.free(); + return diplomat_out; + } + + is_normalized_utf16_up_to(arg_s) { + const buf_arg_s = diplomatRuntime.DiplomatBuf.str16(wasm, arg_s); + const diplomat_out = wasm.ICU4XDecomposingNormalizer_is_normalized_utf16_up_to(this.underlying, buf_arg_s.ptr, buf_arg_s.size); + buf_arg_s.free(); + return diplomat_out; + } } diff --git a/ffi/capi/src/normalizer.rs b/ffi/capi/src/normalizer.rs index 98136b38db2..22d38a255b7 100644 --- a/ffi/capi/src/normalizer.rs +++ b/ffi/capi/src/normalizer.rs @@ -74,6 +74,41 @@ pub mod ffi { pub fn is_normalized(&self, s: &DiplomatStr) -> bool { self.0.is_normalized_utf8(s) } + + /// Check if a string is normalized + /// + /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according + /// to the WHATWG Encoding Standard. 
+ #[diplomat::rust_link( + icu::normalizer::ComposingNormalizer::is_normalized_utf16, + FnInStruct + )] + pub fn is_normalized_utf16(&self, s: &DiplomatStr16) -> bool { + self.0.is_normalized_utf16(s) + } + + /// Return the index a slice of potentially-invalid UTF-8 is normalized up to + #[diplomat::rust_link( + icu::normalizer::ComposingNormalizer::is_normalized_utf8_up_to, + FnInStruct + )] + #[diplomat::rust_link( + icu::normalizer::ComposingNormalizer::is_normalized_up_to, + FnInStruct, + hidden + )] + pub fn is_normalized_up_to(&self, s: &DiplomatStr) -> usize { + self.0.is_normalized_utf8_up_to(s) + } + + /// Return the index a slice of potentially-invalid UTF-16 is normalized up to + #[diplomat::rust_link( + icu::normalizer::ComposingNormalizer::is_normalized_utf16_up_to, + FnInStruct + )] + pub fn is_normalized_utf16_up_to(&self, s: &DiplomatStr16) -> usize { + self.0.is_normalized_utf16_up_to(s) + } } #[diplomat::opaque] @@ -149,5 +184,40 @@ pub mod ffi { pub fn is_normalized(&self, s: &DiplomatStr) -> bool { self.0.is_normalized_utf8(s) } + + /// Check if a string is normalized + /// + /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according + /// to the WHATWG Encoding Standard. 
+ #[diplomat::rust_link( + icu::normalizer::DecomposingNormalizer::is_normalized_utf16, + FnInStruct + )] + pub fn is_normalized_utf16(&self, s: &DiplomatStr16) -> bool { + self.0.is_normalized_utf16(s) + } + + /// Return the index a slice of potentially-invalid UTF-8 is normalized up to + #[diplomat::rust_link( + icu::normalizer::DecomposingNormalizer::is_normalized_utf8_up_to, + FnInStruct + )] + #[diplomat::rust_link( + icu::normalizer::DecomposingNormalizer::is_normalized_up_to, + FnInStruct, + hidden + )] + pub fn is_normalized_up_to(&self, s: &DiplomatStr) -> usize { + self.0.is_normalized_utf8_up_to(s) + } + + /// Return the index a slice of potentially-invalid UTF-16 is normalized up to + #[diplomat::rust_link( + icu::normalizer::DecomposingNormalizer::is_normalized_utf16_up_to, + FnInStruct + )] + pub fn is_normalized_utf16_up_to(&self, s: &DiplomatStr16) -> usize { + self.0.is_normalized_utf16_up_to(s) + } } } diff --git a/tools/ffi_coverage/src/allowlist.rs b/tools/ffi_coverage/src/allowlist.rs index ca5ba4a45ce..6f2ea0ad812 100644 --- a/tools/ffi_coverage/src/allowlist.rs +++ b/tools/ffi_coverage/src/allowlist.rs @@ -241,10 +241,8 @@ lazy_static::lazy_static! { // Do not want for 2.0: we need DiplomatWriteable16 "icu::normalizer::ComposingNormalizer::normalize_utf16", "icu::normalizer::ComposingNormalizer::normalize_utf16_to", - "icu::normalizer::ComposingNormalizer::is_normalized_utf16", "icu::normalizer::DecomposingNormalizer::normalize_utf16", "icu::normalizer::DecomposingNormalizer::normalize_utf16_to", - "icu::normalizer::DecomposingNormalizer::is_normalized_utf16", // Do not want for 2.0: // Can't be exposed till diplomat has input iterators, as well as