From df05d0385af1885f8d46dc573157a1025e10e1a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mi=C8=9Bca=20Dumitru?= Date: Tue, 25 Jul 2023 21:53:47 +0300 Subject: [PATCH] glib: Bind `g_unichar` APIs Some notes: * names were made "rustier", e.g. `g_unichar_istitle` became `is_titlecase` * I did not include APIs that were already available through Rust's char --- .typos.toml | 2 +- glib/Gir.toml | 11 + glib/src/auto/enums.rs | 445 +++++++++++++++++++++++++++++++++++++++++ glib/src/auto/mod.rs | 2 + glib/src/lib.rs | 2 + glib/src/unichar.rs | 243 ++++++++++++++++++++++ 6 files changed, 704 insertions(+), 1 deletion(-) create mode 100644 glib/src/unichar.rs diff --git a/.typos.toml b/.typos.toml index d6492320e734..b1f994b22659 100644 --- a/.typos.toml +++ b/.typos.toml @@ -1,5 +1,5 @@ [files] -extend-exclude = ["auto", "sys", "gobject-sys", "*.svg"] +extend-exclude = ["auto", "sys", "gobject-sys", "*.svg", "glib/Gir.toml"] [default.extend-words] # Ignore false-positives diff --git a/glib/Gir.toml b/glib/Gir.toml index 313c4b095212..f3702aedda51 100644 --- a/glib/Gir.toml +++ b/glib/Gir.toml @@ -31,6 +31,7 @@ generate = [ "GLib.SpawnFlags", "GLib.Time", "GLib.TimeType", + "GLib.UnicodeType", "GLib.UriError", "GLib.UriFlags", "GLib.UriHideFlags", @@ -513,6 +514,9 @@ status = "generate" name = "variant_get_gtype" # get_type() function that should be used in StaticType impl instead ignore = true + [[object.function]] + pattern = "unichar_((break_)?type|get_(script|mirror_char)|combining_class|is(mark|graph|punct|title|defined|wide(_cjk)?|zerowidth)|totitle|(fully_)?decompose|compose)" + manual = true # defined as extension methods on char [[object]] name = "GLib.Checksum" @@ -771,6 +775,13 @@ concurrency = "send+sync" # in-out parameter manual = true +[[object]] +name = "GLib.UnicodeBreakType" +status = "generate" + [[object.member]] + name = "close_paranthesis" + ignore = true + [[object]] name = "GLib.UnicodeScript" status = "generate" diff --git a/glib/src/auto/enums.rs
b/glib/src/auto/enums.rs index d34e42797689..7b6706ef534b 100644 --- a/glib/src/auto/enums.rs +++ b/glib/src/auto/enums.rs @@ -1052,6 +1052,261 @@ impl FromGlib for TimeType { } } +#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)] +#[non_exhaustive] +#[doc(alias = "GUnicodeBreakType")] +pub enum UnicodeBreakType { + #[doc(alias = "G_UNICODE_BREAK_MANDATORY")] + Mandatory, + #[doc(alias = "G_UNICODE_BREAK_CARRIAGE_RETURN")] + CarriageReturn, + #[doc(alias = "G_UNICODE_BREAK_LINE_FEED")] + LineFeed, + #[doc(alias = "G_UNICODE_BREAK_COMBINING_MARK")] + CombiningMark, + #[doc(alias = "G_UNICODE_BREAK_SURROGATE")] + Surrogate, + #[doc(alias = "G_UNICODE_BREAK_ZERO_WIDTH_SPACE")] + ZeroWidthSpace, + #[doc(alias = "G_UNICODE_BREAK_INSEPARABLE")] + Inseparable, + #[doc(alias = "G_UNICODE_BREAK_NON_BREAKING_GLUE")] + NonBreakingGlue, + #[doc(alias = "G_UNICODE_BREAK_CONTINGENT")] + Contingent, + #[doc(alias = "G_UNICODE_BREAK_SPACE")] + Space, + #[doc(alias = "G_UNICODE_BREAK_AFTER")] + After, + #[doc(alias = "G_UNICODE_BREAK_BEFORE")] + Before, + #[doc(alias = "G_UNICODE_BREAK_BEFORE_AND_AFTER")] + BeforeAndAfter, + #[doc(alias = "G_UNICODE_BREAK_HYPHEN")] + Hyphen, + #[doc(alias = "G_UNICODE_BREAK_NON_STARTER")] + NonStarter, + #[doc(alias = "G_UNICODE_BREAK_OPEN_PUNCTUATION")] + OpenPunctuation, + #[doc(alias = "G_UNICODE_BREAK_CLOSE_PUNCTUATION")] + ClosePunctuation, + #[doc(alias = "G_UNICODE_BREAK_QUOTATION")] + Quotation, + #[doc(alias = "G_UNICODE_BREAK_EXCLAMATION")] + Exclamation, + #[doc(alias = "G_UNICODE_BREAK_IDEOGRAPHIC")] + Ideographic, + #[doc(alias = "G_UNICODE_BREAK_NUMERIC")] + Numeric, + #[doc(alias = "G_UNICODE_BREAK_INFIX_SEPARATOR")] + InfixSeparator, + #[doc(alias = "G_UNICODE_BREAK_SYMBOL")] + Symbol, + #[doc(alias = "G_UNICODE_BREAK_ALPHABETIC")] + Alphabetic, + #[doc(alias = "G_UNICODE_BREAK_PREFIX")] + Prefix, + #[doc(alias = "G_UNICODE_BREAK_POSTFIX")] + Postfix, + #[doc(alias = "G_UNICODE_BREAK_COMPLEX_CONTEXT")] + 
ComplexContext, + #[doc(alias = "G_UNICODE_BREAK_AMBIGUOUS")] + Ambiguous, + #[doc(alias = "G_UNICODE_BREAK_UNKNOWN")] + Unknown, + #[doc(alias = "G_UNICODE_BREAK_NEXT_LINE")] + NextLine, + #[doc(alias = "G_UNICODE_BREAK_WORD_JOINER")] + WordJoiner, + #[doc(alias = "G_UNICODE_BREAK_HANGUL_L_JAMO")] + HangulLJamo, + #[doc(alias = "G_UNICODE_BREAK_HANGUL_V_JAMO")] + HangulVJamo, + #[doc(alias = "G_UNICODE_BREAK_HANGUL_T_JAMO")] + HangulTJamo, + #[doc(alias = "G_UNICODE_BREAK_HANGUL_LV_SYLLABLE")] + HangulLvSyllable, + #[doc(alias = "G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE")] + HangulLvtSyllable, + #[doc(alias = "G_UNICODE_BREAK_CLOSE_PARENTHESIS")] + CloseParenthesis, + #[doc(alias = "G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER")] + ConditionalJapaneseStarter, + #[doc(alias = "G_UNICODE_BREAK_HEBREW_LETTER")] + HebrewLetter, + #[doc(alias = "G_UNICODE_BREAK_REGIONAL_INDICATOR")] + RegionalIndicator, + #[doc(alias = "G_UNICODE_BREAK_EMOJI_BASE")] + EmojiBase, + #[doc(alias = "G_UNICODE_BREAK_EMOJI_MODIFIER")] + EmojiModifier, + #[doc(alias = "G_UNICODE_BREAK_ZERO_WIDTH_JOINER")] + ZeroWidthJoiner, + #[doc(hidden)] + __Unknown(i32), +} + +impl fmt::Display for UnicodeBreakType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "UnicodeBreakType::{}", + match *self { + Self::Mandatory => "Mandatory", + Self::CarriageReturn => "CarriageReturn", + Self::LineFeed => "LineFeed", + Self::CombiningMark => "CombiningMark", + Self::Surrogate => "Surrogate", + Self::ZeroWidthSpace => "ZeroWidthSpace", + Self::Inseparable => "Inseparable", + Self::NonBreakingGlue => "NonBreakingGlue", + Self::Contingent => "Contingent", + Self::Space => "Space", + Self::After => "After", + Self::Before => "Before", + Self::BeforeAndAfter => "BeforeAndAfter", + Self::Hyphen => "Hyphen", + Self::NonStarter => "NonStarter", + Self::OpenPunctuation => "OpenPunctuation", + Self::ClosePunctuation => "ClosePunctuation", + Self::Quotation => "Quotation", + Self::Exclamation => 
"Exclamation", + Self::Ideographic => "Ideographic", + Self::Numeric => "Numeric", + Self::InfixSeparator => "InfixSeparator", + Self::Symbol => "Symbol", + Self::Alphabetic => "Alphabetic", + Self::Prefix => "Prefix", + Self::Postfix => "Postfix", + Self::ComplexContext => "ComplexContext", + Self::Ambiguous => "Ambiguous", + Self::Unknown => "Unknown", + Self::NextLine => "NextLine", + Self::WordJoiner => "WordJoiner", + Self::HangulLJamo => "HangulLJamo", + Self::HangulVJamo => "HangulVJamo", + Self::HangulTJamo => "HangulTJamo", + Self::HangulLvSyllable => "HangulLvSyllable", + Self::HangulLvtSyllable => "HangulLvtSyllable", + Self::CloseParenthesis => "CloseParenthesis", + Self::ConditionalJapaneseStarter => "ConditionalJapaneseStarter", + Self::HebrewLetter => "HebrewLetter", + Self::RegionalIndicator => "RegionalIndicator", + Self::EmojiBase => "EmojiBase", + Self::EmojiModifier => "EmojiModifier", + Self::ZeroWidthJoiner => "ZeroWidthJoiner", + _ => "Unknown", + } + ) + } +} + +#[doc(hidden)] +impl IntoGlib for UnicodeBreakType { + type GlibType = ffi::GUnicodeBreakType; + + fn into_glib(self) -> ffi::GUnicodeBreakType { + match self { + Self::Mandatory => ffi::G_UNICODE_BREAK_MANDATORY, + Self::CarriageReturn => ffi::G_UNICODE_BREAK_CARRIAGE_RETURN, + Self::LineFeed => ffi::G_UNICODE_BREAK_LINE_FEED, + Self::CombiningMark => ffi::G_UNICODE_BREAK_COMBINING_MARK, + Self::Surrogate => ffi::G_UNICODE_BREAK_SURROGATE, + Self::ZeroWidthSpace => ffi::G_UNICODE_BREAK_ZERO_WIDTH_SPACE, + Self::Inseparable => ffi::G_UNICODE_BREAK_INSEPARABLE, + Self::NonBreakingGlue => ffi::G_UNICODE_BREAK_NON_BREAKING_GLUE, + Self::Contingent => ffi::G_UNICODE_BREAK_CONTINGENT, + Self::Space => ffi::G_UNICODE_BREAK_SPACE, + Self::After => ffi::G_UNICODE_BREAK_AFTER, + Self::Before => ffi::G_UNICODE_BREAK_BEFORE, + Self::BeforeAndAfter => ffi::G_UNICODE_BREAK_BEFORE_AND_AFTER, + Self::Hyphen => ffi::G_UNICODE_BREAK_HYPHEN, + Self::NonStarter => ffi::G_UNICODE_BREAK_NON_STARTER, + 
Self::OpenPunctuation => ffi::G_UNICODE_BREAK_OPEN_PUNCTUATION, + Self::ClosePunctuation => ffi::G_UNICODE_BREAK_CLOSE_PUNCTUATION, + Self::Quotation => ffi::G_UNICODE_BREAK_QUOTATION, + Self::Exclamation => ffi::G_UNICODE_BREAK_EXCLAMATION, + Self::Ideographic => ffi::G_UNICODE_BREAK_IDEOGRAPHIC, + Self::Numeric => ffi::G_UNICODE_BREAK_NUMERIC, + Self::InfixSeparator => ffi::G_UNICODE_BREAK_INFIX_SEPARATOR, + Self::Symbol => ffi::G_UNICODE_BREAK_SYMBOL, + Self::Alphabetic => ffi::G_UNICODE_BREAK_ALPHABETIC, + Self::Prefix => ffi::G_UNICODE_BREAK_PREFIX, + Self::Postfix => ffi::G_UNICODE_BREAK_POSTFIX, + Self::ComplexContext => ffi::G_UNICODE_BREAK_COMPLEX_CONTEXT, + Self::Ambiguous => ffi::G_UNICODE_BREAK_AMBIGUOUS, + Self::Unknown => ffi::G_UNICODE_BREAK_UNKNOWN, + Self::NextLine => ffi::G_UNICODE_BREAK_NEXT_LINE, + Self::WordJoiner => ffi::G_UNICODE_BREAK_WORD_JOINER, + Self::HangulLJamo => ffi::G_UNICODE_BREAK_HANGUL_L_JAMO, + Self::HangulVJamo => ffi::G_UNICODE_BREAK_HANGUL_V_JAMO, + Self::HangulTJamo => ffi::G_UNICODE_BREAK_HANGUL_T_JAMO, + Self::HangulLvSyllable => ffi::G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, + Self::HangulLvtSyllable => ffi::G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE, + Self::CloseParenthesis => ffi::G_UNICODE_BREAK_CLOSE_PARENTHESIS, + Self::ConditionalJapaneseStarter => ffi::G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER, + Self::HebrewLetter => ffi::G_UNICODE_BREAK_HEBREW_LETTER, + Self::RegionalIndicator => ffi::G_UNICODE_BREAK_REGIONAL_INDICATOR, + Self::EmojiBase => ffi::G_UNICODE_BREAK_EMOJI_BASE, + Self::EmojiModifier => ffi::G_UNICODE_BREAK_EMOJI_MODIFIER, + Self::ZeroWidthJoiner => ffi::G_UNICODE_BREAK_ZERO_WIDTH_JOINER, + Self::__Unknown(value) => value, + } + } +} + +#[doc(hidden)] +impl FromGlib<ffi::GUnicodeBreakType> for UnicodeBreakType { + unsafe fn from_glib(value: ffi::GUnicodeBreakType) -> Self { + match value { + ffi::G_UNICODE_BREAK_MANDATORY => Self::Mandatory, + ffi::G_UNICODE_BREAK_CARRIAGE_RETURN => Self::CarriageReturn, +
ffi::G_UNICODE_BREAK_LINE_FEED => Self::LineFeed, + ffi::G_UNICODE_BREAK_COMBINING_MARK => Self::CombiningMark, + ffi::G_UNICODE_BREAK_SURROGATE => Self::Surrogate, + ffi::G_UNICODE_BREAK_ZERO_WIDTH_SPACE => Self::ZeroWidthSpace, + ffi::G_UNICODE_BREAK_INSEPARABLE => Self::Inseparable, + ffi::G_UNICODE_BREAK_NON_BREAKING_GLUE => Self::NonBreakingGlue, + ffi::G_UNICODE_BREAK_CONTINGENT => Self::Contingent, + ffi::G_UNICODE_BREAK_SPACE => Self::Space, + ffi::G_UNICODE_BREAK_AFTER => Self::After, + ffi::G_UNICODE_BREAK_BEFORE => Self::Before, + ffi::G_UNICODE_BREAK_BEFORE_AND_AFTER => Self::BeforeAndAfter, + ffi::G_UNICODE_BREAK_HYPHEN => Self::Hyphen, + ffi::G_UNICODE_BREAK_NON_STARTER => Self::NonStarter, + ffi::G_UNICODE_BREAK_OPEN_PUNCTUATION => Self::OpenPunctuation, + ffi::G_UNICODE_BREAK_CLOSE_PUNCTUATION => Self::ClosePunctuation, + ffi::G_UNICODE_BREAK_QUOTATION => Self::Quotation, + ffi::G_UNICODE_BREAK_EXCLAMATION => Self::Exclamation, + ffi::G_UNICODE_BREAK_IDEOGRAPHIC => Self::Ideographic, + ffi::G_UNICODE_BREAK_NUMERIC => Self::Numeric, + ffi::G_UNICODE_BREAK_INFIX_SEPARATOR => Self::InfixSeparator, + ffi::G_UNICODE_BREAK_SYMBOL => Self::Symbol, + ffi::G_UNICODE_BREAK_ALPHABETIC => Self::Alphabetic, + ffi::G_UNICODE_BREAK_PREFIX => Self::Prefix, + ffi::G_UNICODE_BREAK_POSTFIX => Self::Postfix, + ffi::G_UNICODE_BREAK_COMPLEX_CONTEXT => Self::ComplexContext, + ffi::G_UNICODE_BREAK_AMBIGUOUS => Self::Ambiguous, + ffi::G_UNICODE_BREAK_UNKNOWN => Self::Unknown, + ffi::G_UNICODE_BREAK_NEXT_LINE => Self::NextLine, + ffi::G_UNICODE_BREAK_WORD_JOINER => Self::WordJoiner, + ffi::G_UNICODE_BREAK_HANGUL_L_JAMO => Self::HangulLJamo, + ffi::G_UNICODE_BREAK_HANGUL_V_JAMO => Self::HangulVJamo, + ffi::G_UNICODE_BREAK_HANGUL_T_JAMO => Self::HangulTJamo, + ffi::G_UNICODE_BREAK_HANGUL_LV_SYLLABLE => Self::HangulLvSyllable, + ffi::G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE => Self::HangulLvtSyllable, + ffi::G_UNICODE_BREAK_CLOSE_PARENTHESIS => Self::CloseParenthesis, + 
ffi::G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER => Self::ConditionalJapaneseStarter, + ffi::G_UNICODE_BREAK_HEBREW_LETTER => Self::HebrewLetter, + ffi::G_UNICODE_BREAK_REGIONAL_INDICATOR => Self::RegionalIndicator, + ffi::G_UNICODE_BREAK_EMOJI_BASE => Self::EmojiBase, + ffi::G_UNICODE_BREAK_EMOJI_MODIFIER => Self::EmojiModifier, + ffi::G_UNICODE_BREAK_ZERO_WIDTH_JOINER => Self::ZeroWidthJoiner, + value => Self::__Unknown(value), + } + } +} + #[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)] #[non_exhaustive] #[doc(alias = "GUnicodeScript")] @@ -1962,6 +2217,196 @@ impl FromGlib for UnicodeScript { } } +#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)] +#[non_exhaustive] +#[doc(alias = "GUnicodeType")] +pub enum UnicodeType { + #[doc(alias = "G_UNICODE_CONTROL")] + Control, + #[doc(alias = "G_UNICODE_FORMAT")] + Format, + #[doc(alias = "G_UNICODE_UNASSIGNED")] + Unassigned, + #[doc(alias = "G_UNICODE_PRIVATE_USE")] + PrivateUse, + #[doc(alias = "G_UNICODE_SURROGATE")] + Surrogate, + #[doc(alias = "G_UNICODE_LOWERCASE_LETTER")] + LowercaseLetter, + #[doc(alias = "G_UNICODE_MODIFIER_LETTER")] + ModifierLetter, + #[doc(alias = "G_UNICODE_OTHER_LETTER")] + OtherLetter, + #[doc(alias = "G_UNICODE_TITLECASE_LETTER")] + TitlecaseLetter, + #[doc(alias = "G_UNICODE_UPPERCASE_LETTER")] + UppercaseLetter, + #[doc(alias = "G_UNICODE_SPACING_MARK")] + SpacingMark, + #[doc(alias = "G_UNICODE_ENCLOSING_MARK")] + EnclosingMark, + #[doc(alias = "G_UNICODE_NON_SPACING_MARK")] + NonSpacingMark, + #[doc(alias = "G_UNICODE_DECIMAL_NUMBER")] + DecimalNumber, + #[doc(alias = "G_UNICODE_LETTER_NUMBER")] + LetterNumber, + #[doc(alias = "G_UNICODE_OTHER_NUMBER")] + OtherNumber, + #[doc(alias = "G_UNICODE_CONNECT_PUNCTUATION")] + ConnectPunctuation, + #[doc(alias = "G_UNICODE_DASH_PUNCTUATION")] + DashPunctuation, + #[doc(alias = "G_UNICODE_CLOSE_PUNCTUATION")] + ClosePunctuation, + #[doc(alias = "G_UNICODE_FINAL_PUNCTUATION")] + FinalPunctuation, + 
#[doc(alias = "G_UNICODE_INITIAL_PUNCTUATION")] + InitialPunctuation, + #[doc(alias = "G_UNICODE_OTHER_PUNCTUATION")] + OtherPunctuation, + #[doc(alias = "G_UNICODE_OPEN_PUNCTUATION")] + OpenPunctuation, + #[doc(alias = "G_UNICODE_CURRENCY_SYMBOL")] + CurrencySymbol, + #[doc(alias = "G_UNICODE_MODIFIER_SYMBOL")] + ModifierSymbol, + #[doc(alias = "G_UNICODE_MATH_SYMBOL")] + MathSymbol, + #[doc(alias = "G_UNICODE_OTHER_SYMBOL")] + OtherSymbol, + #[doc(alias = "G_UNICODE_LINE_SEPARATOR")] + LineSeparator, + #[doc(alias = "G_UNICODE_PARAGRAPH_SEPARATOR")] + ParagraphSeparator, + #[doc(alias = "G_UNICODE_SPACE_SEPARATOR")] + SpaceSeparator, + #[doc(hidden)] + __Unknown(i32), +} + +impl fmt::Display for UnicodeType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "UnicodeType::{}", + match *self { + Self::Control => "Control", + Self::Format => "Format", + Self::Unassigned => "Unassigned", + Self::PrivateUse => "PrivateUse", + Self::Surrogate => "Surrogate", + Self::LowercaseLetter => "LowercaseLetter", + Self::ModifierLetter => "ModifierLetter", + Self::OtherLetter => "OtherLetter", + Self::TitlecaseLetter => "TitlecaseLetter", + Self::UppercaseLetter => "UppercaseLetter", + Self::SpacingMark => "SpacingMark", + Self::EnclosingMark => "EnclosingMark", + Self::NonSpacingMark => "NonSpacingMark", + Self::DecimalNumber => "DecimalNumber", + Self::LetterNumber => "LetterNumber", + Self::OtherNumber => "OtherNumber", + Self::ConnectPunctuation => "ConnectPunctuation", + Self::DashPunctuation => "DashPunctuation", + Self::ClosePunctuation => "ClosePunctuation", + Self::FinalPunctuation => "FinalPunctuation", + Self::InitialPunctuation => "InitialPunctuation", + Self::OtherPunctuation => "OtherPunctuation", + Self::OpenPunctuation => "OpenPunctuation", + Self::CurrencySymbol => "CurrencySymbol", + Self::ModifierSymbol => "ModifierSymbol", + Self::MathSymbol => "MathSymbol", + Self::OtherSymbol => "OtherSymbol", + Self::LineSeparator => 
"LineSeparator", + Self::ParagraphSeparator => "ParagraphSeparator", + Self::SpaceSeparator => "SpaceSeparator", + _ => "Unknown", + } + ) + } +} + +#[doc(hidden)] +impl IntoGlib for UnicodeType { + type GlibType = ffi::GUnicodeType; + + fn into_glib(self) -> ffi::GUnicodeType { + match self { + Self::Control => ffi::G_UNICODE_CONTROL, + Self::Format => ffi::G_UNICODE_FORMAT, + Self::Unassigned => ffi::G_UNICODE_UNASSIGNED, + Self::PrivateUse => ffi::G_UNICODE_PRIVATE_USE, + Self::Surrogate => ffi::G_UNICODE_SURROGATE, + Self::LowercaseLetter => ffi::G_UNICODE_LOWERCASE_LETTER, + Self::ModifierLetter => ffi::G_UNICODE_MODIFIER_LETTER, + Self::OtherLetter => ffi::G_UNICODE_OTHER_LETTER, + Self::TitlecaseLetter => ffi::G_UNICODE_TITLECASE_LETTER, + Self::UppercaseLetter => ffi::G_UNICODE_UPPERCASE_LETTER, + Self::SpacingMark => ffi::G_UNICODE_SPACING_MARK, + Self::EnclosingMark => ffi::G_UNICODE_ENCLOSING_MARK, + Self::NonSpacingMark => ffi::G_UNICODE_NON_SPACING_MARK, + Self::DecimalNumber => ffi::G_UNICODE_DECIMAL_NUMBER, + Self::LetterNumber => ffi::G_UNICODE_LETTER_NUMBER, + Self::OtherNumber => ffi::G_UNICODE_OTHER_NUMBER, + Self::ConnectPunctuation => ffi::G_UNICODE_CONNECT_PUNCTUATION, + Self::DashPunctuation => ffi::G_UNICODE_DASH_PUNCTUATION, + Self::ClosePunctuation => ffi::G_UNICODE_CLOSE_PUNCTUATION, + Self::FinalPunctuation => ffi::G_UNICODE_FINAL_PUNCTUATION, + Self::InitialPunctuation => ffi::G_UNICODE_INITIAL_PUNCTUATION, + Self::OtherPunctuation => ffi::G_UNICODE_OTHER_PUNCTUATION, + Self::OpenPunctuation => ffi::G_UNICODE_OPEN_PUNCTUATION, + Self::CurrencySymbol => ffi::G_UNICODE_CURRENCY_SYMBOL, + Self::ModifierSymbol => ffi::G_UNICODE_MODIFIER_SYMBOL, + Self::MathSymbol => ffi::G_UNICODE_MATH_SYMBOL, + Self::OtherSymbol => ffi::G_UNICODE_OTHER_SYMBOL, + Self::LineSeparator => ffi::G_UNICODE_LINE_SEPARATOR, + Self::ParagraphSeparator => ffi::G_UNICODE_PARAGRAPH_SEPARATOR, + Self::SpaceSeparator => ffi::G_UNICODE_SPACE_SEPARATOR, + 
Self::__Unknown(value) => value, + } + } +} + +#[doc(hidden)] +impl FromGlib<ffi::GUnicodeType> for UnicodeType { + unsafe fn from_glib(value: ffi::GUnicodeType) -> Self { + match value { + ffi::G_UNICODE_CONTROL => Self::Control, + ffi::G_UNICODE_FORMAT => Self::Format, + ffi::G_UNICODE_UNASSIGNED => Self::Unassigned, + ffi::G_UNICODE_PRIVATE_USE => Self::PrivateUse, + ffi::G_UNICODE_SURROGATE => Self::Surrogate, + ffi::G_UNICODE_LOWERCASE_LETTER => Self::LowercaseLetter, + ffi::G_UNICODE_MODIFIER_LETTER => Self::ModifierLetter, + ffi::G_UNICODE_OTHER_LETTER => Self::OtherLetter, + ffi::G_UNICODE_TITLECASE_LETTER => Self::TitlecaseLetter, + ffi::G_UNICODE_UPPERCASE_LETTER => Self::UppercaseLetter, + ffi::G_UNICODE_SPACING_MARK => Self::SpacingMark, + ffi::G_UNICODE_ENCLOSING_MARK => Self::EnclosingMark, + ffi::G_UNICODE_NON_SPACING_MARK => Self::NonSpacingMark, + ffi::G_UNICODE_DECIMAL_NUMBER => Self::DecimalNumber, + ffi::G_UNICODE_LETTER_NUMBER => Self::LetterNumber, + ffi::G_UNICODE_OTHER_NUMBER => Self::OtherNumber, + ffi::G_UNICODE_CONNECT_PUNCTUATION => Self::ConnectPunctuation, + ffi::G_UNICODE_DASH_PUNCTUATION => Self::DashPunctuation, + ffi::G_UNICODE_CLOSE_PUNCTUATION => Self::ClosePunctuation, + ffi::G_UNICODE_FINAL_PUNCTUATION => Self::FinalPunctuation, + ffi::G_UNICODE_INITIAL_PUNCTUATION => Self::InitialPunctuation, + ffi::G_UNICODE_OTHER_PUNCTUATION => Self::OtherPunctuation, + ffi::G_UNICODE_OPEN_PUNCTUATION => Self::OpenPunctuation, + ffi::G_UNICODE_CURRENCY_SYMBOL => Self::CurrencySymbol, + ffi::G_UNICODE_MODIFIER_SYMBOL => Self::ModifierSymbol, + ffi::G_UNICODE_MATH_SYMBOL => Self::MathSymbol, + ffi::G_UNICODE_OTHER_SYMBOL => Self::OtherSymbol, + ffi::G_UNICODE_LINE_SEPARATOR => Self::LineSeparator, + ffi::G_UNICODE_PARAGRAPH_SEPARATOR => Self::ParagraphSeparator, + ffi::G_UNICODE_SPACE_SEPARATOR => Self::SpaceSeparator, + value => Self::__Unknown(value), + } + } +} + #[cfg(feature = "v2_66")] #[cfg_attr(docsrs, doc(cfg(feature = "v2_66")))] #[derive(Debug, Eq,
PartialEq, Ord, PartialOrd, Hash, Clone, Copy)] diff --git a/glib/src/auto/mod.rs b/glib/src/auto/mod.rs index 801f25bf3443..bbe89c83a522 100644 --- a/glib/src/auto/mod.rs +++ b/glib/src/auto/mod.rs @@ -46,7 +46,9 @@ pub use self::enums::NormalizeMode; pub use self::enums::OptionArg; pub use self::enums::SeekType; pub use self::enums::TimeType; +pub use self::enums::UnicodeBreakType; pub use self::enums::UnicodeScript; +pub use self::enums::UnicodeType; #[cfg(feature = "v2_66")] #[cfg_attr(docsrs, doc(cfg(feature = "v2_66")))] pub use self::enums::UriError; diff --git a/glib/src/lib.rs b/glib/src/lib.rs index 0374fdbe1ee7..24e141343640 100644 --- a/glib/src/lib.rs +++ b/glib/src/lib.rs @@ -149,6 +149,8 @@ mod unicollate; pub use self::unicollate::{CollationKey, FilenameCollationKey}; mod utils; pub use self::utils::*; +mod unichar; +pub use self::unichar::*; mod main_context; mod main_context_channel; pub use self::{ diff --git a/glib/src/unichar.rs b/glib/src/unichar.rs new file mode 100644 index 000000000000..435744e11fca --- /dev/null +++ b/glib/src/unichar.rs @@ -0,0 +1,243 @@ +// Take a look at the license at the top of the repository in the LICENSE file. 
+ +use std::mem::MaybeUninit; + +use crate::{ + translate::{from_glib, IntoGlib, UnsafeFrom}, + UnicodeBreakType, UnicodeScript, UnicodeType, +}; + +mod sealed { + pub trait Sealed {} + impl Sealed for char {} +} + +impl UnsafeFrom<u32> for char { + #[inline] + unsafe fn unsafe_from(t: u32) -> Self { + debug_assert!( + char::try_from(t).is_ok(), + "glib returned an invalid Unicode codepoint" + ); + unsafe { char::from_u32_unchecked(t) } + } +} + +// rustdoc-stripper-ignore-next +/// The kind of decomposition to perform +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum DecompositionKind { + // rustdoc-stripper-ignore-next + /// Compatibility decomposition + Compatibility, + + // rustdoc-stripper-ignore-next + /// Canonical decomposition + Canonical, +} + +// rustdoc-stripper-ignore-next +/// The result of a single step of the Unicode canonical decomposition algorithm +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum CharacterDecomposition { + // rustdoc-stripper-ignore-next + /// The character could not be decomposed further + NoDecomposition, + // rustdoc-stripper-ignore-next + /// A 'singleton' decomposition, which means the character was replaced by another + Singleton(char), + // rustdoc-stripper-ignore-next + /// The first character may decompose further, but the second cannot + Pair(char, char), +} + +// rustdoc-stripper-ignore-next +/// This trait provides access to Unicode character classification and manipulation functions +/// provided by GLib that do not exist in the standard library +#[doc(alias = "g_unichar")] +pub trait Unichar: sealed::Sealed + Copy + Into<u32> + UnsafeFrom<u32> { + #[doc(alias = "g_unichar_type")] + #[doc(alias = "unichar_type")] + #[inline] + fn unicode_type(self) -> UnicodeType { + unsafe { from_glib(ffi::g_unichar_type(self.into())) } + } + + #[doc(alias = "g_unichar_break_type")] + #[doc(alias = "unichar_break_type")] + #[inline] + fn break_type(self) -> UnicodeBreakType { + unsafe {
from_glib(ffi::g_unichar_break_type(self.into())) } + } + + #[doc(alias = "g_unichar_get_script")] + #[doc(alias = "unichar_get_script")] + #[inline] + fn script(self) -> UnicodeScript { + unsafe { from_glib(ffi::g_unichar_get_script(self.into())) } + } + + #[doc(alias = "g_unichar_combining_class")] + #[doc(alias = "unichar_combining_class")] + #[inline] + fn combining_class(self) -> u8 { + // UAX #44 § 5.7.4: The character property invariants regarding Canonical_Combining_Class + // guarantee that [...] all values used will be in the range 0..254. + // So this cast is fine + unsafe { ffi::g_unichar_combining_class(self.into()) as u8 } + } + + #[doc(alias = "g_unichar_ismark")] + #[doc(alias = "unichar_ismark")] + #[inline] + fn is_mark(self) -> bool { + unsafe { from_glib(ffi::g_unichar_ismark(self.into())) } + } + + #[doc(alias = "g_unichar_isgraph")] + #[doc(alias = "unichar_isgraph")] + #[inline] + fn is_graphical(self) -> bool { + unsafe { from_glib(ffi::g_unichar_isgraph(self.into())) } + } + + #[doc(alias = "g_unichar_ispunct")] + #[doc(alias = "unichar_ispunct")] + #[inline] + fn is_punctuation(self) -> bool { + unsafe { from_glib(ffi::g_unichar_ispunct(self.into())) } + } + + #[doc(alias = "g_unichar_istitle")] + #[doc(alias = "unichar_istitle")] + #[inline] + fn is_titlecase(self) -> bool { + unsafe { from_glib(ffi::g_unichar_istitle(self.into())) } + } + + #[doc(alias = "g_unichar_isdefined")] + #[doc(alias = "unichar_isdefined")] + #[inline] + fn is_defined(self) -> bool { + unsafe { from_glib(ffi::g_unichar_isdefined(self.into())) } + } + + #[doc(alias = "g_unichar_iswide")] + #[doc(alias = "unichar_iswide")] + #[inline] + fn is_wide(self) -> bool { + unsafe { from_glib(ffi::g_unichar_iswide(self.into())) } + } + + #[doc(alias = "g_unichar_iswide_cjk")] + #[doc(alias = "unichar_iswide_cjk")] + #[inline] + fn is_wide_cjk(self) -> bool { + unsafe { from_glib(ffi::g_unichar_iswide_cjk(self.into())) } + } + + #[doc(alias = "g_unichar_iszerowidth")] +
#[doc(alias = "unichar_iszerowidth")] + #[inline] + fn is_zero_width(self) -> bool { + unsafe { from_glib(ffi::g_unichar_iszerowidth(self.into())) } + } + + #[doc(alias = "g_unichar_totitle")] + #[doc(alias = "unichar_totitle")] + #[inline] + fn to_titlecase(self) -> Self { + unsafe { Self::unsafe_from(ffi::g_unichar_totitle(self.into())) } + } + + #[doc(alias = "g_unichar_get_mirror_char")] + #[doc(alias = "unichar_get_mirror_char")] + #[inline] + fn mirror_char(self) -> Option<Self> { + // SAFETY: If g_unichar_get_mirror_char returns true, it will initialize `mirrored` + unsafe { + let mut mirrored = MaybeUninit::uninit(); + let res = from_glib(ffi::g_unichar_get_mirror_char( + self.into(), + mirrored.as_mut_ptr(), + )); + if res { + Some(Self::unsafe_from(mirrored.assume_init())) + } else { + None + } + } + } + + #[doc(alias = "g_unichar_fully_decompose")] + #[doc(alias = "unichar_fully_decompose")] + #[inline] + fn fully_decompose(self, decomposition_kind: DecompositionKind) -> Vec<Self> { + let compat = match decomposition_kind { + DecompositionKind::Compatibility => true, + DecompositionKind::Canonical => false, + }; + let buffer_len = ffi::G_UNICHAR_MAX_DECOMPOSITION_LENGTH as usize; + + // SAFETY: We assume glib only ever writes valid Unicode codepoints in the provided buffer + // and that it does not lie about the length of the decomposition it reports + unsafe { + let mut buffer = Vec::<Self>::with_capacity(buffer_len); + let decomposition_length = ffi::g_unichar_fully_decompose( + self.into(), + compat.into_glib(), + buffer.as_mut_ptr().cast(), + buffer_len, + ); + debug_assert!(decomposition_length <= buffer_len); + buffer.set_len(decomposition_length); + buffer + } + } + + #[doc(alias = "g_unichar_decompose")] + #[doc(alias = "unichar_decompose")] + #[inline] + fn decompose(self) -> CharacterDecomposition { + // SAFETY: `a` and `b` will always be init after the g_unichar_decompose call returns + unsafe { + let mut a = MaybeUninit::uninit(); + let mut b = MaybeUninit::uninit(); + let res =
from_glib(ffi::g_unichar_decompose( + self.into(), + a.as_mut_ptr(), + b.as_mut_ptr(), + )); + + if res { + let (a, b) = (a.assume_init(), b.assume_init()); + if b == 0 { + CharacterDecomposition::Singleton(char::unsafe_from(a)) + } else { + CharacterDecomposition::Pair(char::unsafe_from(a), char::unsafe_from(b)) + } + } else { + CharacterDecomposition::NoDecomposition + } + } + } + + #[doc(alias = "g_unichar_compose")] + #[doc(alias = "unichar_compose")] + #[inline] + fn compose(a: char, b: char) -> Option<Self> { + // SAFETY: If g_unichar_compose returns true, it will initialize `out` + unsafe { + let mut out = MaybeUninit::uninit(); + let res = from_glib(ffi::g_unichar_compose(a.into(), b.into(), out.as_mut_ptr())); + + if res { + Some(Self::unsafe_from(out.assume_init())) + } else { + None + } + } + } +} + +impl Unichar for char {}