diff --git a/src/attr.rs b/src/attr.rs
index 9980f69b..9a6ec63f 100644
--- a/src/attr.rs
+++ b/src/attr.rs
@@ -1,7 +1,6 @@
 use crate::CowStr;
 use crate::DiscontinuousString;
 use crate::Span;
-use std::borrow::Cow;
 use std::fmt;
 
 use State::*;
@@ -114,19 +113,13 @@ impl<'s> Attributes<'s> {
     }
 
     pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool {
-        #[inline]
-        fn borrow(cow: CowStr) -> &str {
-            match cow {
-                Cow::Owned(_) => panic!(),
-                Cow::Borrowed(s) => s,
-            }
-        }
-
         for elem in Parser::new(input.chars()) {
             match elem {
                 Element::Class(c) => self.insert("class", input.src(c).into()),
                 Element::Identifier(i) => self.insert("id", input.src(i).into()),
-                Element::Attribute(a, v) => self.insert(borrow(input.src(a)), input.src(v).into()),
+                Element::Attribute(a, v) => {
+                    self.insert(input.src(a).take_borrowed(), input.src(v).into())
+                }
                 Element::Invalid => return false,
             }
         }
diff --git a/src/lib.rs b/src/lib.rs
index 100a9c19..fb2c5263 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -58,14 +58,16 @@ mod block;
 mod inline;
 mod lex;
 mod span;
+mod string;
 mod tree;
 
 use span::DiscontinuousString;
 use span::Span;
 
 pub use attr::{AttributeValue, AttributeValueParts, Attributes};
+pub use string::CowStr;
 
-type CowStr<'s> = std::borrow::Cow<'s, str>;
+// type CowStr<'s> = std::borrow::Cow<'s, str>;
 
 pub trait Render {
     /// Push [`Event`]s to a unicode-accepting buffer or stream.
@@ -695,10 +697,7 @@ impl<'s> Parser<'s> {
                     inline::Container::InlineMath => Container::Math { display: false },
                     inline::Container::DisplayMath => Container::Math { display: true },
                     inline::Container::RawFormat => Container::RawInline {
-                        format: match self.inlines.src(inline.span) {
-                            CowStr::Owned(_) => panic!(),
-                            CowStr::Borrowed(s) => s,
-                        },
+                        format: self.inlines.src(inline.span).take_borrowed(),
                     },
                     inline::Container::Subscript => Container::Subscript,
                     inline::Container::Superscript => Container::Superscript,
@@ -709,22 +708,22 @@ impl<'s> Parser<'s> {
                     inline::Container::Mark => Container::Mark,
                     inline::Container::InlineLink => Container::Link(
                         match self.inlines.src(inline.span) {
-                            CowStr::Owned(s) => s.replace('\n', "").into(),
                             s @ CowStr::Borrowed(_) => s,
+                            s => s.replace("\n", ""),
                         },
                         LinkType::Span(SpanLinkType::Inline),
                     ),
                     inline::Container::InlineImage => Container::Image(
                         match self.inlines.src(inline.span) {
-                            CowStr::Owned(s) => s.replace('\n', "").into(),
                             s @ CowStr::Borrowed(_) => s,
+                            s => s.replace("\n", ""),
                         },
                         SpanLinkType::Inline,
                     ),
                     inline::Container::ReferenceLink | inline::Container::ReferenceImage => {
                         let tag = match self.inlines.src(inline.span) {
-                            CowStr::Owned(s) => s.replace('\n', " ").into(),
                             s @ CowStr::Borrowed(_) => s,
+                            s => s.replace("\n", " "),
                         };
 
                         let link_def = self.pre_pass.link_definitions.get(tag.as_ref()).cloned();
@@ -762,10 +761,7 @@ impl<'s> Parser<'s> {
             }
             inline::EventKind::Atom(a) => match a {
                 inline::Atom::FootnoteReference => {
-                    let tag = match self.inlines.src(inline.span) {
-                        CowStr::Borrowed(s) => s,
-                        CowStr::Owned(..) => panic!(),
-                    };
+                    let tag = self.inlines.src(inline.span).take_borrowed();
                     let number = self
                         .footnote_references
                         .iter()
@@ -778,10 +774,7 @@ impl<'s> Parser<'s> {
                             |i| i + 1,
                         );
                     Event::FootnoteReference(
-                        match self.inlines.src(inline.span) {
-                            CowStr::Borrowed(s) => s,
-                            CowStr::Owned(..) => panic!(),
-                        },
+                        self.inlines.src(inline.span).take_borrowed(),
                         number,
                     )
                 }
diff --git a/src/string.rs b/src/string.rs
new file mode 100644
--- /dev/null
+++ b/src/string.rs
@@ -0,0 +1,149 @@
+use std::{borrow::Borrow, fmt::Display, ops::Deref, str::from_utf8};
+
+// Largest CowStr variant is Owned(String). A String uses 3 words of memory, but a fourth word is
+// needed to hold the tag (the tag takes a byte, but a full word is used for alignment reasons.)
+// This means that the available space we have for an inline string is 4 words - 2 bytes for the
+// tag and length.
+const MAX_INLINE_STR_LEN: usize = 4 * std::mem::size_of::<usize>() - 2;
+
+/// A string that is heap-allocated, borrowed from the input, or stored inline.
+#[derive(Debug)]
+pub enum CowStr<'s> {
+    Owned(String),
+    Borrowed(&'s str),
+    /// UTF-8 bytes stored in place; the `u8` is the number of bytes in use.
+    Inlined([u8; MAX_INLINE_STR_LEN], u8),
+}
+
+impl<'s> CowStr<'s> {
+    /// Returns the borrowed slice.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the string is `Owned` or `Inlined`.
+    pub fn take_borrowed(self) -> &'s str {
+        match self {
+            CowStr::Borrowed(s) => s,
+            CowStr::Owned(_) | CowStr::Inlined(..) => {
+                panic!("take_borrowed called on a non-borrowed CowStr")
+            }
+        }
+    }
+
+    /// Replaces every occurrence of `from` with `to`.
+    ///
+    /// An empty `from` leaves the string unchanged. An inlined string stays inlined as long as
+    /// the result fits in `MAX_INLINE_STR_LEN` bytes and becomes `Owned` otherwise.
+    pub fn replace(self, from: &str, to: &str) -> Self {
+        if from.is_empty() {
+            return self;
+        }
+
+        match self {
+            CowStr::Owned(s) => CowStr::Owned(s.replace(from, to)),
+            CowStr::Borrowed(s) => CowStr::Owned(s.replace(from, to)),
+            CowStr::Inlined(..) => {
+                let src = self.deref();
+                // Assemble the result in a fresh buffer: patching the source buffer in place
+                // shifts the bytes after a match, so the offsets of later matches go stale
+                // whenever `from` and `to` differ in length.
+                let mut out = [0u8; MAX_INLINE_STR_LEN];
+                let mut out_len = 0;
+                for (i, piece) in src.split(from).enumerate() {
+                    // Every piece except the first is preceded by one replacement.
+                    let sep = if i == 0 { "" } else { to };
+                    for part in [sep, piece].iter() {
+                        let end = out_len + part.len();
+                        if end > MAX_INLINE_STR_LEN {
+                            return CowStr::Owned(src.replace(from, to));
+                        }
+                        out[out_len..end].copy_from_slice(part.as_bytes());
+                        out_len = end;
+                    }
+                }
+                // `out_len` never exceeds `MAX_INLINE_STR_LEN`, so it fits in a `u8`.
+                CowStr::Inlined(out, out_len as u8)
+            }
+        }
+    }
+}
+
+impl<'s> Deref for CowStr<'s> {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        match *self {
+            Self::Owned(ref s) => s.borrow(),
+            Self::Borrowed(s) => s,
+            // NOTE: Inlined strings can only be constructed from strings or chars, which means they
+            // are guaranteed to be valid UTF-8. We could consider unchecked conversion as well, but
+            // a benchmark should be done before introducing unsafes.
+            Self::Inlined(ref inner, len) => from_utf8(&inner[..len as usize]).unwrap(),
+        }
+    }
+}
+
+impl<'s> AsRef<str> for CowStr<'s> {
+    fn as_ref(&self) -> &str {
+        self.deref()
+    }
+}
+
+impl<'s> From<char> for CowStr<'s> {
+    fn from(value: char) -> Self {
+        let mut inner = [0u8; MAX_INLINE_STR_LEN];
+        value.encode_utf8(&mut inner);
+        CowStr::Inlined(inner, value.len_utf8() as u8)
+    }
+}
+
+impl<'s> From<&'s str> for CowStr<'s> {
+    fn from(value: &'s str) -> Self {
+        CowStr::Borrowed(value)
+    }
+}
+
+impl<'s> From<String> for CowStr<'s> {
+    fn from(value: String) -> Self {
+        CowStr::Owned(value)
+    }
+}
+
+impl<'s> Clone for CowStr<'s> {
+    fn clone(&self) -> Self {
+        match self {
+            CowStr::Owned(s) => {
+                let len = s.len();
+                if len > MAX_INLINE_STR_LEN {
+                    CowStr::Owned(s.clone())
+                } else {
+                    let mut inner = [0u8; MAX_INLINE_STR_LEN];
+                    inner[..len].copy_from_slice(s.as_bytes());
+                    CowStr::Inlined(inner, len as u8)
+                }
+            }
+            CowStr::Borrowed(s) => CowStr::Borrowed(s),
+            CowStr::Inlined(inner, len) => CowStr::Inlined(*inner, *len),
+        }
+    }
+}
+
+impl<'s> PartialEq for CowStr<'s> {
+    fn eq(&self, other: &Self) -> bool {
+        self.deref() == other.deref()
+    }
+}
+
+impl<'s> Eq for CowStr<'s> {}
+
+impl<'s> Display for CowStr<'s> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.deref())
+    }
+}
+
+impl<'s, 'a> FromIterator<&'a str> for CowStr<'s> {
+    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
+        CowStr::Owned(FromIterator::from_iter(iter))
+    }
+}