Skip to content

Commit

Permalink
feat: add custom CowStr type
Browse files Browse the repository at this point in the history
Related issue: #20
  • Loading branch information
kmaasrud committed Mar 27, 2023
1 parent 16491a4 commit 8fc2736
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 26 deletions.
13 changes: 3 additions & 10 deletions src/attr.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::CowStr;
use crate::DiscontinuousString;
use crate::Span;
use std::borrow::Cow;
use std::fmt;

use State::*;
Expand Down Expand Up @@ -114,19 +113,13 @@ impl<'s> Attributes<'s> {
}

pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool {
#[inline]
fn borrow(cow: CowStr) -> &str {
match cow {
Cow::Owned(_) => panic!(),
Cow::Borrowed(s) => s,
}
}

for elem in Parser::new(input.chars()) {
match elem {
Element::Class(c) => self.insert("class", input.src(c).into()),
Element::Identifier(i) => self.insert("id", input.src(i).into()),
Element::Attribute(a, v) => self.insert(borrow(input.src(a)), input.src(v).into()),
Element::Attribute(a, v) => {
self.insert(input.src(a).take_borrowed(), input.src(v).into())
}
Element::Invalid => return false,
}
}
Expand Down
25 changes: 9 additions & 16 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,16 @@ mod block;
mod inline;
mod lex;
mod span;
mod string;
mod tree;

use span::DiscontinuousString;
use span::Span;

pub use attr::{AttributeValue, AttributeValueParts, Attributes};
pub use string::CowStr;

type CowStr<'s> = std::borrow::Cow<'s, str>;
// type CowStr<'s> = std::borrow::Cow<'s, str>;

pub trait Render {
/// Push [`Event`]s to a unicode-accepting buffer or stream.
Expand Down Expand Up @@ -695,10 +697,7 @@ impl<'s> Parser<'s> {
inline::Container::InlineMath => Container::Math { display: false },
inline::Container::DisplayMath => Container::Math { display: true },
inline::Container::RawFormat => Container::RawInline {
format: match self.inlines.src(inline.span) {
CowStr::Owned(_) => panic!(),
CowStr::Borrowed(s) => s,
},
format: self.inlines.src(inline.span).take_borrowed(),
},
inline::Container::Subscript => Container::Subscript,
inline::Container::Superscript => Container::Superscript,
Expand All @@ -709,22 +708,22 @@ impl<'s> Parser<'s> {
inline::Container::Mark => Container::Mark,
inline::Container::InlineLink => Container::Link(
match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', "").into(),
s @ CowStr::Borrowed(_) => s,
s => s.replace("\n", ""),
},
LinkType::Span(SpanLinkType::Inline),
),
inline::Container::InlineImage => Container::Image(
match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', "").into(),
s @ CowStr::Borrowed(_) => s,
s => s.replace("\n", ""),
},
SpanLinkType::Inline,
),
inline::Container::ReferenceLink | inline::Container::ReferenceImage => {
let tag = match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', " ").into(),
s @ CowStr::Borrowed(_) => s,
s => s.replace("\n", " "),
};
let link_def =
self.pre_pass.link_definitions.get(tag.as_ref()).cloned();
Expand Down Expand Up @@ -762,10 +761,7 @@ impl<'s> Parser<'s> {
}
inline::EventKind::Atom(a) => match a {
inline::Atom::FootnoteReference => {
let tag = match self.inlines.src(inline.span) {
CowStr::Borrowed(s) => s,
CowStr::Owned(..) => panic!(),
};
let tag = self.inlines.src(inline.span).take_borrowed();
let number = self
.footnote_references
.iter()
Expand All @@ -778,10 +774,7 @@ impl<'s> Parser<'s> {
|i| i + 1,
);
Event::FootnoteReference(
match self.inlines.src(inline.span) {
CowStr::Borrowed(s) => s,
CowStr::Owned(..) => panic!(),
},
self.inlines.src(inline.span).take_borrowed(),
number,
)
}
Expand Down
136 changes: 136 additions & 0 deletions src/string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
use std::{borrow::Borrow, cmp::Ordering, fmt::Display, ops::Deref, str::from_utf8};

// Largest CowStr variant is Owned(String). A String uses 3 words of memory, but a fourth word is
// needed to hold the tag (the tag takes a byte, but a full word is used for alignment reasons.)
// This means that the available space we have for an inline string is 4 words - 2 bytes for the
// tag and length.
const MAX_INLINE_STR_LEN: usize = 4 * std::mem::size_of::<usize>() - 2;

/// A string that is either heap-allocated, borrowed from the input, or stored inline.
///
/// All variants dereference to `str`, so a `CowStr` can be used wherever a `&str` is expected.
#[derive(Debug)]
pub enum CowStr<'s> {
    /// An owned, heap-allocated string.
    Owned(String),
    /// A string slice borrowed for the lifetime `'s`.
    Borrowed(&'s str),
    /// A short string stored inline: a fixed-size byte buffer together with the number of
    /// leading bytes that are in use. The used bytes must be valid UTF-8.
    Inlined([u8; MAX_INLINE_STR_LEN], u8),
}

impl<'s> CowStr<'s> {
    /// Returns the borrowed slice held by a [`CowStr::Borrowed`].
    ///
    /// # Panics
    ///
    /// Panics if the string is [`CowStr::Owned`] or [`CowStr::Inlined`].
    pub fn take_borrowed(self) -> &'s str {
        match self {
            CowStr::Borrowed(s) => s,
            CowStr::Owned(_) | CowStr::Inlined(..) => {
                panic!("CowStr::take_borrowed called on a string that is not borrowed")
            }
        }
    }

    /// Replaces every non-overlapping occurrence of `from` with `to`.
    ///
    /// An empty `from` leaves the string untouched. A borrowed string without any occurrence
    /// of `from` is returned as-is, without allocating. An inlined string stays inlined as long
    /// as the result fits in the inline buffer and is moved to the heap otherwise.
    pub fn replace(self, from: &str, to: &str) -> Self {
        if from.is_empty() {
            return self;
        }

        match self {
            CowStr::Owned(s) => CowStr::Owned(s.replace(from, to)),
            CowStr::Borrowed(s) => {
                if s.contains(from) {
                    CowStr::Owned(s.replace(from, to))
                } else {
                    // Nothing to replace, so keep borrowing instead of allocating a copy.
                    CowStr::Borrowed(s)
                }
            }
            CowStr::Inlined(..) => Self::replace_inlined(self.deref(), from, to),
        }
    }

    /// Performs the replacement for the contents of an inlined string.
    ///
    /// `from` must not be empty. The result is built in a fresh buffer rather than edited in
    /// place, so match offsets found in `src` never go stale when `from` and `to` differ in
    /// length.
    fn replace_inlined(src: &str, from: &str, to: &str) -> Self {
        let matches = src.matches(from).count();
        // Compute the final length up front, in `usize`, so that neither a long `to` nor many
        // matches can overflow the one-byte length field.
        let new_len = match from.len().cmp(&to.len()) {
            Ordering::Less => src
                .len()
                .saturating_add(matches.saturating_mul(to.len() - from.len())),
            // Cannot underflow: the matches are disjoint substrings of `src`.
            Ordering::Greater => src.len() - matches * (from.len() - to.len()),
            Ordering::Equal => src.len(),
        };

        if new_len > MAX_INLINE_STR_LEN {
            return CowStr::Owned(src.replace(from, to));
        }

        let mut inner = [0u8; MAX_INLINE_STR_LEN];
        let mut written = 0;
        for (i, piece) in src.split(from).enumerate() {
            // Every piece after the first is preceded by one occurrence of `from` in `src`.
            if i > 0 {
                inner[written..written + to.len()].copy_from_slice(to.as_bytes());
                written += to.len();
            }
            inner[written..written + piece.len()].copy_from_slice(piece.as_bytes());
            written += piece.len();
        }
        debug_assert_eq!(written, new_len);

        // `written` is at most MAX_INLINE_STR_LEN here, which fits in a `u8`.
        CowStr::Inlined(inner, written as u8)
    }
}

impl<'s> Deref for CowStr<'s> {
    type Target = str;

    /// Returns the string contents, regardless of which variant stores them.
    fn deref(&self) -> &Self::Target {
        match *self {
            Self::Owned(ref s) => s.borrow(),
            Self::Borrowed(s) => s,
            // NOTE: Inlined strings can only be constructed from strings or chars, which means they
            // are guaranteed to be valid UTF-8. We could consider unchecked conversion as well, but
            // a benchmark should be done before introducing unsafes.
            Self::Inlined(ref inner, len) => from_utf8(&inner[..len as usize])
                .expect("inlined CowStr must hold valid UTF-8"),
        }
    }
}

impl<'s> AsRef<str> for CowStr<'s> {
fn as_ref(&self) -> &str {
self.deref()
}
}

/// Stores a single character inline; no allocation is made.
impl<'s> From<char> for CowStr<'s> {
    fn from(value: char) -> Self {
        let mut buf = [0u8; MAX_INLINE_STR_LEN];
        // `encode_utf8` hands back the encoded slice, whose length is the UTF-8 width of `value`.
        let encoded_len = value.encode_utf8(&mut buf).len();
        CowStr::Inlined(buf, encoded_len as u8)
    }
}

impl<'s> From<&'s str> for CowStr<'s> {
fn from(value: &'s str) -> Self {
CowStr::Borrowed(value)
}
}

impl<'s> From<String> for CowStr<'s> {
fn from(value: String) -> Self {
CowStr::Owned(value)
}
}

/// Cloning never allocates for borrowed or inlined strings. An owned string that is short
/// enough is cloned into the inline representation instead of a new heap allocation.
impl<'s> Clone for CowStr<'s> {
    fn clone(&self) -> Self {
        match *self {
            CowStr::Borrowed(s) => CowStr::Borrowed(s),
            CowStr::Inlined(inner, len) => CowStr::Inlined(inner, len),
            // Short owned strings fit in the inline buffer.
            CowStr::Owned(ref s) if s.len() <= MAX_INLINE_STR_LEN => {
                let bytes = s.as_bytes();
                let mut inner = [0u8; MAX_INLINE_STR_LEN];
                inner[..bytes.len()].copy_from_slice(bytes);
                CowStr::Inlined(inner, bytes.len() as u8)
            }
            CowStr::Owned(ref s) => CowStr::Owned(s.clone()),
        }
    }
}

impl<'s> PartialEq for CowStr<'s> {
fn eq(&self, other: &Self) -> bool {
self.deref() == other.deref()
}
}

impl<'s> Eq for CowStr<'s> {}

impl<'s> Display for CowStr<'s> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.deref())
}
}

/// Concatenates all yielded string slices into a single owned string.
impl<'s, 'a> FromIterator<&'a str> for CowStr<'s> {
    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
        let joined: String = iter.into_iter().collect();
        CowStr::Owned(joined)
    }
}

0 comments on commit 8fc2736

Please sign in to comment.