diff --git a/Cargo.toml b/Cargo.toml index e781e0a..d47bc03 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ archive = ["ciborium"] csl-json = ["citationberg/json"] [dependencies] -citationberg = { git = "https://github.com/typst/citationberg.git", rev = "61ca6a7fcc48365f805e521cc8bc1f8f679ff372" } +citationberg = { git = "https://github.com/typst/citationberg.git", rev = "e3fd3f08e0e16983b7c3514b791b64c704dc2524" } indexmap = { version = "2.0.2", features = ["serde"] } numerals = "0.1.4" paste = "1.0.14" diff --git a/src/csl/mod.rs b/src/csl/mod.rs index 056fb62..1338922 100644 --- a/src/csl/mod.rs +++ b/src/csl/mod.rs @@ -8,8 +8,8 @@ use std::num::{NonZeroI16, NonZeroUsize}; use std::{mem, vec}; use citationberg::taxonomy::{ - DateVariable, Locator, NameVariable, NumberVariable, OtherTerm, StandardVariable, - Term, Variable, + DateVariable, Locator, NameVariable, NumberVariable, OtherTerm, PageVariable, + StandardVariable, Term, Variable, }; use citationberg::{ taxonomy as csl_taxonomy, Affixes, BaseLanguage, Citation, CitationFormat, Collapse, @@ -30,7 +30,7 @@ use self::elem::last_text_mut_child; pub use self::elem::{ BufWriteFormat, Elem, ElemChild, ElemChildren, ElemMeta, Formatted, Formatting, }; -use self::taxonomy::{EntryLike, NumberVariableResult}; +use self::taxonomy::{EntryLike, NumberVariableResult, PageVariableResult}; #[cfg(feature = "archive")] pub mod archive; @@ -2567,6 +2567,14 @@ impl<'a, T: EntryLike> Context<'a, T> { res } + fn resolve_page_variable( + &self, + variable: PageVariable, + ) -> Option { + self.writing.prepare_variable_query(variable)?; + self.instance.resolve_page_variable(variable) + } + /// Resolve a name variable. /// /// Honors suppressions. diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index de27691..581ee2f 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -3,18 +3,21 @@ use std::fmt::Write; use std::str::FromStr; use citationberg::taxonomy::{ - Locator, NumberVariable, OtherTerm, StandardVariable, Term, Variable, + Locator, NumberOrPageVariable, NumberVariable, OtherTerm, PageVariable, + StandardVariable, Term, Variable, }; use citationberg::{ ChooseBranch, CslMacro, DateDayForm, DateMonthForm, DatePartName, DateParts, DateStrongAnyForm, GrammarGender, LabelPluralize, LayoutRenderingElement, - LongShortForm, NumberForm, TestPosition, TextCase, ToAffixes, ToFormatting, + LongShortForm, NumberForm, PageRangeFormat, TestPosition, TextCase, ToAffixes, + ToFormatting, }; use citationberg::{TermForm, TextTarget}; -use crate::csl::taxonomy::NumberVariableResult; +use crate::csl::taxonomy::{NumberVariableResult, PageVariableResult}; use crate::lang::{Case, SentenceCase, TitleCase}; use crate::types::{ChunkedString, Date, MaybeTyped, Numeric}; +use crate::PageRanges; use super::taxonomy::EntryLike; use super::{Context, ElemMeta, IbidState, SpecialForm, UsageInfo}; @@ -81,21 +84,19 @@ impl RenderCsl for citationberg::Text { } _ => ctx.push_chunked(&val), }, - ResolvedTextTarget::NumberVariable(var, n) => match n { + ResolvedTextTarget::NumberVariable(_, n) => match n { NumberVariableResult::Regular(MaybeTyped::Typed(num)) if num.will_transform() => { - render_typed_num(num.as_ref(), NumberForm::default(), var, None, ctx); - } - NumberVariableResult::Regular(n) - if matches!(var, NumberVariable::Page) => - { - // TODO: Remove this hack - ctx.push_str(&n.to_str().replace('-', "–")) + render_typed_num(num.as_ref(), NumberForm::default(), None, ctx); } NumberVariableResult::Regular(n) => ctx.push_str(&n.to_str()), NumberVariableResult::Transparent(n) => ctx.push_transparent(n), }, + ResolvedTextTarget::PageVariable(p) => match p { + MaybeTyped::Typed(r) => render_page_range(&r, ctx), + MaybeTyped::String(s) => ctx.push_str(&s.replace('-', "–")), + }, ResolvedTextTarget::Macro(mac) => { for child in &mac.children { child.render(ctx); @@ -138,6 +139,9 @@ impl RenderCsl for citationberg::Text { match target { ResolvedTextTarget::StandardVariable(s, _) => var == Variable::Standard(s), ResolvedTextTarget::NumberVariable(n, _) => var == Variable::Number(n), + ResolvedTextTarget::PageVariable(_) => { + var == Variable::Page(PageVariable::Page) + } ResolvedTextTarget::Macro(mac) => { mac.children.iter().any(|c| c.will_render(ctx, var)) } @@ -170,7 +174,8 @@ impl RenderCsl for citationberg::Text { (false, UsageInfo { has_vars: true, ..Default::default() }) } ResolvedTextTarget::StandardVariable(_, _) - | ResolvedTextTarget::NumberVariable(_, _) => ( + | ResolvedTextTarget::NumberVariable(_, _) + | ResolvedTextTarget::PageVariable(_) => ( true, UsageInfo { has_vars: true, @@ -198,6 +203,7 @@ impl RenderCsl for citationberg::Text { enum ResolvedTextTarget<'a, 'b> { StandardVariable(StandardVariable, Cow<'a, ChunkedString>), NumberVariable(NumberVariable, NumberVariableResult<'a>), + PageVariable(PageVariableResult), Macro(&'a CslMacro), Term(&'a str), Value(&'b str), @@ -243,6 +249,9 @@ impl<'a, 'b> ResolvedTextTarget<'a, 'b> { TextTarget::Variable { var: Variable::Number(var), .. } => ctx .resolve_number_variable(*var) .map(|n| ResolvedTextTarget::NumberVariable(*var, n)), + TextTarget::Variable { var: Variable::Page(pv), .. } => { + ctx.resolve_page_variable(*pv).map(ResolvedTextTarget::PageVariable) + } TextTarget::Variable { .. } => None, TextTarget::Macro { name } => { ctx.style.get_macro(name).map(ResolvedTextTarget::Macro) @@ -278,7 +287,7 @@ impl RenderCsl for citationberg::Number { Some(NumberVariableResult::Regular(MaybeTyped::Typed(num))) if num.will_transform() => { - render_typed_num(num.as_ref(), self.form, self.variable, gender, ctx); + render_typed_num(num.as_ref(), self.form, gender, ctx); } Some(NumberVariableResult::Regular(MaybeTyped::Typed(num))) => { write!(ctx, "{}", num).unwrap() @@ -345,41 +354,31 @@ impl RenderCsl for citationberg::Number { fn render_typed_num( num: &Numeric, form: NumberForm, - variable: NumberVariable, gender: Option, ctx: &mut Context, ) { - let normal_num = if form == NumberForm::Numeric && variable == NumberVariable::Page { - if let Some(range) = num.range() { - render_page_range(range, ctx); - false - } else { - true - } - } else { - true - }; - - if normal_num { - num.with_form(ctx, form, gender, ctx.ordinal_lookup()).unwrap(); - } + num.with_form(ctx, form, gender, &ctx.ordinal_lookup()).unwrap(); } -fn render_page_range( - range: std::ops::RangeInclusive, - ctx: &mut Context, -) { - ctx.style - .csl - .settings - .page_range_format - .unwrap_or_default() - .format( - range, - ctx, - ctx.term(OtherTerm::PageRangeDelimiter.into(), TermForm::default(), false) - .or(Some("–")), - ) +fn render_page_range(range: &PageRanges, ctx: &mut Context) { + let format = ctx.style.csl.settings.page_range_format.unwrap_or_default(); + let delim = ctx + .term(OtherTerm::PageRangeDelimiter.into(), TermForm::default(), false) + .or(Some("–")); + + range + .ranges + .iter() + .try_for_each(|r| match r { + crate::PageRangesPart::Ampersand => ctx.write_str(" & "), + crate::PageRangesPart::Comma => ctx.write_str(", "), + crate::PageRangesPart::EscapedRange(start, end) => PageRangeFormat::Expanded + .format(ctx, &start.to_string(), &end.to_string(), delim), + crate::PageRangesPart::SinglePage(page) => ctx.write_str(&page.to_string()), + crate::PageRangesPart::Range(start, end) => { + format.format(ctx, &start.to_string(), &end.to_string(), delim) + } + }) .unwrap(); } @@ -393,7 +392,11 @@ fn label_pluralization( LabelPluralize::Contextual => match variable { NumberVariableResult::Regular(MaybeTyped::String(_)) => false, NumberVariableResult::Regular(MaybeTyped::Typed(n)) => { - n.is_plural(label.variable.is_number_of_variable()) + if let NumberOrPageVariable::Number(v) = label.variable { + n.is_plural(v.is_number_of_variable()) + } else { + panic!("Incompatiable variable types") + } } NumberVariableResult::Transparent(_) => false, }, @@ -406,19 +409,40 @@ impl RenderCsl for citationberg::Label { return; } - let Some(variable) = ctx.resolve_number_variable(self.variable) else { - return; - }; + match self.variable { + NumberOrPageVariable::Number(n) => { + let Some(variable) = ctx.resolve_number_variable(n) else { + return; + }; - let depth = ctx.push_elem(citationberg::Formatting::default()); - let plural = label_pluralization(self, variable); + let depth = ctx.push_elem(citationberg::Formatting::default()); + let plural = label_pluralization(self, variable); - let content = ctx - .term(Term::from(self.variable), self.label.form, plural) - .unwrap_or_default(); + let content = ctx + .term(Term::from(self.variable), self.label.form, plural) + .unwrap_or_default(); - render_label_with_var(&self.label, ctx, content); - ctx.commit_elem(depth, None, Some(ElemMeta::Label)); + render_label_with_var(&self.label, ctx, content); + ctx.commit_elem(depth, None, Some(ElemMeta::Label)); + } + NumberOrPageVariable::Page(pv) => { + let Some(p) = ctx.resolve_page_variable(pv) else { + return; + }; + + let depth = ctx.push_elem(citationberg::Formatting::default()); + let plural = match p { + MaybeTyped::Typed(p) => p.is_plural(), + _ => false, + }; + + let content = + ctx.term(Term::from(pv), self.label.form, plural).unwrap_or_default(); + + render_label_with_var(&self.label, ctx, content); + ctx.commit_elem(depth, None, Some(ElemMeta::Label)); + } + } } fn will_render(&self, _ctx: &mut Context, _var: Variable) -> bool { @@ -427,7 +451,9 @@ impl RenderCsl for citationberg::Label { fn will_have_info(&self, ctx: &mut Context) -> (bool, UsageInfo) { match ctx.instance.kind { - Some(SpecialForm::VarOnly(Variable::Number(n))) if self.variable != n => { + Some(SpecialForm::VarOnly(Variable::Number(n))) + if self.variable != NumberOrPageVariable::Number(n) => + { return (false, UsageInfo::default()); } Some( @@ -435,7 +461,8 @@ impl RenderCsl for citationberg::Label { | SpecialForm::OnlyFirstDate | SpecialForm::OnlyYearSuffix, ) => { - if self.variable != NumberVariable::Locator { + if self.variable != NumberOrPageVariable::Number(NumberVariable::Locator) + { return (true, UsageInfo::default()); } } @@ -443,7 +470,7 @@ impl RenderCsl for citationberg::Label { } // Never yield a label if the locator is set to custom. - if self.variable == NumberVariable::Locator + if self.variable == NumberOrPageVariable::Number(NumberVariable::Locator) && ctx .instance .cite_props @@ -454,14 +481,33 @@ impl RenderCsl for citationberg::Label { return (false, UsageInfo::default()); } - if let Some(num) = ctx.resolve_number_variable(self.variable) { - let plural = label_pluralization(self, num); - ( - ctx.term(Term::from(self.variable), self.label.form, plural).is_some(), - UsageInfo::default(), - ) - } else { - (false, UsageInfo::default()) + match self.variable { + NumberOrPageVariable::Number(n) => { + if let Some(num) = ctx.resolve_number_variable(n) { + let plural = label_pluralization(self, num); + ( + ctx.term(Term::from(self.variable), self.label.form, plural) + .is_some(), + UsageInfo::default(), + ) + } else { + (false, UsageInfo::default()) + } + } + NumberOrPageVariable::Page(pv) => { + if let Some(p) = ctx.resolve_page_variable(pv) { + let plural = match p { + MaybeTyped::Typed(p) => p.is_plural(), + _ => false, + }; + ( + ctx.term(Term::from(pv), self.label.form, plural).is_some(), + UsageInfo::default(), + ) + } else { + (false, UsageInfo::default()) + } + } } } } @@ -1091,6 +1137,9 @@ impl<'a, 'b, T: EntryLike> Iterator for BranchConditionIter<'a, 'b, T> { Variable::Name(n) => { !self.ctx.resolve_name_variable(n).is_empty() } + Variable::Page(pv) => { + self.ctx.resolve_page_variable(pv).is_some() + } }) } else { None diff --git a/src/csl/sort.rs b/src/csl/sort.rs index bcf9380..cab43dc 100644 --- a/src/csl/sort.rs +++ b/src/csl/sort.rs @@ -82,6 +82,19 @@ impl<'a> StyleContext<'a> { (None, None) => Ordering::Equal, } } + SortKey::Variable { variable: Variable::Page(pv), .. } => { + let a = + InstanceContext::sort_instance(a, a_idx).resolve_page_variable(*pv); + let b = + InstanceContext::sort_instance(b, b_idx).resolve_page_variable(*pv); + + match (a, b) { + (Some(a), Some(b)) => a.csl_cmp(&b), + (Some(_), None) => Ordering::Greater, + (None, Some(_)) => Ordering::Less, + (None, None) => Ordering::Equal, + } + } SortKey::MacroName { name, names_min, diff --git a/src/csl/taxonomy.rs b/src/csl/taxonomy.rs index 4f4bb72..5cf853e 100644 --- a/src/csl/taxonomy.rs +++ b/src/csl/taxonomy.rs @@ -5,9 +5,9 @@ use std::str::FromStr; use crate::types::{ ChunkedString, Date, EntryType, MaybeTyped, Numeric, Person, PersonRole, StringChunk, }; -use crate::Entry; +use crate::{Entry, PageRanges}; use citationberg::taxonomy::{ - DateVariable, Kind, NameVariable, NumberVariable, StandardVariable, + DateVariable, Kind, NameVariable, NumberVariable, PageVariable, StandardVariable, }; use citationberg::{taxonomy, LongShortForm}; use unic_langid::LanguageIdentifier; @@ -23,6 +23,10 @@ pub trait EntryLike { &self, variable: NumberVariable, ) -> Option>>; + fn resolve_page_variable( + &self, + variable: PageVariable, + ) -> Option>; fn resolve_standard_variable( &self, form: LongShortForm, @@ -70,6 +74,13 @@ impl<'a, T: EntryLike> InstanceContext<'a, T> { } } + pub(super) fn resolve_page_variable( + &self, + variable: PageVariable, + ) -> Option { + self.entry.resolve_page_variable(variable) + } + // Number variables are standard variables. pub(super) fn resolve_standard_variable( &self, @@ -97,6 +108,8 @@ pub(super) enum NumberVariableResult<'a> { Transparent(usize), } +pub(super) type PageVariableResult = MaybeTyped; + impl<'a> NumberVariableResult<'a> { pub(super) fn from_regular(regular: MaybeTyped>) -> Self { Self::Regular(regular) @@ -157,14 +170,10 @@ impl EntryLike for Entry { NumberVariable::NumberOfVolumes => { self.volume_total().map(|n| MaybeTyped::Typed(Cow::Borrowed(n))) } - NumberVariable::Page => self.page_range().map(MaybeTyped::to_cow), NumberVariable::PageFirst => self .page_range() - .and_then(|r| match r { - MaybeTyped::Typed(r) => r.range(), - MaybeTyped::String(_) => None, - }) - .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(*r.start())))), + .and_then(PageRanges::first) + .map(|r| MaybeTyped::Typed(Cow::Owned(r.clone()))), NumberVariable::PartNumber => self .bound_select( &select!( @@ -201,6 +210,15 @@ impl EntryLike for Entry { } } + fn resolve_page_variable( + &self, + variable: PageVariable, + ) -> Option> { + match variable { + PageVariable::Page => self.page_range().map(|r| MaybeTyped::Typed(r.clone())), + } + } + // Number variables are standard variables. fn resolve_standard_variable( &self, @@ -755,17 +773,36 @@ impl EntryLike for citationberg::json::Item { } } + fn resolve_page_variable( + &self, + variable: PageVariable, + ) -> Option> { + match variable { + PageVariable::Page => match self.0.get("page")? { + csl_json::Value::Number(n) => { + Some(MaybeTyped::Typed(PageRanges::from(*n as u64))) + } + csl_json::Value::String(s) => { + let res = MaybeTyped::::infallible_from_str(s); + Some(match res { + MaybeTyped::String(s) => MaybeTyped::String(s), + MaybeTyped::Typed(r) => MaybeTyped::Typed(r), + }) + } + _ => None, + }, + } + } + fn resolve_number_variable( &self, variable: NumberVariable, ) -> Option>> { if matches!(variable, NumberVariable::PageFirst) { - if let Some(MaybeTyped::Typed(Cow::Owned(n))) = - self.resolve_number_variable(NumberVariable::Page) + if let Some(MaybeTyped::Typed(n)) = + self.resolve_page_variable(PageVariable::Page) { - return n - .range() - .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(*r.start())))); + return n.first().map(|r| MaybeTyped::Typed(Cow::Owned(r.clone()))); } } match self.0.get(&variable.to_string())? { diff --git a/src/interop.rs b/src/interop.rs index 298b29e..eb410bf 100644 --- a/src/interop.rs +++ b/src/interop.rs @@ -1,6 +1,7 @@ //! Provides conversion methods for BibLaTeX. use std::convert::TryFrom; +use std::str::FromStr; use biblatex as tex; use tex::{ @@ -481,37 +482,23 @@ impl TryFrom<&tex::Entry> for Entry { if let Some(pages) = map_res(entry.pages())? { item.set_page_range(match pages { - PermissiveType::Typed(pages) => { - if let Some(n) = - pages.first().filter(|f| pages.len() == 1 && f.start == f.end) - { - MaybeTyped::Typed(Numeric::new(n.start as i32)) - } else { - let mut items = vec![]; - for (i, pair) in pages.iter().enumerate() { - let last = i + 1 == pages.len(); - let last_delim = (!last).then_some(NumericDelimiter::Comma); - - if pair.start == pair.end { - items.push((pair.start as i32, last_delim)); + PermissiveType::Typed(pages) => PageRanges::new( + pages + .into_iter() + .map(|p| { + if p.start == p.end { + PageRangesPart::SinglePage(Numeric::from(p.start)) } else { - items.push(( - pair.start as i32, - Some(NumericDelimiter::Hyphen), - )); - items.push((pair.end as i32, last_delim)); + PageRangesPart::Range( + Numeric::from(p.start), + Numeric::from(p.end), + ) } - } - - MaybeTyped::Typed(Numeric { - value: NumericValue::Set(items), - prefix: None, - suffix: None, }) - } - } + .collect(), + ), PermissiveType::Chunks(chunks) => { - MaybeTyped::infallible_from_str(&chunks.format_verbatim()) + PageRanges::from_str(&chunks.format_verbatim()).unwrap() } }); } diff --git a/src/lib.rs b/src/lib.rs index 16cd0fe..565289c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -522,7 +522,7 @@ entry! { /// Published version of an item. "edition" => edition: MaybeTyped, /// The range of pages within the parent this item occupies - "page-range" => page_range: MaybeTyped, + "page-range" => page_range: PageRanges, /// The total number of pages the item has. "page-total" => page_total: Numeric, /// The time range within the parent this item starts and ends at. diff --git a/src/types/mod.rs b/src/types/mod.rs index cd6be34..02c6cb5 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -12,11 +12,13 @@ use thiserror::Error; use url::Url; pub use numeric::*; +pub use page::*; pub use persons::*; pub use strings::*; pub use time::*; mod numeric; +mod page; mod persons; mod strings; mod time; diff --git a/src/types/numeric.rs b/src/types/numeric.rs index cd1be5b..9716cf3 100644 --- a/src/types/numeric.rs +++ b/src/types/numeric.rs @@ -160,7 +160,7 @@ impl Numeric { buf: &mut T, form: NumberForm, gender: Option, - ords: OrdinalLookup<'_>, + ords: &OrdinalLookup<'_>, ) -> std::fmt::Result where T: Write, @@ -217,41 +217,14 @@ impl Numeric { .flatten() } - /// Returns a range if the value is a range. - pub fn range(&self) -> Option> { - self.value.range() - } - /// Returns the nth number in the set. pub fn nth(&self, n: usize) -> Option { - match &self.value { - NumericValue::Number(val) if n == 0 => Some(*val), - NumericValue::Number(_) => None, - NumericValue::Set(vec) => vec.get(n).map(|(val, _)| *val), - } + self.value.nth(n) } /// Order the values according to CSL rules. pub(crate) fn csl_cmp(&self, other: &Self) -> std::cmp::Ordering { - let mut i = 0; - loop { - let a = self.nth(i); - let b = other.nth(i); - - match (a, b) { - (Some(a), Some(b)) => { - let ord = a.cmp(&b); - if ord != std::cmp::Ordering::Equal { - return ord; - } - } - (Some(_), None) => return std::cmp::Ordering::Greater, - (None, Some(_)) => return std::cmp::Ordering::Less, - (None, None) => return std::cmp::Ordering::Equal, - } - - i += 1; - } + self.value.into_iter().cmp(&other.value) } } @@ -375,48 +348,65 @@ pub enum NumericValue { } impl NumericValue { - /// Returns a range if the value is a range. - pub fn range(&self) -> Option> { + /// Return the length of the numeric value. + pub fn len(&self) -> usize { match self { - // A single number is seen as a range of length 1. See #103. - Self::Number(n) => Some(*n..=*n), - Self::Set(vec) => { - if vec.len() == 2 { - let start = vec[0].0; - let end = vec[1].0; - let first_delim = vec[0].1; - - let first_delim_ampersand_range = first_delim - == Some(NumericDelimiter::Ampersand) - && start + 1 == end; - - if (first_delim_ampersand_range) - || first_delim == Some(NumericDelimiter::Hyphen) - { - Some(start..=end) - } else { - None - } - } else if vec.len() > 2 { - for i in 1..vec.len() { - if vec[i - 1].1 != Some(NumericDelimiter::Ampersand) { - return None; - } + NumericValue::Number(_) => 1, + NumericValue::Set(vec) => vec.len(), + } + } - if vec[i - 1].0 + 1 != vec[i].0 { - return None; - } - } + /// Whether the numeric value is an empty set. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } - Some(vec[0].0..=vec[vec.len() - 1].0) - } else { - None - } - } + /// Returns the nth number in the set. + fn nth(&self, n: usize) -> Option { + match self { + NumericValue::Number(val) if n == 0 => Some(*val), + NumericValue::Number(_) => None, + NumericValue::Set(vec) => vec.get(n).map(|(val, _)| *val), } } } +/// An iterator over the numbers in a numeric value. +pub struct NumIterator<'a> { + num: &'a NumericValue, + idx: usize, +} + +impl<'a> Iterator for NumIterator<'a> { + type Item = i32; + + fn next(&mut self) -> Option { + let val = self.num.nth(self.idx); + self.idx += 1; + val + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.num.len() - self.idx; + (len, Some(len)) + } +} + +impl<'a> ExactSizeIterator for NumIterator<'a> { + fn len(&self) -> usize { + self.size_hint().0 + } +} + +impl<'a> IntoIterator for &'a NumericValue { + type Item = i32; + type IntoIter = NumIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + NumIterator { num: self, idx: 0 } + } +} + /// Delimits individual numbers in a numeric value. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum NumericDelimiter { diff --git a/src/types/page.rs b/src/types/page.rs new file mode 100644 index 0000000..1efb632 --- /dev/null +++ b/src/types/page.rs @@ -0,0 +1,338 @@ +use std::{cmp::Ordering, fmt::Display, num::NonZeroUsize, str::FromStr}; + +use crate::{MaybeTyped, Numeric, NumericError}; + +use super::{deserialize_from_str, serialize_display}; +use serde::{de, Deserialize, Serialize}; +use thiserror::Error; + +impl MaybeTyped { + /// Order the values according to CSL rules. + pub(crate) fn csl_cmp(&self, other: &Self) -> std::cmp::Ordering { + match (self, other) { + (MaybeTyped::Typed(a), MaybeTyped::Typed(b)) => a.csl_cmp(b), + _ => self.to_string().cmp(&other.to_string()), + } + } +} + +/// Ranges of page numbers, e.g., `1-4, 5 & 6`. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PageRanges { + /// The given ranges. + pub ranges: Vec, +} + +impl PageRanges { + /// Create a new `PageRanges` struct. + pub fn new(ranges: Vec) -> Self { + Self { ranges } + } + + /// Get the first page of the first range. + pub fn first(&self) -> Option<&Numeric> { + self.ranges.first().and_then(PageRangesPart::start) + } + + /// Order the values according to CSL rules. + pub(crate) fn csl_cmp(&self, other: &Self) -> std::cmp::Ordering { + #[derive(PartialEq, Eq)] + struct OrderablePageRangesPart<'a>(&'a PageRangesPart); + + impl Ord for OrderablePageRangesPart<'_> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.csl_cmp(other.0) + } + } + + impl PartialOrd for OrderablePageRangesPart<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + + self.ranges + .iter() + .map(OrderablePageRangesPart) + .cmp(other.ranges.iter().map(OrderablePageRangesPart)) + } + + /// Whether to pluralize the `pages` term, when used with this page range. + pub fn is_plural(&self) -> bool { + self.ranges.len() != 1 + } +} + +impl From for PageRanges { + fn from(value: u64) -> Self { + Self { ranges: vec![value.into()] } + } +} + +impl Display for PageRanges { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.ranges.iter().try_for_each(|r| r.fmt(f)) + } +} + +impl FromStr for PageRanges { + type Err = PageRangesPartErr; + + fn from_str(s: &str) -> Result { + // Split input into different ranges separated by `&` or `,` + Ok(Self { + ranges: group_by(s, |c, d| !(c == ',' || c == '&' || d == ',' || d == '&')) + .map(PageRangesPart::from_str) + .collect::>()?, + }) + } +} + +/// Parts of the page ranges. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum PageRangesPart { + /// An and, i.e, `&`. + Ampersand, + /// A comma, i.e., `,`. + Comma, + /// An escaped range with start and end, e.g., `1\-4`. + EscapedRange(Numeric, Numeric), + /// A single page, e.g., `5`. + SinglePage(Numeric), + /// A full range, e.g., `1n8--1n14`. + Range(Numeric, Numeric), +} + +impl PageRangesPart { + /// The start of a range if any. + pub fn start(&self) -> Option<&Numeric> { + match self { + Self::EscapedRange(s, _) => Some(s), + Self::SinglePage(s) => Some(s), + Self::Range(s, _) => Some(s), + _ => None, + } + } + + /// Order the values according to CSL rules. + pub(crate) fn csl_cmp(&self, other: &Self) -> std::cmp::Ordering { + match (self, other) { + (Self::Ampersand, Self::Ampersand) => Ordering::Equal, + (Self::Ampersand, _) => Ordering::Less, + (_, Self::Ampersand) => Ordering::Greater, + (Self::Comma, Self::Comma) => Ordering::Equal, + (Self::Comma, _) => Ordering::Less, + (_, Self::Comma) => Ordering::Greater, + (Self::SinglePage(n1), Self::SinglePage(n2)) => n1.csl_cmp(n2), + (Self::SinglePage(_), _) => Ordering::Less, + (_, Self::SinglePage(_)) => Ordering::Greater, + (Self::EscapedRange(s1, e1), Self::EscapedRange(s2, e2)) => { + let ord = s1.csl_cmp(s2); + if ord != Ordering::Equal { + return ord; + } + e1.csl_cmp(e2) + } + (Self::EscapedRange(_, _), _) => Ordering::Less, + (_, Self::EscapedRange(_, _)) => Ordering::Greater, + (Self::Range(s1, e1), Self::Range(s2, e2)) => { + let ord = s1.csl_cmp(s2); + if ord != Ordering::Equal { + return ord; + } + e1.csl_cmp(e2) + } + } + } +} + +impl From for PageRangesPart { + fn from(value: u64) -> Self { + Self::SinglePage((value as u32).into()) + } +} + +impl Display for PageRangesPart { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + PageRangesPart::Ampersand => "&", + PageRangesPart::Comma => ", ", + PageRangesPart::EscapedRange(s, e) => return write!(f, "{s}-{e}"), + PageRangesPart::SinglePage(s) => return write!(f, "{s}"), + PageRangesPart::Range(s, e) => return write!(f, "{s}-{e}"), + }; + Display::fmt(s, f) + } +} + +/// Parsing error for page ranges. +#[derive(Debug, Clone, Copy, Error)] +pub enum PageRangesPartErr { + /// The string is malformed. + #[error("page range string malformed")] + Malformed, + /// The string is empty. + #[error("page range is empty")] + Empty, + /// An error from parsing a numeric value. + #[error("page range contained invalid numeric value")] + NumericErr(#[from] NumericError), +} + +impl FromStr for PageRangesPart { + type Err = PageRangesPartErr; + + fn from_str(s: &str) -> Result { + let s = s.trim(); + if s.is_empty() { + return Err(PageRangesPartErr::Empty); + } + let p = if s == "&" { + Self::Ampersand + } else if s == "," { + Self::Comma + } else if s.contains("\\-") { + // If `-` chars are escaped, write `-`. + let mut parts = s.split("\\-").map(str::trim); + + let start = parts.next().ok_or(PageRangesPartErr::Empty)?; + let end = parts.next().ok_or(PageRangesPartErr::Empty)?; + + let r = Self::EscapedRange(parse_number(start)?, parse_number(end)?); + if parts.next().is_some() { + return Err(PageRangesPartErr::Malformed); + } + r + } else { + // Otherwise, split into the two halves of the dash. + let mut parts = s.split(['-', '–']).map(str::trim); + let r = match (parts.next(), parts.next()) { + (None, None) => unreachable!(), + (Some(start), None) => Self::SinglePage(parse_number(start)?), + (Some(start), Some(end)) => { + Self::Range(parse_number(start)?, parse_number(end)?) + } + _ => unreachable!(), + }; + if parts.next().is_some() { + return Err(PageRangesPartErr::Malformed); + } + r + }; + Ok(p) + } +} + +deserialize_from_str!(PageRanges); +serialize_display!(PageRanges); + +fn parse_number(s: &str) -> Result { + Numeric::from_str(s) +} + +/// Split `s` into maximal chunks such that two successive chars satisfy `pred`. +/// +/// Returns an iterator over these chunks. +pub(crate) fn group_by(s: &str, pred: F) -> GroupBy<'_, F> +where + F: FnMut(char, char) -> bool, +{ + GroupBy::new(s, pred) +} + +/// An iterator over string slice in (non-overlapping) chunks separated by a predicate. +/// +/// Adapted from the nightly std. +pub(crate) struct GroupBy<'a, P> { + string: &'a str, + predicate: P, +} + +impl<'a, P> GroupBy<'a, P> { + pub(crate) fn new(string: &'a str, predicate: P) -> Self { + GroupBy { string, predicate } + } +} + +impl<'a, P> Iterator for GroupBy<'a, P> +where + P: FnMut(char, char) -> bool, +{ + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option { + if self.string.is_empty() { + None + } else { + let mut len = 1; + for w in windows(self.string, 2) { + let chars: Vec<_> = w.chars().collect(); + let (c, d) = (chars[0], chars[1]); + if (self.predicate)(c, d) { + len += c.len_utf8(); + } else { + break; + } + } + let (head, tail) = self.string.split_at(len); + self.string = tail; + Some(head) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.string.chars().size_hint() + } +} + +/// Return an iterator of sliding windows of size `size` over `string`. +/// +/// # Panic +/// +/// Panics if `size` is zero. +pub(crate) fn windows(string: &str, size: usize) -> Windows<'_> { + assert!(size > 0); + Windows::new(string, NonZeroUsize::new(size).unwrap()) +} + +/// An iterator of sliding windows of size `size` over `string`. +/// +/// Each call of `next` advanced the window by one. +pub(crate) struct Windows<'a> { + string: &'a str, + size: NonZeroUsize, +} + +impl<'a> Windows<'a> { + pub(crate) fn new(string: &'a str, size: NonZeroUsize) -> Self { + Self { string, size } + } +} + +impl<'a> Iterator for Windows<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + let size = self.size.get(); + if size > self.string.len() { + None + } else { + let mut indices = self.string.char_indices(); + let next = indices.nth(1).unwrap().0; + match indices.nth(size - 2) { + Some((idx, _)) => { + let ret = Some(&self.string[..idx]); + self.string = &self.string[next..]; + ret + } + None => { + let ret = Some(self.string); + self.string = ""; + ret + } + } + } + } +} diff --git a/tests/citeproc-pass.txt b/tests/citeproc-pass.txt index c10d743..f132d31 100644 --- a/tests/citeproc-pass.txt +++ b/tests/citeproc-pass.txt @@ -274,6 +274,7 @@ nameorder_ShortDemoteDisplayAndSort nameorder_ShortNameAsSortDemoteNever namespaces_NonNada3 number_IsNumericWithAlpha +number_MixedPageRange number_PageFirst number_PageRange number_SimpleNumberArabic