From 3f83bd258ab71c65737d468329a981181a8808b9 Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Wed, 20 Mar 2024 17:27:01 +0100 Subject: [PATCH 01/22] Change page range construction for single numbers --- src/types/numeric.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types/numeric.rs b/src/types/numeric.rs index 68fe241..c099131 100644 --- a/src/types/numeric.rs +++ b/src/types/numeric.rs @@ -379,7 +379,7 @@ impl NumericValue { pub fn range(&self) -> Option> { match self { // A single number is seen as a range of length 1. See #103. - Self::Number(n) => Some(*n..(*n + 1)), + Self::Number(n) => Some(*n..*n), Self::Set(vec) => { if vec.len() == 2 { let start = vec[0].0; From be39404f7f01d93f13bc4b25a54c222768a8fe10 Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Thu, 21 Mar 2024 08:18:31 +0100 Subject: [PATCH 02/22] Switch to InclusiveRange --- src/types/numeric.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/types/numeric.rs b/src/types/numeric.rs index c099131..2c3f901 100644 --- a/src/types/numeric.rs +++ b/src/types/numeric.rs @@ -218,7 +218,7 @@ impl Numeric { } /// Returns a range if the value is a range. - pub fn range(&self) -> Option> { + pub fn range(&self) -> Option> { self.value.range() } @@ -376,10 +376,10 @@ pub enum NumericValue { impl NumericValue { /// Returns a range if the value is a range. - pub fn range(&self) -> Option> { + pub fn range(&self) -> Option> { match self { // A single number is seen as a range of length 1. See #103. - Self::Number(n) => Some(*n..*n), + Self::Number(n) => Some(*n..=*n), Self::Set(vec) => { if vec.len() == 2 { let start = vec[0].0; @@ -391,10 +391,10 @@ impl NumericValue { || (first_delim == Some(NumericDelimiter::Ampersand) && start + 1 == end)) { - Some(start..end) + Some(start..=end) } else if first_delim == Some(NumericDelimiter::Hyphen) { // Handle shorthand notation like `100-4` for `100-104` - Some(start..end) + Some(start..=end) } else { None } @@ -409,7 +409,7 @@ impl NumericValue { } } - Some(vec[0].0..vec[vec.len() - 1].0) + Some(vec[0].0..=vec[vec.len() - 1].0) } else { None } From 23befaaa9b7b7e29bac672adbe8019629ce0f451 Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Tue, 9 Apr 2024 15:26:47 +0200 Subject: [PATCH 03/22] Update citationberg --- Cargo.toml | 4 ++-- src/csl/rendering/mod.rs | 5 ++++- src/csl/taxonomy.rs | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 95770cf..e39915e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ archive = ["ciborium"] csl-json = ["citationberg/json"] [dependencies] -citationberg = "0.3.0" +citationberg = { git = "https://github.com/typst/citationberg.git" } indexmap = { version = "2.0.2", features = ["serde"] } numerals = "0.1.4" paste = "1.0.14" @@ -27,7 +27,7 @@ thiserror = "1.0.48" unic-langid = { version = "0.9.0", features = ["serde"] } unicode-segmentation = "1.6.0" unscanny = "0.1.0" -url = { version = "2.4", features = ["serde"] } +url = { version = "2.4", features = ["serde"] } biblatex = { version = "0.9", optional = true } ciborium = { version = "0.2.1", optional = true } clap = { version = "4", optional = true, features = ["cargo"] } diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index e26af6a..7f86416 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -365,7 +365,10 @@ fn render_typed_num( } } -fn render_page_range(range: std::ops::Range, ctx: &mut Context) { +fn render_page_range( + range: std::ops::RangeInclusive, + ctx: &mut Context, +) { ctx.style .csl .settings diff --git a/src/csl/taxonomy.rs b/src/csl/taxonomy.rs index c691ed3..c6a90da 100644 --- a/src/csl/taxonomy.rs +++ b/src/csl/taxonomy.rs @@ -164,7 +164,7 @@ impl EntryLike for Entry { MaybeTyped::Typed(r) => r.range(), MaybeTyped::String(_) => None, }) - .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(r.start)))), + .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(*r.start())))), NumberVariable::PartNumber => self .bound_select( &select!( From 4e68404c62220cf957e4a90378eb762111dcf4f6 Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Tue, 9 Apr 2024 15:29:22 +0200 Subject: [PATCH 04/22] Fix JSON taxonomy --- src/csl/taxonomy.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/csl/taxonomy.rs b/src/csl/taxonomy.rs index c6a90da..4f4bb72 100644 --- a/src/csl/taxonomy.rs +++ b/src/csl/taxonomy.rs @@ -765,7 +765,7 @@ impl EntryLike for citationberg::json::Item { { return n .range() - .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(r.start)))); + .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(*r.start())))); } } match self.0.get(&variable.to_string())? { From 26540cbe1f72c8a945d78d69c2eeeb099c1f2cc2 Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Tue, 9 Apr 2024 15:35:48 +0200 Subject: [PATCH 05/22] Add passing test --- tests/citeproc-pass.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/citeproc-pass.txt b/tests/citeproc-pass.txt index 740f3c3..0853321 100644 --- a/tests/citeproc-pass.txt +++ b/tests/citeproc-pass.txt @@ -17,6 +17,7 @@ bugreports_SingletonIfMatchNoneFail bugreports_TitleCase bugreports_YearSuffixLingers bugreports_disambiguate +bugreports_effingBug bugreports_undefinedCrash collapse_AuthorCollapse collapse_AuthorCollapseDifferentAuthorsOneWithEtAl From b179c5d9a2479c89c7303e4beed78a2a83d8f920 Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Tue, 23 Apr 2024 16:58:58 +0200 Subject: [PATCH 06/22] Change handling of page ranges --- src/csl/rendering/mod.rs | 39 ++++--- src/csl/taxonomy.rs | 13 +-- src/interop.rs | 41 +++---- src/lib.rs | 2 +- src/types/mod.rs | 2 + src/types/numeric.rs | 58 ++++------ src/types/page.rs | 238 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 304 insertions(+), 89 deletions(-) create mode 100644 src/types/page.rs diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index 7f86416..c4623ec 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -8,13 +8,15 @@ use citationberg::taxonomy::{ use citationberg::{ ChooseBranch, CslMacro, DateDayForm, DateMonthForm, DatePartName, DateParts, DateStrongAnyForm, GrammarGender, LabelPluralize, LayoutRenderingElement, - LongShortForm, NumberForm, TestPosition, TextCase, ToAffixes, ToFormatting, + LongShortForm, NumberForm, PageRangeFormat, TestPosition, TextCase, ToAffixes, + ToFormatting, }; use citationberg::{TermForm, TextTarget}; use crate::csl::taxonomy::NumberVariableResult; use crate::lang::{Case, SentenceCase, TitleCase}; use crate::types::{ChunkedString, Date, MaybeTyped, Numeric}; +use crate::PageRanges; use super::taxonomy::EntryLike; use super::{Context, ElemMeta, IbidState, SpecialForm, UsageInfo}; @@ -365,21 +367,26 @@ fn render_typed_num( } } -fn render_page_range( - range: std::ops::RangeInclusive, - ctx: &mut Context, -) { - ctx.style - .csl - .settings - .page_range_format - .unwrap_or_default() - .format( - range, - ctx, - ctx.term(OtherTerm::PageRangeDelimiter.into(), TermForm::default(), false) - .or(Some("–")), - ) +fn render_page_range(range: PageRanges, ctx: &mut Context) { + let format = ctx.style.csl.settings.page_range_format.unwrap_or_default(); + let delim = ctx + .term(OtherTerm::PageRangeDelimiter.into(), TermForm::default(), false) + .or(Some("–")); + + range + .ranges + .iter() + .map(|r| match r { + crate::PageRangesPart::Ampersand => ctx.write_str(" & "), + crate::PageRangesPart::Comma => ctx.write_str(", "), + crate::PageRangesPart::EscapedRange(start, end) => PageRangeFormat::Expanded + .format(ctx, &start.to_string(), &end.to_string(), delim), + crate::PageRangesPart::SinglePage(page) => ctx.write_str(&page.to_string()), + crate::PageRangesPart::Range(start, end) => { + format.format(ctx, &start.to_string(), &end.to_string(), delim) + } + }) + .collect() .unwrap(); } diff --git a/src/csl/taxonomy.rs b/src/csl/taxonomy.rs index 4f4bb72..adceb32 100644 --- a/src/csl/taxonomy.rs +++ b/src/csl/taxonomy.rs @@ -5,7 +5,7 @@ use std::str::FromStr; use crate::types::{ ChunkedString, Date, EntryType, MaybeTyped, Numeric, Person, PersonRole, StringChunk, }; -use crate::Entry; +use crate::{Entry, PageRanges}; use citationberg::taxonomy::{ DateVariable, Kind, NameVariable, NumberVariable, StandardVariable, }; @@ -157,14 +157,13 @@ impl EntryLike for Entry { NumberVariable::NumberOfVolumes => { self.volume_total().map(|n| MaybeTyped::Typed(Cow::Borrowed(n))) } - NumberVariable::Page => self.page_range().map(MaybeTyped::to_cow), + NumberVariable::Page => self + .page_range() + .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(*r)))), NumberVariable::PageFirst => self .page_range() - .and_then(|r| match r { - MaybeTyped::Typed(r) => r.range(), - MaybeTyped::String(_) => None, - }) - .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(*r.start())))), + .and_then(PageRanges::first) + .map(|r| MaybeTyped::Typed(Cow::Owned(*r))), NumberVariable::PartNumber => self .bound_select( &select!( diff --git a/src/interop.rs b/src/interop.rs index ffb4f17..874f0e1 100644 --- a/src/interop.rs +++ b/src/interop.rs @@ -1,6 +1,7 @@ //! Provides conversion methods for BibLaTeX. use std::convert::TryFrom; +use std::str::FromStr; use biblatex as tex; use tex::{ @@ -479,37 +480,23 @@ impl TryFrom<&tex::Entry> for Entry { if let Some(pages) = map_res(entry.pages())? { item.set_page_range(match pages { - PermissiveType::Typed(pages) => { - if let Some(n) = - pages.first().filter(|f| pages.len() == 1 && f.start == f.end) - { - MaybeTyped::Typed(Numeric::new(n.start as i32)) - } else { - let mut items = vec![]; - for (i, pair) in pages.iter().enumerate() { - let last = i + 1 == pages.len(); - let last_delim = (!last).then_some(NumericDelimiter::Comma); - - if pair.start == pair.end { - items.push((pair.start as i32, last_delim)); + PermissiveType::Typed(pages) => PageRanges::new( + pages + .into_iter() + .map(|p| { + if p.start == p.end { + PageRangesPart::SinglePage(Numeric::from(p.start)) } else { - items.push(( - pair.start as i32, - Some(NumericDelimiter::Hyphen), - )); - items.push((pair.end as i32, last_delim)); + PageRangesPart::Range( + Numeric::from(p.start), + Numeric::from(p.end), + ) } - } - - MaybeTyped::Typed(Numeric { - value: NumericValue::Set(items), - prefix: None, - suffix: None, }) - } - } + .collect(), + ), PermissiveType::Chunks(chunks) => { - MaybeTyped::infallible_from_str(&chunks.format_verbatim()) + PageRanges::from_str(&chunks.format_verbatim()).unwrap() } }); } diff --git a/src/lib.rs b/src/lib.rs index ada473b..2eab224 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -522,7 +522,7 @@ entry! { /// Published version of an item. "edition" => edition: MaybeTyped, /// The range of pages within the parent this item occupies - "page-range" => page_range: MaybeTyped, + "page-range" => page_range: PageRanges, /// The total number of pages the item has. "page-total" => page_total: Numeric, /// The time range within the parent this item starts and ends at. diff --git a/src/types/mod.rs b/src/types/mod.rs index 3de8766..e181468 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -12,11 +12,13 @@ use thiserror::Error; use url::Url; pub use numeric::*; +pub use page::*; pub use persons::*; pub use strings::*; pub use time::*; mod numeric; +mod page; mod persons; mod strings; mod time; diff --git a/src/types/numeric.rs b/src/types/numeric.rs index 2c3f901..40f0029 100644 --- a/src/types/numeric.rs +++ b/src/types/numeric.rs @@ -10,6 +10,8 @@ use serde::{Deserialize, Deserializer, Serialize}; use thiserror::Error; use unscanny::Scanner; +use crate::PageRanges; + use super::MaybeTyped; /// A numeric value that can be pluralized. @@ -204,6 +206,7 @@ impl Numeric { NumericValue::Number(n) if is_number_of => n != &1, NumericValue::Number(_) => false, NumericValue::Set(vec) => vec.len() != 1, + NumericValue::PageRanges(PageRanges { ranges }) => todo!(), } } @@ -218,7 +221,7 @@ impl Numeric { } /// Returns a range if the value is a range. - pub fn range(&self) -> Option> { + pub fn range(&self) -> Option<&PageRanges> { self.value.range() } @@ -228,6 +231,7 @@ impl Numeric { NumericValue::Number(val) if n == 0 => Some(*val), NumericValue::Number(_) => None, NumericValue::Set(vec) => vec.get(n).map(|(val, _)| *val), + NumericValue::PageRanges(_) => todo!(), } } @@ -330,6 +334,16 @@ impl From for Numeric { } } +impl From for Numeric { + fn from(value: PageRanges) -> Self { + Self { + value: NumericValue::PageRanges(value), + prefix: None, + suffix: None, + } + } +} + /// Error when parsing a numeric value. #[derive(Debug, Clone, Copy, Error, PartialEq, Eq)] pub enum NumericError { @@ -370,50 +384,18 @@ impl Display for Numeric { pub enum NumericValue { /// A single number. Number(i32), + /// Page ranges. Considered a number for compatability with CSL. + PageRanges(PageRanges), /// A set of numbers. Set(Vec<(i32, Option)>), } impl NumericValue { /// Returns a range if the value is a range. - pub fn range(&self) -> Option> { + pub fn range(&self) -> Option<&PageRanges> { match self { - // A single number is seen as a range of length 1. See #103. - Self::Number(n) => Some(*n..=*n), - Self::Set(vec) => { - if vec.len() == 2 { - let start = vec[0].0; - let end = vec[1].0; - let first_delim = vec[0].1; - - if start < end - && (first_delim == Some(NumericDelimiter::Hyphen) - || (first_delim == Some(NumericDelimiter::Ampersand) - && start + 1 == end)) - { - Some(start..=end) - } else if first_delim == Some(NumericDelimiter::Hyphen) { - // Handle shorthand notation like `100-4` for `100-104` - Some(start..=end) - } else { - None - } - } else if vec.len() > 2 { - for i in 1..vec.len() { - if vec[i - 1].1 != Some(NumericDelimiter::Ampersand) { - return None; - } - - if vec[i - 1].0 + 1 != vec[i].0 { - return None; - } - } - - Some(vec[0].0..=vec[vec.len() - 1].0) - } else { - None - } - } + Self::PageRanges(r) => Some(r), + _ => None, } } } diff --git a/src/types/page.rs b/src/types/page.rs new file mode 100644 index 0000000..622fb60 --- /dev/null +++ b/src/types/page.rs @@ -0,0 +1,238 @@ +use std::{fmt::Display, num::NonZeroUsize, str::FromStr}; + +use crate::{Numeric, NumericError}; + +use super::{deserialize_from_str, serialize_display}; +use serde::{de, Deserialize, Serialize}; +use thiserror::Error; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PageRanges { + pub ranges: Vec, +} + +impl PageRanges { + pub fn new(ranges: Vec) -> Self { + Self { ranges } + } + + pub fn first(&self) -> Option<&Numeric> { + self.ranges.first().and_then(PageRangesPart::start) + } +} + +impl Display for PageRanges { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.ranges.iter().map(|r| r.fmt(f)).collect() + } +} + +impl FromStr for PageRanges { + type Err = PageRangesPartErr; + + fn from_str(s: &str) -> Result { + // Split input into different ranges separated by `&` or `,` + Ok(Self { + ranges: group_by(s, |c, d| !(c == ',' || c == '&' || d == ',' || d == '&')) + .map(PageRangesPart::from_str) + .collect::>()?, + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum PageRangesPart { + Ampersand, + Comma, + EscapedRange(Numeric, Numeric), + SinglePage(Numeric), + Range(Numeric, Numeric), +} + +impl PageRangesPart { + pub fn start(&self) -> Option<&Numeric> { + match self { + Self::EscapedRange(s, _) => Some(s), + Self::SinglePage(s) => Some(s), + Self::Range(s, _) => Some(s), + _ => None, + } + } +} + +impl Display for PageRangesPart { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + PageRangesPart::Ampersand => "&", + PageRangesPart::Comma => ", ", + PageRangesPart::EscapedRange(s, e) => return write!(f, "{s}-{e}"), + PageRangesPart::SinglePage(s) => return write!(f, "{s}"), + PageRangesPart::Range(s, e) => return write!(f, "{s}-{e}"), + }; + Display::fmt(s, f) + } +} + +#[derive(Debug, Clone, Copy, Error)] +pub enum PageRangesPartErr { + /// The string is malformed. + #[error("page range string malformed")] + Malformed, + /// The string is empty. + #[error("page range is empty")] + Empty, + #[error("todo")] + NumericErr(NumericError), +} + +impl From for PageRangesPartErr { + fn from(value: NumericError) -> Self { + Self::NumericErr(value) + } +} + +impl FromStr for PageRangesPart { + type Err = PageRangesPartErr; + + fn from_str(s: &str) -> Result { + let s = s.trim(); + if s.is_empty() { + return Err(PageRangesPartErr::Empty); + } + let p = if s == "&" { + Self::Ampersand + } else if s == "," { + Self::Comma + } else if s.contains("\\-") { + // If `-` chars are escaped, write `-`. + let mut parts = s.split("\\-").map(str::trim); + + let start = parts.next().ok_or(PageRangesPartErr::Empty)?; + let end = parts.next().ok_or(PageRangesPartErr::Empty)?; + + let r = Self::EscapedRange(parse_number(start)?, parse_number(end)?); + if parts.next().is_some() { + return Err(PageRangesPartErr::Malformed); + } + r + } else { + // Otherwise, split into the two halves of the dash. + let mut parts = s.split(|c| c == '-' || c == '–').map(str::trim); + let r = match (parts.next(), parts.next()) { + (None, None) => todo!(), + (Some(start), None) => Self::SinglePage(parse_number(start)?), + (Some(start), Some(end)) => { + Self::Range(parse_number(start)?, parse_number(end)?) + } + _ => unreachable!(), + }; + if parts.next().is_some() { + return Err(PageRangesPartErr::Malformed); + } + r + }; + Ok(p) + } +} + +deserialize_from_str!(PageRanges); +serialize_display!(PageRanges); + +fn parse_number(s: &str) -> Result { + todo!() +} + +/// Split `s` into maximal chunks such that two successive chars satisfy `pred`. +/// +/// Returns an iterator over these chunks. +pub(crate) fn group_by(s: &str, pred: F) -> GroupBy<'_, F> +where + F: FnMut(char, char) -> bool, +{ + GroupBy::new(s, pred) +} + +/// An iterator over string slice in (non-overlapping) chunks separated by a predicate. +/// +/// Adapted from the nightly std. +pub(crate) struct GroupBy<'a, P> { + string: &'a str, + predicate: P, +} + +impl<'a, P> GroupBy<'a, P> { + pub(crate) fn new(string: &'a str, predicate: P) -> Self { + GroupBy { string, predicate } + } +} + +impl<'a, P> Iterator for GroupBy<'a, P> +where + P: FnMut(char, char) -> bool, +{ + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option { + if self.string.is_empty() { + None + } else { + let mut len = 1; + for w in windows(self.string, 2) { + let chars: Vec<_> = w.chars().collect(); + let (c, d) = (chars[0], chars[1]); + if (self.predicate)(c, d) { + len += 1 + } else { + break; + } + } + let (head, tail) = self.string.split_at(len); + self.string = tail; + Some(head) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.string.chars().size_hint() + } +} + +/// Return an iterator of sliding windows of size `size` over `string`. +/// +/// # Panic +/// +/// Panics if `size` is zero. +pub(crate) fn windows(string: &str, size: usize) -> Windows<'_> { + assert!(size > 0); + Windows::new(string, NonZeroUsize::new(size).unwrap()) +} + +/// An iterator of sliding windows of size `size` over `string`. +/// +/// Each call of `next` advanced the window by one. +pub(crate) struct Windows<'a> { + string: &'a str, + size: NonZeroUsize, +} + +impl<'a> Windows<'a> { + pub(crate) fn new(string: &'a str, size: NonZeroUsize) -> Self { + Self { string, size } + } +} + +impl<'a> Iterator for Windows<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + if self.size.get() > self.string.len() { + None + } else { + let ret = Some(&self.string[..self.size.get()]); + self.string = &self.string[1..]; + ret + } + } +} From 993267ca69f5e230c824486729f5ebcc84b67dad Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Fri, 10 May 2024 09:44:51 +0200 Subject: [PATCH 07/22] Adapt to new citationberg version --- src/csl/rendering/mod.rs | 7 ++--- src/csl/taxonomy.rs | 4 +-- src/lib.rs | 4 +-- src/selectors/mod.rs | 2 +- src/types/numeric.rs | 60 ++++++++++++++++++++++++++++++++++++++-- src/types/page.rs | 17 ++++++++++-- 6 files changed, 79 insertions(+), 15 deletions(-) diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index e0308d9..b0f1900 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -363,11 +363,11 @@ fn render_typed_num( }; if normal_num { - num.with_form(ctx, form, gender, ctx.ordinal_lookup()).unwrap(); + num.with_form(ctx, form, gender, &ctx.ordinal_lookup()).unwrap(); } } -fn render_page_range(range: PageRanges, ctx: &mut Context) { +fn render_page_range(range: &PageRanges, ctx: &mut Context) { let format = ctx.style.csl.settings.page_range_format.unwrap_or_default(); let delim = ctx .term(OtherTerm::PageRangeDelimiter.into(), TermForm::default(), false) @@ -376,7 +376,7 @@ fn render_page_range(range: PageRanges, ctx: &mut Context) { range .ranges .iter() - .map(|r| match r { + .try_for_each(|r| match r { crate::PageRangesPart::Ampersand => ctx.write_str(" & "), crate::PageRangesPart::Comma => ctx.write_str(", "), crate::PageRangesPart::EscapedRange(start, end) => PageRangeFormat::Expanded @@ -386,7 +386,6 @@ fn render_page_range(range: PageRanges, ctx: &mut Context) { format.format(ctx, &start.to_string(), &end.to_string(), delim) } }) - .collect() .unwrap(); } diff --git a/src/csl/taxonomy.rs b/src/csl/taxonomy.rs index adceb32..e94c083 100644 --- a/src/csl/taxonomy.rs +++ b/src/csl/taxonomy.rs @@ -159,11 +159,11 @@ impl EntryLike for Entry { } NumberVariable::Page => self .page_range() - .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(*r)))), + .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(r.clone())))), NumberVariable::PageFirst => self .page_range() .and_then(PageRanges::first) - .map(|r| MaybeTyped::Typed(Cow::Owned(*r))), + .map(|r| MaybeTyped::Typed(Cow::Owned(r.clone()))), NumberVariable::PartNumber => self .bound_select( &select!( diff --git a/src/lib.rs b/src/lib.rs index 2eab224..ee819ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -610,9 +610,7 @@ impl Entry { // Index parents with the items in path. If, at any level, the index // exceeds the number of parents, increment the index at the // previous level. If no other level remains, return. - let Some(first_path) = path.first() else { - return None; - }; + let first_path = path.first()?; if self.parents.len() <= *first_path { return None; diff --git a/src/selectors/mod.rs b/src/selectors/mod.rs index 9c3532d..8ce1523 100644 --- a/src/selectors/mod.rs +++ b/src/selectors/mod.rs @@ -228,7 +228,7 @@ impl Selector { Self::Binding(binding, expr) => { expr.apply_any(entries).map(|(mut bound, es)| { if !es.is_empty() { - bound.insert(binding.to_string(), es.get(0).unwrap()); + bound.insert(binding.to_string(), es.first().unwrap()); } (bound, vec![]) }) diff --git a/src/types/numeric.rs b/src/types/numeric.rs index 40f0029..e177904 100644 --- a/src/types/numeric.rs +++ b/src/types/numeric.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Deserializer, Serialize}; use thiserror::Error; use unscanny::Scanner; -use crate::PageRanges; +use crate::{PageRanges, PageRangesPart}; use super::MaybeTyped; @@ -136,6 +136,31 @@ impl Numeric { } } } + NumericValue::PageRanges(r) => { + for p in &r.ranges { + match p { + crate::PageRangesPart::Ampersand => write!(buf, " & ")?, + crate::PageRangesPart::Comma => write!(buf, ", ")?, + crate::PageRangesPart::EscapedRange(s, e) => { + s.fmt_value(buf, machine_readable)?; + write!(buf, "-")?; + e.fmt_value(buf, machine_readable)? + } + crate::PageRangesPart::SinglePage(s) => { + s.fmt_value(buf, machine_readable)? + } + crate::PageRangesPart::Range(s, e) => { + s.fmt_value(buf, machine_readable)?; + if machine_readable { + buf.write_char('–')?; + } else { + write!(buf, "–")?; + } + e.fmt_value(buf, machine_readable)? + } + } + } + } } Ok(()) @@ -162,7 +187,7 @@ impl Numeric { buf: &mut T, form: NumberForm, gender: Option, - ords: OrdinalLookup<'_>, + ords: &OrdinalLookup<'_>, ) -> std::fmt::Result where T: Write, @@ -195,6 +220,27 @@ impl Numeric { } } } + NumericValue::PageRanges(r) => { + for p in &r.ranges { + match p { + crate::PageRangesPart::Ampersand => write!(buf, " & ")?, + crate::PageRangesPart::Comma => write!(buf, ", ")?, + crate::PageRangesPart::EscapedRange(s, e) => { + s.with_form(buf, form, gender, ords)?; + write!(buf, "-")?; + e.with_form(buf, form, gender, ords)? + } + crate::PageRangesPart::SinglePage(s) => { + s.with_form(buf, form, gender, ords)? + } + crate::PageRangesPart::Range(s, e) => { + s.with_form(buf, form, gender, ords)?; + write!(buf, "–")?; + e.with_form(buf, form, gender, ords)? + } + } + } + } } Ok(()) @@ -206,7 +252,15 @@ impl Numeric { NumericValue::Number(n) if is_number_of => n != &1, NumericValue::Number(_) => false, NumericValue::Set(vec) => vec.len() != 1, - NumericValue::PageRanges(PageRanges { ranges }) => todo!(), + NumericValue::PageRanges(PageRanges { ranges }) => { + ranges.len() == 1 + && matches!( + ranges[0], + PageRangesPart::Ampersand + | PageRangesPart::Comma + | PageRangesPart::SinglePage(_) + ) + } } } diff --git a/src/types/page.rs b/src/types/page.rs index 622fb60..741479e 100644 --- a/src/types/page.rs +++ b/src/types/page.rs @@ -6,16 +6,20 @@ use super::{deserialize_from_str, serialize_display}; use serde::{de, Deserialize, Serialize}; use thiserror::Error; +/// Ranges of page numbers, e.g., `1-4, 5 & 6`. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct PageRanges { + /// The given ranges. pub ranges: Vec, } impl PageRanges { + /// Create a new `PageRanges` struct. pub fn new(ranges: Vec) -> Self { Self { ranges } } + /// Get the first page of the first range. pub fn first(&self) -> Option<&Numeric> { self.ranges.first().and_then(PageRangesPart::start) } @@ -23,7 +27,7 @@ impl PageRanges { impl Display for PageRanges { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.ranges.iter().map(|r| r.fmt(f)).collect() + self.ranges.iter().try_for_each(|r| r.fmt(f)) } } @@ -40,16 +44,23 @@ impl FromStr for PageRanges { } } +/// Parts of the page ranges. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum PageRangesPart { + /// An and, i.e, `&`. Ampersand, + /// A comma, i.e., `,`. Comma, + /// An escaped range with start and end, e.g., `1\-4`. EscapedRange(Numeric, Numeric), + /// A single page, e.g., `5`. SinglePage(Numeric), + /// A full range, e.g., `1n8--1n14`. Range(Numeric, Numeric), } impl PageRangesPart { + /// The start of a range if any. pub fn start(&self) -> Option<&Numeric> { match self { Self::EscapedRange(s, _) => Some(s), @@ -73,6 +84,7 @@ impl Display for PageRangesPart { } } +/// Parsing error for page ranges. #[derive(Debug, Clone, Copy, Error)] pub enum PageRangesPartErr { /// The string is malformed. @@ -81,6 +93,7 @@ pub enum PageRangesPartErr { /// The string is empty. #[error("page range is empty")] Empty, + /// An error from parsing a numeric value. #[error("todo")] NumericErr(NumericError), } @@ -139,7 +152,7 @@ deserialize_from_str!(PageRanges); serialize_display!(PageRanges); fn parse_number(s: &str) -> Result { - todo!() + Numeric::from_str(s) } /// Split `s` into maximal chunks such that two successive chars satisfy `pred`. From c8635cf1a0e2f9cde0983a225139de347a4a390d Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Fri, 10 May 2024 09:48:26 +0200 Subject: [PATCH 08/22] Update citationberg rev --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e39915e..f5d0dfa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ archive = ["ciborium"] csl-json = ["citationberg/json"] [dependencies] -citationberg = { git = "https://github.com/typst/citationberg.git" } +citationberg = { git = "https://github.com/typst/citationberg.git", rev = "ff1b135" } indexmap = { version = "2.0.2", features = ["serde"] } numerals = "0.1.4" paste = "1.0.14" From ac278c4f075a00c42db13d5a5684e5493aecc37c Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Fri, 10 May 2024 10:19:29 +0200 Subject: [PATCH 09/22] Fix cslJSON --- src/csl/taxonomy.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/csl/taxonomy.rs b/src/csl/taxonomy.rs index e94c083..7d38491 100644 --- a/src/csl/taxonomy.rs +++ b/src/csl/taxonomy.rs @@ -764,7 +764,8 @@ impl EntryLike for citationberg::json::Item { { return n .range() - .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(*r.start())))); + .and_then(PageRanges::first) + .map(|r| MaybeTyped::Typed(Cow::Owned(r.clone()))); } } match self.0.get(&variable.to_string())? { From 99c8c1b90556a672c0d6caae86847d690761f5de Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 5 Aug 2024 09:19:35 +0200 Subject: [PATCH 10/22] Change provisional error message for `NumericErr` --- src/types/page.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/types/page.rs b/src/types/page.rs index 741479e..744845a 100644 --- a/src/types/page.rs +++ b/src/types/page.rs @@ -94,14 +94,8 @@ pub enum PageRangesPartErr { #[error("page range is empty")] Empty, /// An error from parsing a numeric value. - #[error("todo")] - NumericErr(NumericError), -} - -impl From for PageRangesPartErr { - fn from(value: NumericError) -> Self { - Self::NumericErr(value) - } + #[error("page range contained invalid numeric value")] + NumericErr(#[from] NumericError), } impl FromStr for PageRangesPart { From bd66f013cf8808cac4b0c3e9134fbc8576d21b9f Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 5 Aug 2024 09:27:38 +0200 Subject: [PATCH 11/22] Have CI report errors --- Cargo.toml | 2 +- src/csl/taxonomy.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f5d0dfa..3f38f2f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ archive = ["ciborium"] csl-json = ["citationberg/json"] [dependencies] -citationberg = { git = "https://github.com/typst/citationberg.git", rev = "ff1b135" } +citationberg = { git = "https://github.com/typst/citationberg.git", rev = "ad52765" } indexmap = { version = "2.0.2", features = ["serde"] } numerals = "0.1.4" paste = "1.0.14" diff --git a/src/csl/taxonomy.rs b/src/csl/taxonomy.rs index e94c083..2d1cd2f 100644 --- a/src/csl/taxonomy.rs +++ b/src/csl/taxonomy.rs @@ -764,7 +764,8 @@ impl EntryLike for citationberg::json::Item { { return n .range() - .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(*r.start())))); + .and_then(|r| r.first()) + .map(|r| MaybeTyped::Typed(Cow::Owned(r.clone()))); } } match self.0.get(&variable.to_string())? { From bbf99b2eeb0778cd6f1d11c276f785d9a0c20483 Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Tue, 20 Aug 2024 09:26:24 +0200 Subject: [PATCH 12/22] Fix page ranges --- Cargo.toml | 2 +- src/csl/mod.rs | 9 ++- src/csl/rendering/mod.rs | 134 +++++++++++++++++++++++++-------------- src/csl/sort.rs | 11 ++++ src/csl/taxonomy.rs | 39 +++++++++--- src/types/numeric.rs | 85 ------------------------- src/types/page.rs | 102 +++++++++++++++++++++++++++-- tests/citeproc-pass.txt | 1 + 8 files changed, 229 insertions(+), 154 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ac18748..fb7bac7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ archive = ["ciborium"] csl-json = ["citationberg/json"] [dependencies] -citationberg = { git = "https://github.com/typst/citationberg.git", rev = "61ca6a7fcc48365f805e521cc8bc1f8f679ff372" } +citationberg = { git = "https://github.com/typst/citationberg.git", rev = "4a2c71ab051e45c1ed1533af5b8ad7e03737955c" } indexmap = { version = "2.0.2", features = ["serde"] } numerals = "0.1.4" paste = "1.0.14" diff --git a/src/csl/mod.rs b/src/csl/mod.rs index 6206275..2edb495 100644 --- a/src/csl/mod.rs +++ b/src/csl/mod.rs @@ -9,7 +9,7 @@ use std::{mem, vec}; use citationberg::taxonomy::{ DateVariable, Locator, NameVariable, NumberVariable, OtherTerm, StandardVariable, - Term, Variable, + Term, Variable, PageVariable }; use citationberg::{ taxonomy as csl_taxonomy, Affixes, BaseLanguage, Citation, CitationFormat, Collapse, @@ -30,7 +30,7 @@ use self::elem::last_text_mut_child; pub use self::elem::{ BufWriteFormat, Elem, ElemChild, ElemChildren, ElemMeta, Formatted, Formatting, }; -use self::taxonomy::{EntryLike, NumberVariableResult}; +use self::taxonomy::{EntryLike, NumberVariableResult, PageVariableResult}; #[cfg(feature = "archive")] pub mod archive; @@ -2562,6 +2562,11 @@ impl<'a, T: EntryLike> Context<'a, T> { res } + fn resolve_page_variable(&self) -> Option { + self.writing.prepare_variable_query(PageVariable::Page)?; + self.instance.resolve_page_variable() + } + /// Resolve a name variable. /// /// Honors suppressions. diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index b0f1900..fae8de2 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -3,7 +3,8 @@ use std::fmt::Write; use std::str::FromStr; use citationberg::taxonomy::{ - Locator, NumberVariable, OtherTerm, StandardVariable, Term, Variable, + Locator, NumberOrPageVariable, NumberVariable, OtherTerm, PageVariable, + StandardVariable, Term, Variable, }; use citationberg::{ ChooseBranch, CslMacro, DateDayForm, DateMonthForm, DatePartName, DateParts, @@ -13,7 +14,7 @@ use citationberg::{ }; use citationberg::{TermForm, TextTarget}; -use crate::csl::taxonomy::NumberVariableResult; +use crate::csl::taxonomy::{NumberVariableResult, PageVariableResult}; use crate::lang::{Case, SentenceCase, TitleCase}; use crate::types::{ChunkedString, Date, MaybeTyped, Numeric}; use crate::PageRanges; @@ -83,21 +84,19 @@ impl RenderCsl for citationberg::Text { } _ => ctx.push_chunked(&val), }, - ResolvedTextTarget::NumberVariable(var, n) => match n { + ResolvedTextTarget::NumberVariable(_, n) => match n { NumberVariableResult::Regular(MaybeTyped::Typed(num)) if num.will_transform() => { - render_typed_num(num.as_ref(), NumberForm::default(), var, None, ctx); - } - NumberVariableResult::Regular(n) - if matches!(var, NumberVariable::Page) => - { - // TODO: Remove this hack - ctx.push_str(&n.to_str().replace('-', "–")) + render_typed_num(num.as_ref(), NumberForm::default(), None, ctx); } NumberVariableResult::Regular(n) => ctx.push_str(&n.to_str()), NumberVariableResult::Transparent(n) => ctx.push_transparent(n), }, + ResolvedTextTarget::PageVariable(p) => match p { + MaybeTyped::Typed(r) => render_page_range(&r, ctx), + MaybeTyped::String(s) => ctx.push_str(&s.replace("-", "–")), + }, ResolvedTextTarget::Macro(mac) => { for child in &mac.children { child.render(ctx); @@ -140,6 +139,9 @@ impl RenderCsl for citationberg::Text { match target { ResolvedTextTarget::StandardVariable(s, _) => var == Variable::Standard(s), ResolvedTextTarget::NumberVariable(n, _) => var == Variable::Number(n), + ResolvedTextTarget::PageVariable(_) => { + var == Variable::Page(PageVariable::Page) + } ResolvedTextTarget::Macro(mac) => { mac.children.iter().any(|c| c.will_render(ctx, var)) } @@ -172,7 +174,8 @@ impl RenderCsl for citationberg::Text { (false, UsageInfo { has_vars: true, ..Default::default() }) } ResolvedTextTarget::StandardVariable(_, _) - | ResolvedTextTarget::NumberVariable(_, _) => ( + | ResolvedTextTarget::NumberVariable(_, _) + | ResolvedTextTarget::PageVariable(_) => ( true, UsageInfo { has_vars: true, @@ -200,6 +203,7 @@ impl RenderCsl for citationberg::Text { enum ResolvedTextTarget<'a, 'b> { StandardVariable(StandardVariable, Cow<'a, ChunkedString>), NumberVariable(NumberVariable, NumberVariableResult<'a>), + PageVariable(PageVariableResult), Macro(&'a CslMacro), Term(&'a str), Value(&'b str), @@ -245,6 +249,9 @@ impl<'a, 'b> ResolvedTextTarget<'a, 'b> { TextTarget::Variable { var: Variable::Number(var), .. } => ctx .resolve_number_variable(*var) .map(|n| ResolvedTextTarget::NumberVariable(*var, n)), + TextTarget::Variable { var: Variable::Page(_), .. } => ctx + .resolve_page_variable() + .map(ResolvedTextTarget::PageVariable), TextTarget::Variable { .. } => None, TextTarget::Macro { name } => { ctx.style.get_macro(name).map(ResolvedTextTarget::Macro) @@ -280,7 +287,7 @@ impl RenderCsl for citationberg::Number { Some(NumberVariableResult::Regular(MaybeTyped::Typed(num))) if num.will_transform() => { - render_typed_num(num.as_ref(), self.form, self.variable, gender, ctx); + render_typed_num(num.as_ref(), self.form, gender, ctx); } Some(NumberVariableResult::Regular(MaybeTyped::Typed(num))) => { write!(ctx, "{}", num).unwrap() @@ -347,24 +354,10 @@ impl RenderCsl for citationberg::Number { fn render_typed_num( num: &Numeric, form: NumberForm, - variable: NumberVariable, gender: Option, ctx: &mut Context, ) { - let normal_num = if form == NumberForm::Numeric && variable == NumberVariable::Page { - if let Some(range) = num.range() { - render_page_range(range, ctx); - false - } else { - true - } - } else { - true - }; - - if normal_num { - num.with_form(ctx, form, gender, &ctx.ordinal_lookup()).unwrap(); - } + num.with_form(ctx, form, gender, &ctx.ordinal_lookup()).unwrap(); } fn render_page_range(range: &PageRanges, ctx: &mut Context) { @@ -399,7 +392,11 @@ fn label_pluralization( LabelPluralize::Contextual => match variable { NumberVariableResult::Regular(MaybeTyped::String(_)) => false, NumberVariableResult::Regular(MaybeTyped::Typed(n)) => { - n.is_plural(label.variable.is_number_of_variable()) + if let NumberOrPageVariable::Number(v) = label.variable { + n.is_plural(v.is_number_of_variable()) + } else { + panic!("Incompatiable variable types") + } } NumberVariableResult::Transparent(_) => false, }, @@ -412,19 +409,37 @@ impl RenderCsl for citationberg::Label { return; } - let Some(variable) = ctx.resolve_number_variable(self.variable) else { - return; - }; + match self.variable { + NumberOrPageVariable::Number(n) => { + let Some(variable) = ctx.resolve_number_variable(n) else { + return; + }; + + let depth = ctx.push_elem(citationberg::Formatting::default()); + let plural = label_pluralization(self, variable); + + let content = ctx + .term(Term::from(self.variable), self.label.form, plural) + .unwrap_or_default(); + + render_label_with_var(&self.label, ctx, content); + ctx.commit_elem(depth, None, Some(ElemMeta::Label)); + } + NumberOrPageVariable::Page(pv) => { + let Some(_) = ctx.resolve_page_variable() else { + return; + }; - let depth = ctx.push_elem(citationberg::Formatting::default()); - let plural = label_pluralization(self, variable); + let depth = ctx.push_elem(citationberg::Formatting::default()); - let content = ctx - .term(Term::from(self.variable), self.label.form, plural) - .unwrap_or_default(); + // TODO: when to pluralize? + let content = + ctx.term(Term::from(pv), self.label.form, false).unwrap_or_default(); - render_label_with_var(&self.label, ctx, content); - ctx.commit_elem(depth, None, Some(ElemMeta::Label)); + render_label_with_var(&self.label, ctx, content); + ctx.commit_elem(depth, None, Some(ElemMeta::Label)); + } + } } fn will_render(&self, _ctx: &mut Context, _var: Variable) -> bool { @@ -433,7 +448,9 @@ impl RenderCsl for citationberg::Label { fn will_have_info(&self, ctx: &mut Context) -> (bool, UsageInfo) { match ctx.instance.kind { - Some(SpecialForm::VarOnly(Variable::Number(n))) if self.variable != n => { + Some(SpecialForm::VarOnly(Variable::Number(n))) + if self.variable != NumberOrPageVariable::Number(n) => + { return (false, UsageInfo::default()); } Some( @@ -441,7 +458,8 @@ impl RenderCsl for citationberg::Label { | SpecialForm::OnlyFirstDate | SpecialForm::OnlyYearSuffix, ) => { - if self.variable != NumberVariable::Locator { + if self.variable != NumberOrPageVariable::Number(NumberVariable::Locator) + { return (true, UsageInfo::default()); } } @@ -449,7 +467,7 @@ impl RenderCsl for citationberg::Label { } // Never yield a label if the locator is set to custom. - if self.variable == NumberVariable::Locator + if self.variable == NumberOrPageVariable::Number(NumberVariable::Locator) && ctx .instance .cite_props @@ -460,14 +478,31 @@ impl RenderCsl for citationberg::Label { return (false, UsageInfo::default()); } - if let Some(num) = ctx.resolve_number_variable(self.variable) { - let plural = label_pluralization(self, num); - ( - ctx.term(Term::from(self.variable), self.label.form, plural).is_some(), - UsageInfo::default(), - ) - } else { - (false, UsageInfo::default()) + match self.variable { + NumberOrPageVariable::Number(n) => { + if let Some(num) = ctx.resolve_number_variable(n) { + let plural = label_pluralization(self, num); + ( + ctx.term(Term::from(self.variable), self.label.form, plural) + .is_some(), + UsageInfo::default(), + ) + } else { + (false, UsageInfo::default()) + } + } + NumberOrPageVariable::Page(pv) => { + if let Some(_) = ctx.resolve_page_variable() { + // TODO + let plural = false; + ( + ctx.term(Term::from(pv), self.label.form, plural).is_some(), + UsageInfo::default(), + ) + } else { + (false, UsageInfo::default()) + } + } } } } @@ -1097,6 +1132,7 @@ impl<'a, 'b, T: EntryLike> Iterator for BranchConditionIter<'a, 'b, T> { Variable::Name(n) => { !self.ctx.resolve_name_variable(n).is_empty() } + Variable::Page(_) => self.ctx.resolve_page_variable().is_some(), }) } else { None diff --git a/src/csl/sort.rs b/src/csl/sort.rs index bcf9380..d762bad 100644 --- a/src/csl/sort.rs +++ b/src/csl/sort.rs @@ -82,6 +82,17 @@ impl<'a> StyleContext<'a> { (None, None) => Ordering::Equal, } } + SortKey::Variable { variable: Variable::Page(_), .. } => { + let a = InstanceContext::sort_instance(a, a_idx).resolve_page_variable(); + let b = InstanceContext::sort_instance(b, b_idx).resolve_page_variable(); + + match (a, b) { + (Some(a), Some(b)) => a.csl_cmp(&b), + (Some(_), None) => Ordering::Greater, + (None, Some(_)) => Ordering::Less, + (None, None) => Ordering::Equal, + } + } SortKey::MacroName { name, names_min, diff --git a/src/csl/taxonomy.rs b/src/csl/taxonomy.rs index 7d38491..b30aea3 100644 --- a/src/csl/taxonomy.rs +++ b/src/csl/taxonomy.rs @@ -23,6 +23,7 @@ pub trait EntryLike { &self, variable: NumberVariable, ) -> Option>>; + fn resolve_page_variable(&self) -> Option>; fn resolve_standard_variable( &self, form: LongShortForm, @@ -70,6 +71,10 @@ impl<'a, T: EntryLike> InstanceContext<'a, T> { } } + pub(super) fn resolve_page_variable(&self) -> Option { + self.entry.resolve_page_variable() + } + // Number variables are standard variables. pub(super) fn resolve_standard_variable( &self, @@ -97,6 +102,8 @@ pub(super) enum NumberVariableResult<'a> { Transparent(usize), } +pub(super) type PageVariableResult = MaybeTyped; + impl<'a> NumberVariableResult<'a> { pub(super) fn from_regular(regular: MaybeTyped>) -> Self { Self::Regular(regular) @@ -157,9 +164,6 @@ impl EntryLike for Entry { NumberVariable::NumberOfVolumes => { self.volume_total().map(|n| MaybeTyped::Typed(Cow::Borrowed(n))) } - NumberVariable::Page => self - .page_range() - .map(|r| MaybeTyped::Typed(Cow::Owned(Numeric::from(r.clone())))), NumberVariable::PageFirst => self .page_range() .and_then(PageRanges::first) @@ -200,6 +204,10 @@ impl EntryLike for Entry { } } + fn resolve_page_variable(&self) -> Option> { + self.page_range().map(|r| MaybeTyped::Typed(r.clone())) + } + // Number variables are standard variables. fn resolve_standard_variable( &self, @@ -754,18 +762,29 @@ impl EntryLike for citationberg::json::Item { } } + fn resolve_page_variable(&self) -> Option> { + match self.0.get("page")? { + csl_json::Value::Number(n) => { + Some(MaybeTyped::Typed(PageRanges::from(*n as u64))) + } + csl_json::Value::String(s) => { + let res = MaybeTyped::::infallible_from_str(s); + Some(match res { + MaybeTyped::String(s) => MaybeTyped::String(s), + MaybeTyped::Typed(r) => MaybeTyped::Typed(r), + }) + } + _ => None, + } + } + fn resolve_number_variable( &self, variable: NumberVariable, ) -> Option>> { if matches!(variable, NumberVariable::PageFirst) { - if let Some(MaybeTyped::Typed(Cow::Owned(n))) = - self.resolve_number_variable(NumberVariable::Page) - { - return n - .range() - .and_then(PageRanges::first) - .map(|r| MaybeTyped::Typed(Cow::Owned(r.clone()))); + if let Some(MaybeTyped::Typed(n)) = self.resolve_page_variable() { + return n.first().map(|r| MaybeTyped::Typed(Cow::Owned(r.clone()))); } } match self.0.get(&variable.to_string())? { diff --git a/src/types/numeric.rs b/src/types/numeric.rs index e177904..531cf24 100644 --- a/src/types/numeric.rs +++ b/src/types/numeric.rs @@ -10,8 +10,6 @@ use serde::{Deserialize, Deserializer, Serialize}; use thiserror::Error; use unscanny::Scanner; -use crate::{PageRanges, PageRangesPart}; - use super::MaybeTyped; /// A numeric value that can be pluralized. @@ -136,31 +134,6 @@ impl Numeric { } } } - NumericValue::PageRanges(r) => { - for p in &r.ranges { - match p { - crate::PageRangesPart::Ampersand => write!(buf, " & ")?, - crate::PageRangesPart::Comma => write!(buf, ", ")?, - crate::PageRangesPart::EscapedRange(s, e) => { - s.fmt_value(buf, machine_readable)?; - write!(buf, "-")?; - e.fmt_value(buf, machine_readable)? - } - crate::PageRangesPart::SinglePage(s) => { - s.fmt_value(buf, machine_readable)? - } - crate::PageRangesPart::Range(s, e) => { - s.fmt_value(buf, machine_readable)?; - if machine_readable { - buf.write_char('–')?; - } else { - write!(buf, "–")?; - } - e.fmt_value(buf, machine_readable)? - } - } - } - } } Ok(()) @@ -220,27 +193,6 @@ impl Numeric { } } } - NumericValue::PageRanges(r) => { - for p in &r.ranges { - match p { - crate::PageRangesPart::Ampersand => write!(buf, " & ")?, - crate::PageRangesPart::Comma => write!(buf, ", ")?, - crate::PageRangesPart::EscapedRange(s, e) => { - s.with_form(buf, form, gender, ords)?; - write!(buf, "-")?; - e.with_form(buf, form, gender, ords)? - } - crate::PageRangesPart::SinglePage(s) => { - s.with_form(buf, form, gender, ords)? - } - crate::PageRangesPart::Range(s, e) => { - s.with_form(buf, form, gender, ords)?; - write!(buf, "–")?; - e.with_form(buf, form, gender, ords)? - } - } - } - } } Ok(()) @@ -252,15 +204,6 @@ impl Numeric { NumericValue::Number(n) if is_number_of => n != &1, NumericValue::Number(_) => false, NumericValue::Set(vec) => vec.len() != 1, - NumericValue::PageRanges(PageRanges { ranges }) => { - ranges.len() == 1 - && matches!( - ranges[0], - PageRangesPart::Ampersand - | PageRangesPart::Comma - | PageRangesPart::SinglePage(_) - ) - } } } @@ -274,18 +217,12 @@ impl Numeric { .flatten() } - /// Returns a range if the value is a range. - pub fn range(&self) -> Option<&PageRanges> { - self.value.range() - } - /// Returns the nth number in the set. pub fn nth(&self, n: usize) -> Option { match &self.value { NumericValue::Number(val) if n == 0 => Some(*val), NumericValue::Number(_) => None, NumericValue::Set(vec) => vec.get(n).map(|(val, _)| *val), - NumericValue::PageRanges(_) => todo!(), } } @@ -388,16 +325,6 @@ impl From for Numeric { } } -impl From for Numeric { - fn from(value: PageRanges) -> Self { - Self { - value: NumericValue::PageRanges(value), - prefix: None, - suffix: None, - } - } -} - /// Error when parsing a numeric value. #[derive(Debug, Clone, Copy, Error, PartialEq, Eq)] pub enum NumericError { @@ -438,22 +365,10 @@ impl Display for Numeric { pub enum NumericValue { /// A single number. Number(i32), - /// Page ranges. Considered a number for compatability with CSL. - PageRanges(PageRanges), /// A set of numbers. Set(Vec<(i32, Option)>), } -impl NumericValue { - /// Returns a range if the value is a range. - pub fn range(&self) -> Option<&PageRanges> { - match self { - Self::PageRanges(r) => Some(r), - _ => None, - } - } -} - /// Delimits individual numbers in a numeric value. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum NumericDelimiter { diff --git a/src/types/page.rs b/src/types/page.rs index 744845a..8275a43 100644 --- a/src/types/page.rs +++ b/src/types/page.rs @@ -1,11 +1,21 @@ -use std::{fmt::Display, num::NonZeroUsize, str::FromStr}; +use std::{cmp::Ordering, fmt::Display, num::NonZeroUsize, str::FromStr}; -use crate::{Numeric, NumericError}; +use crate::{MaybeTyped, Numeric, NumericError}; use super::{deserialize_from_str, serialize_display}; use serde::{de, Deserialize, Serialize}; use thiserror::Error; +impl MaybeTyped { + /// Order the values according to CSL rules. + pub(crate) fn csl_cmp(&self, other: &Self) -> std::cmp::Ordering { + match (self, other) { + (MaybeTyped::Typed(a), MaybeTyped::Typed(b)) => a.csl_cmp(b), + _ => self.to_string().cmp(&other.to_string()), + } + } +} + /// Ranges of page numbers, e.g., `1-4, 5 & 6`. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct PageRanges { @@ -23,6 +33,35 @@ impl PageRanges { pub fn first(&self) -> Option<&Numeric> { self.ranges.first().and_then(PageRangesPart::start) } + + /// Order the values according to CSL rules. + pub(crate) fn csl_cmp(&self, other: &Self) -> std::cmp::Ordering { + let mut i = 0; + loop { + let a = self.ranges.get(i); + let b = other.ranges.get(i); + + match (a, b) { + (Some(a), Some(b)) => { + let ord = a.csl_cmp(&b); + if ord != std::cmp::Ordering::Equal { + return ord; + } + } + (Some(_), None) => return std::cmp::Ordering::Greater, + (None, Some(_)) => return std::cmp::Ordering::Less, + (None, None) => return std::cmp::Ordering::Equal, + } + + i += 1; + } + } +} + +impl From for PageRanges { + fn from(value: u64) -> Self { + Self { ranges: vec![value.into()] } + } } impl Display for PageRanges { @@ -69,6 +108,43 @@ impl PageRangesPart { _ => None, } } + + /// Order the values according to CSL rules. + pub(crate) fn csl_cmp(&self, other: &Self) -> std::cmp::Ordering { + match (self, other) { + (Self::Ampersand, Self::Ampersand) => Ordering::Equal, + (Self::Ampersand, _) => Ordering::Less, + (_, Self::Ampersand) => Ordering::Greater, + (Self::Comma, Self::Comma) => Ordering::Equal, + (Self::Comma, _) => Ordering::Less, + (_, Self::Comma) => Ordering::Greater, + (Self::SinglePage(n1), Self::SinglePage(n2)) => n1.csl_cmp(n2), + (Self::SinglePage(_),_) => Ordering::Less, + (_, Self::SinglePage(_)) => Ordering::Greater, + (Self::EscapedRange(s1, e1), Self::EscapedRange(s2, e2)) => { + let ord = s1.csl_cmp(s2); + if ord != Ordering::Equal { + return ord; + } + e1.csl_cmp(e2) + } + (Self::EscapedRange(_, _), _) => Ordering::Less, + (_, Self::EscapedRange(_, _)) => Ordering::Greater, + (Self::Range(s1, e1), Self::Range(s2, e2)) => { + let ord = s1.csl_cmp(s2); + if ord != Ordering::Equal { + return ord; + } + e1.csl_cmp(e2) + } + } + } +} + +impl From for PageRangesPart { + fn from(value: u64) -> Self { + Self::SinglePage((value as u32).into()) + } } impl Display for PageRangesPart { @@ -189,7 +265,7 @@ where let chars: Vec<_> = w.chars().collect(); let (c, d) = (chars[0], chars[1]); if (self.predicate)(c, d) { - len += 1 + len += c.len_utf8(); } else { break; } @@ -234,12 +310,24 @@ impl<'a> Iterator for Windows<'a> { type Item = &'a str; fn next(&mut self) -> Option { - if self.size.get() > self.string.len() { + let size = self.size.get(); + if size > self.string.len() { None } else { - let ret = Some(&self.string[..self.size.get()]); - self.string = &self.string[1..]; - ret + let mut indices = self.string.char_indices(); + let next = indices.nth(1).unwrap().0; + match indices.nth(size - 2) { + Some((idx, _)) => { + let ret = Some(&self.string[..idx]); + self.string = &self.string[next..]; + ret + } + None => { + let ret = Some(self.string); + self.string = ""; + ret + } + } } } } diff --git a/tests/citeproc-pass.txt b/tests/citeproc-pass.txt index c10d743..f132d31 100644 --- a/tests/citeproc-pass.txt +++ b/tests/citeproc-pass.txt @@ -274,6 +274,7 @@ nameorder_ShortDemoteDisplayAndSort nameorder_ShortNameAsSortDemoteNever namespaces_NonNada3 number_IsNumericWithAlpha +number_MixedPageRange number_PageFirst number_PageRange number_SimpleNumberArabic From aca3971bac7c54d97e5e0f74e0de38166ea45244 Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Tue, 20 Aug 2024 09:27:23 +0200 Subject: [PATCH 13/22] Formatting --- src/csl/mod.rs | 4 ++-- src/csl/rendering/mod.rs | 6 +++--- src/csl/sort.rs | 2 +- src/types/page.rs | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/csl/mod.rs b/src/csl/mod.rs index 2edb495..fd0dae5 100644 --- a/src/csl/mod.rs +++ b/src/csl/mod.rs @@ -8,8 +8,8 @@ use std::num::{NonZeroI16, NonZeroUsize}; use std::{mem, vec}; use citationberg::taxonomy::{ - DateVariable, Locator, NameVariable, NumberVariable, OtherTerm, StandardVariable, - Term, Variable, PageVariable + DateVariable, Locator, NameVariable, NumberVariable, OtherTerm, PageVariable, + StandardVariable, Term, Variable, }; use citationberg::{ taxonomy as csl_taxonomy, Affixes, BaseLanguage, Citation, CitationFormat, Collapse, diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index fae8de2..31bdf50 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -249,9 +249,9 @@ impl<'a, 'b> ResolvedTextTarget<'a, 'b> { TextTarget::Variable { var: Variable::Number(var), .. } => ctx .resolve_number_variable(*var) .map(|n| ResolvedTextTarget::NumberVariable(*var, n)), - TextTarget::Variable { var: Variable::Page(_), .. } => ctx - .resolve_page_variable() - .map(ResolvedTextTarget::PageVariable), + TextTarget::Variable { var: Variable::Page(_), .. } => { + ctx.resolve_page_variable().map(ResolvedTextTarget::PageVariable) + } TextTarget::Variable { .. } => None, TextTarget::Macro { name } => { ctx.style.get_macro(name).map(ResolvedTextTarget::Macro) diff --git a/src/csl/sort.rs b/src/csl/sort.rs index d762bad..a59bd3b 100644 --- a/src/csl/sort.rs +++ b/src/csl/sort.rs @@ -86,7 +86,7 @@ impl<'a> StyleContext<'a> { let a = InstanceContext::sort_instance(a, a_idx).resolve_page_variable(); let b = InstanceContext::sort_instance(b, b_idx).resolve_page_variable(); - match (a, b) { + match (a, b) { (Some(a), Some(b)) => a.csl_cmp(&b), (Some(_), None) => Ordering::Greater, (None, Some(_)) => Ordering::Less, diff --git a/src/types/page.rs b/src/types/page.rs index 8275a43..ebbd2c7 100644 --- a/src/types/page.rs +++ b/src/types/page.rs @@ -119,7 +119,7 @@ impl PageRangesPart { (Self::Comma, _) => Ordering::Less, (_, Self::Comma) => Ordering::Greater, (Self::SinglePage(n1), Self::SinglePage(n2)) => n1.csl_cmp(n2), - (Self::SinglePage(_),_) => Ordering::Less, + (Self::SinglePage(_), _) => Ordering::Less, (_, Self::SinglePage(_)) => Ordering::Greater, (Self::EscapedRange(s1, e1), Self::EscapedRange(s2, e2)) => { let ord = s1.csl_cmp(s2); From 817530a592ff67a6d33ec0ce8e420bba220f7cad Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Tue, 20 Aug 2024 09:29:14 +0200 Subject: [PATCH 14/22] Clippy --- src/csl/mod.rs | 2 +- src/types/page.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/csl/mod.rs b/src/csl/mod.rs index fd0dae5..2dc5f10 100644 --- a/src/csl/mod.rs +++ b/src/csl/mod.rs @@ -129,7 +129,7 @@ impl<'a, T: EntryLike + Hash + PartialEq + Eq + Debug> BibliographyDriver<'a, T> &mut citation.items, style.csl.citation.sort.as_ref(), citation.locale.as_ref(), - &citation_number, + citation_number, ); let items = &citation.items; diff --git a/src/types/page.rs b/src/types/page.rs index ebbd2c7..5a63ae7 100644 --- a/src/types/page.rs +++ b/src/types/page.rs @@ -43,7 +43,7 @@ impl PageRanges { match (a, b) { (Some(a), Some(b)) => { - let ord = a.csl_cmp(&b); + let ord = a.csl_cmp(b); if ord != std::cmp::Ordering::Equal { return ord; } From dfe2250e884a2e4b5f2ddf8f61f9dd88e7063333 Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Tue, 20 Aug 2024 10:03:06 +0200 Subject: [PATCH 15/22] Update citationberg --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fb7bac7..15d47ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ archive = ["ciborium"] csl-json = ["citationberg/json"] [dependencies] -citationberg = { git = "https://github.com/typst/citationberg.git", rev = "4a2c71ab051e45c1ed1533af5b8ad7e03737955c" } +citationberg = { git = "https://github.com/typst/citationberg.git", rev = "b23892cdddb7cf665ea4cd513b671cb67ceabf9c" } indexmap = { version = "2.0.2", features = ["serde"] } numerals = "0.1.4" paste = "1.0.14" From b12df03c3a713f6661027e1fb488e5d83c993cd7 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 2 Sep 2024 11:45:41 +0200 Subject: [PATCH 16/22] Update citationberg --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 15d47ea..be77110 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ archive = ["ciborium"] csl-json = ["citationberg/json"] [dependencies] -citationberg = { git = "https://github.com/typst/citationberg.git", rev = "b23892cdddb7cf665ea4cd513b671cb67ceabf9c" } +citationberg = { git = "https://github.com/typst/citationberg.git", rev = "e3fd3f08e0e16983b7c3514b791b64c704dc2524" } indexmap = { version = "2.0.2", features = ["serde"] } numerals = "0.1.4" paste = "1.0.14" From 361626ea45242000a8a3ab798e1e84e067f87c04 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 2 Sep 2024 11:49:05 +0200 Subject: [PATCH 17/22] Make clippy happy --- src/csl/rendering/mod.rs | 4 ++-- src/types/mod.rs | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index 31bdf50..a116f23 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -95,7 +95,7 @@ impl RenderCsl for citationberg::Text { }, ResolvedTextTarget::PageVariable(p) => match p { MaybeTyped::Typed(r) => render_page_range(&r, ctx), - MaybeTyped::String(s) => ctx.push_str(&s.replace("-", "–")), + MaybeTyped::String(s) => ctx.push_str(&s.replace('-', "–")), }, ResolvedTextTarget::Macro(mac) => { for child in &mac.children { @@ -492,7 +492,7 @@ impl RenderCsl for citationberg::Label { } } NumberOrPageVariable::Page(pv) => { - if let Some(_) = ctx.resolve_page_variable() { + if ctx.resolve_page_variable().is_some() { // TODO let plural = false; ( diff --git a/src/types/mod.rs b/src/types/mod.rs index e181468..46cc5b9 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -438,13 +438,13 @@ impl<'de> Deserialize<'de> for SerialNumber { Float(f64), } - impl ToString for StringOrNumber { - fn to_string(&self) -> String { + impl fmt::Display for StringOrNumber { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::String(s) => s.clone(), - Self::Number(n) => n.to_string(), - Self::UnsignedNumber(n) => n.to_string(), - Self::Float(f) => f.to_string(), + StringOrNumber::String(s) => s.fmt(f), + StringOrNumber::Number(n) => n.fmt(f), + StringOrNumber::UnsignedNumber(n) => n.fmt(f), + StringOrNumber::Float(n) => n.fmt(f), } } } From a35df0c5766b9e764e4aad07cab1fa7ff53f9c1f Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Tue, 3 Sep 2024 11:05:15 +0200 Subject: [PATCH 18/22] Fix todos --- src/csl/rendering/mod.rs | 16 +++++++++++----- src/types/page.rs | 7 ++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index a116f23..ecdf5e1 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -426,15 +426,19 @@ impl RenderCsl for citationberg::Label { ctx.commit_elem(depth, None, Some(ElemMeta::Label)); } NumberOrPageVariable::Page(pv) => { - let Some(_) = ctx.resolve_page_variable() else { + let Some(p) = ctx.resolve_page_variable() else { return; }; let depth = ctx.push_elem(citationberg::Formatting::default()); + let plural = match p { + MaybeTyped::Typed(p) => p.is_plural(), + _ => false + }; // TODO: when to pluralize? let content = - ctx.term(Term::from(pv), self.label.form, false).unwrap_or_default(); + ctx.term(Term::from(pv), self.label.form, plural).unwrap_or_default(); render_label_with_var(&self.label, ctx, content); ctx.commit_elem(depth, None, Some(ElemMeta::Label)); @@ -492,9 +496,11 @@ impl RenderCsl for citationberg::Label { } } NumberOrPageVariable::Page(pv) => { - if ctx.resolve_page_variable().is_some() { - // TODO - let plural = false; + if let Some(p) = ctx.resolve_page_variable() { + let plural = match p { + MaybeTyped::Typed(p) => p.is_plural(), + _ => false + }; ( ctx.term(Term::from(pv), self.label.form, plural).is_some(), UsageInfo::default(), diff --git a/src/types/page.rs b/src/types/page.rs index 5a63ae7..a6f5b4b 100644 --- a/src/types/page.rs +++ b/src/types/page.rs @@ -56,6 +56,11 @@ impl PageRanges { i += 1; } } + + /// Whether to pluralize the `pages` term, when used with this page range. + pub fn is_plural(&self) -> bool { + self.ranges.len() != 1 + } } impl From for PageRanges { @@ -202,7 +207,7 @@ impl FromStr for PageRangesPart { // Otherwise, split into the two halves of the dash. let mut parts = s.split(|c| c == '-' || c == '–').map(str::trim); let r = match (parts.next(), parts.next()) { - (None, None) => todo!(), + (None, None) => unreachable!(), (Some(start), None) => Self::SinglePage(parse_number(start)?), (Some(start), Some(end)) => { Self::Range(parse_number(start)?, parse_number(end)?) From 0ceac2ea2925206116e7bb01a6f26e774df4e2dc Mon Sep 17 00:00:00 2001 From: DerDrodt Date: Tue, 3 Sep 2024 11:09:53 +0200 Subject: [PATCH 19/22] Formatting --- src/csl/rendering/mod.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index ecdf5e1..6100e48 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -433,10 +433,9 @@ impl RenderCsl for citationberg::Label { let depth = ctx.push_elem(citationberg::Formatting::default()); let plural = match p { MaybeTyped::Typed(p) => p.is_plural(), - _ => false + _ => false, }; - // TODO: when to pluralize? let content = ctx.term(Term::from(pv), self.label.form, plural).unwrap_or_default(); @@ -499,7 +498,7 @@ impl RenderCsl for citationberg::Label { if let Some(p) = ctx.resolve_page_variable() { let plural = match p { MaybeTyped::Typed(p) => p.is_plural(), - _ => false + _ => false, }; ( ctx.term(Term::from(pv), self.label.form, plural).is_some(), From 26e7079146e993ed6a67b5348b04034a2cbc97f4 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 1 Oct 2024 10:50:53 +0200 Subject: [PATCH 20/22] Do not assume there is only one page variable --- src/csl/mod.rs | 9 ++++--- src/csl/rendering/mod.rs | 12 +++++---- src/csl/sort.rs | 8 +++--- src/csl/taxonomy.rs | 58 ++++++++++++++++++++++++++-------------- 4 files changed, 56 insertions(+), 31 deletions(-) diff --git a/src/csl/mod.rs b/src/csl/mod.rs index 95e834f..1338922 100644 --- a/src/csl/mod.rs +++ b/src/csl/mod.rs @@ -2567,9 +2567,12 @@ impl<'a, T: EntryLike> Context<'a, T> { res } - fn resolve_page_variable(&self) -> Option { - self.writing.prepare_variable_query(PageVariable::Page)?; - self.instance.resolve_page_variable() + fn resolve_page_variable( + &self, + variable: PageVariable, + ) -> Option { + self.writing.prepare_variable_query(variable)?; + self.instance.resolve_page_variable(variable) } /// Resolve a name variable. diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index 6100e48..581ee2f 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -249,8 +249,8 @@ impl<'a, 'b> ResolvedTextTarget<'a, 'b> { TextTarget::Variable { var: Variable::Number(var), .. } => ctx .resolve_number_variable(*var) .map(|n| ResolvedTextTarget::NumberVariable(*var, n)), - TextTarget::Variable { var: Variable::Page(_), .. } => { - ctx.resolve_page_variable().map(ResolvedTextTarget::PageVariable) + TextTarget::Variable { var: Variable::Page(pv), .. } => { + ctx.resolve_page_variable(*pv).map(ResolvedTextTarget::PageVariable) } TextTarget::Variable { .. } => None, TextTarget::Macro { name } => { @@ -426,7 +426,7 @@ impl RenderCsl for citationberg::Label { ctx.commit_elem(depth, None, Some(ElemMeta::Label)); } NumberOrPageVariable::Page(pv) => { - let Some(p) = ctx.resolve_page_variable() else { + let Some(p) = ctx.resolve_page_variable(pv) else { return; }; @@ -495,7 +495,7 @@ impl RenderCsl for citationberg::Label { } } NumberOrPageVariable::Page(pv) => { - if let Some(p) = ctx.resolve_page_variable() { + if let Some(p) = ctx.resolve_page_variable(pv) { let plural = match p { MaybeTyped::Typed(p) => p.is_plural(), _ => false, @@ -1137,7 +1137,9 @@ impl<'a, 'b, T: EntryLike> Iterator for BranchConditionIter<'a, 'b, T> { Variable::Name(n) => { !self.ctx.resolve_name_variable(n).is_empty() } - Variable::Page(_) => self.ctx.resolve_page_variable().is_some(), + Variable::Page(pv) => { + self.ctx.resolve_page_variable(pv).is_some() + } }) } else { None diff --git a/src/csl/sort.rs b/src/csl/sort.rs index a59bd3b..cab43dc 100644 --- a/src/csl/sort.rs +++ b/src/csl/sort.rs @@ -82,9 +82,11 @@ impl<'a> StyleContext<'a> { (None, None) => Ordering::Equal, } } - SortKey::Variable { variable: Variable::Page(_), .. } => { - let a = InstanceContext::sort_instance(a, a_idx).resolve_page_variable(); - let b = InstanceContext::sort_instance(b, b_idx).resolve_page_variable(); + SortKey::Variable { variable: Variable::Page(pv), .. } => { + let a = + InstanceContext::sort_instance(a, a_idx).resolve_page_variable(*pv); + let b = + InstanceContext::sort_instance(b, b_idx).resolve_page_variable(*pv); match (a, b) { (Some(a), Some(b)) => a.csl_cmp(&b), diff --git a/src/csl/taxonomy.rs b/src/csl/taxonomy.rs index b30aea3..5cf853e 100644 --- a/src/csl/taxonomy.rs +++ b/src/csl/taxonomy.rs @@ -7,7 +7,7 @@ use crate::types::{ }; use crate::{Entry, PageRanges}; use citationberg::taxonomy::{ - DateVariable, Kind, NameVariable, NumberVariable, StandardVariable, + DateVariable, Kind, NameVariable, NumberVariable, PageVariable, StandardVariable, }; use citationberg::{taxonomy, LongShortForm}; use unic_langid::LanguageIdentifier; @@ -23,7 +23,10 @@ pub trait EntryLike { &self, variable: NumberVariable, ) -> Option>>; - fn resolve_page_variable(&self) -> Option>; + fn resolve_page_variable( + &self, + variable: PageVariable, + ) -> Option>; fn resolve_standard_variable( &self, form: LongShortForm, @@ -71,8 +74,11 @@ impl<'a, T: EntryLike> InstanceContext<'a, T> { } } - pub(super) fn resolve_page_variable(&self) -> Option { - self.entry.resolve_page_variable() + pub(super) fn resolve_page_variable( + &self, + variable: PageVariable, + ) -> Option { + self.entry.resolve_page_variable(variable) } // Number variables are standard variables. @@ -204,8 +210,13 @@ impl EntryLike for Entry { } } - fn resolve_page_variable(&self) -> Option> { - self.page_range().map(|r| MaybeTyped::Typed(r.clone())) + fn resolve_page_variable( + &self, + variable: PageVariable, + ) -> Option> { + match variable { + PageVariable::Page => self.page_range().map(|r| MaybeTyped::Typed(r.clone())), + } } // Number variables are standard variables. @@ -762,19 +773,24 @@ impl EntryLike for citationberg::json::Item { } } - fn resolve_page_variable(&self) -> Option> { - match self.0.get("page")? { - csl_json::Value::Number(n) => { - Some(MaybeTyped::Typed(PageRanges::from(*n as u64))) - } - csl_json::Value::String(s) => { - let res = MaybeTyped::::infallible_from_str(s); - Some(match res { - MaybeTyped::String(s) => MaybeTyped::String(s), - MaybeTyped::Typed(r) => MaybeTyped::Typed(r), - }) - } - _ => None, + fn resolve_page_variable( + &self, + variable: PageVariable, + ) -> Option> { + match variable { + PageVariable::Page => match self.0.get("page")? { + csl_json::Value::Number(n) => { + Some(MaybeTyped::Typed(PageRanges::from(*n as u64))) + } + csl_json::Value::String(s) => { + let res = MaybeTyped::::infallible_from_str(s); + Some(match res { + MaybeTyped::String(s) => MaybeTyped::String(s), + MaybeTyped::Typed(r) => MaybeTyped::Typed(r), + }) + } + _ => None, + }, } } @@ -783,7 +799,9 @@ impl EntryLike for citationberg::json::Item { variable: NumberVariable, ) -> Option>> { if matches!(variable, NumberVariable::PageFirst) { - if let Some(MaybeTyped::Typed(n)) = self.resolve_page_variable() { + if let Some(MaybeTyped::Typed(n)) = + self.resolve_page_variable(PageVariable::Page) + { return n.first().map(|r| MaybeTyped::Typed(Cow::Owned(r.clone()))); } } From 5bcba620d390f8d2c144132b67fcaf46a60b2ad0 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 1 Oct 2024 11:36:31 +0200 Subject: [PATCH 21/22] Use `Iterator::cmp` instead of loops --- src/types/numeric.rs | 86 +++++++++++++++++++++++++++++++------------- src/types/page.rs | 32 ++++++++--------- 2 files changed, 78 insertions(+), 40 deletions(-) diff --git a/src/types/numeric.rs b/src/types/numeric.rs index 531cf24..9716cf3 100644 --- a/src/types/numeric.rs +++ b/src/types/numeric.rs @@ -219,34 +219,12 @@ impl Numeric { /// Returns the nth number in the set. pub fn nth(&self, n: usize) -> Option { - match &self.value { - NumericValue::Number(val) if n == 0 => Some(*val), - NumericValue::Number(_) => None, - NumericValue::Set(vec) => vec.get(n).map(|(val, _)| *val), - } + self.value.nth(n) } /// Order the values according to CSL rules. pub(crate) fn csl_cmp(&self, other: &Self) -> std::cmp::Ordering { - let mut i = 0; - loop { - let a = self.nth(i); - let b = other.nth(i); - - match (a, b) { - (Some(a), Some(b)) => { - let ord = a.cmp(&b); - if ord != std::cmp::Ordering::Equal { - return ord; - } - } - (Some(_), None) => return std::cmp::Ordering::Greater, - (None, Some(_)) => return std::cmp::Ordering::Less, - (None, None) => return std::cmp::Ordering::Equal, - } - - i += 1; - } + self.value.into_iter().cmp(&other.value) } } @@ -369,6 +347,66 @@ pub enum NumericValue { Set(Vec<(i32, Option)>), } +impl NumericValue { + /// Return the length of the numeric value. + pub fn len(&self) -> usize { + match self { + NumericValue::Number(_) => 1, + NumericValue::Set(vec) => vec.len(), + } + } + + /// Whether the numeric value is an empty set. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the nth number in the set. + fn nth(&self, n: usize) -> Option { + match self { + NumericValue::Number(val) if n == 0 => Some(*val), + NumericValue::Number(_) => None, + NumericValue::Set(vec) => vec.get(n).map(|(val, _)| *val), + } + } +} + +/// An iterator over the numbers in a numeric value. +pub struct NumIterator<'a> { + num: &'a NumericValue, + idx: usize, +} + +impl<'a> Iterator for NumIterator<'a> { + type Item = i32; + + fn next(&mut self) -> Option { + let val = self.num.nth(self.idx); + self.idx += 1; + val + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.num.len() - self.idx; + (len, Some(len)) + } +} + +impl<'a> ExactSizeIterator for NumIterator<'a> { + fn len(&self) -> usize { + self.size_hint().0 + } +} + +impl<'a> IntoIterator for &'a NumericValue { + type Item = i32; + type IntoIter = NumIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + NumIterator { num: self, idx: 0 } + } +} + /// Delimits individual numbers in a numeric value. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum NumericDelimiter { diff --git a/src/types/page.rs b/src/types/page.rs index a6f5b4b..01e6d34 100644 --- a/src/types/page.rs +++ b/src/types/page.rs @@ -36,25 +36,25 @@ impl PageRanges { /// Order the values according to CSL rules. pub(crate) fn csl_cmp(&self, other: &Self) -> std::cmp::Ordering { - let mut i = 0; - loop { - let a = self.ranges.get(i); - let b = other.ranges.get(i); - - match (a, b) { - (Some(a), Some(b)) => { - let ord = a.csl_cmp(b); - if ord != std::cmp::Ordering::Equal { - return ord; - } - } - (Some(_), None) => return std::cmp::Ordering::Greater, - (None, Some(_)) => return std::cmp::Ordering::Less, - (None, None) => return std::cmp::Ordering::Equal, + #[derive(PartialEq, Eq)] + struct OrderablePageRangesPart<'a>(&'a PageRangesPart); + + impl Ord for OrderablePageRangesPart<'_> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.csl_cmp(other.0) } + } - i += 1; + impl PartialOrd for OrderablePageRangesPart<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } + + self.ranges + .iter() + .map(OrderablePageRangesPart) + .cmp(other.ranges.iter().map(OrderablePageRangesPart)) } /// Whether to pluralize the `pages` term, when used with this page range. From de4bb72edceb8836fda3553b096a1e20f7c2f76b Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 1 Oct 2024 11:45:01 +0200 Subject: [PATCH 22/22] Clippy --- src/types/page.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types/page.rs b/src/types/page.rs index 01e6d34..1efb632 100644 --- a/src/types/page.rs +++ b/src/types/page.rs @@ -205,7 +205,7 @@ impl FromStr for PageRangesPart { r } else { // Otherwise, split into the two halves of the dash. - let mut parts = s.split(|c| c == '-' || c == '–').map(str::trim); + let mut parts = s.split(['-', '–']).map(str::trim); let r = match (parts.next(), parts.next()) { (None, None) => unreachable!(), (Some(start), None) => Self::SinglePage(parse_number(start)?),