Skip to content

Commit

Permalink
feat: rule003 check spelling
Browse files Browse the repository at this point in the history
  • Loading branch information
charislam committed Dec 5, 2024
1 parent 608f37a commit 0c69b0d
Show file tree
Hide file tree
Showing 21 changed files with 84,496 additions and 92 deletions.
2 changes: 1 addition & 1 deletion src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::{
rules::RuleContext,
};

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]
pub enum LintLevel {
Error,
Expand Down
134 changes: 133 additions & 1 deletion src/geometry.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::cmp::Ordering;
use std::ops::{Add, Deref, DerefMut, Range};
use std::ops::{Add, Deref, DerefMut, Range, SubAssign};

use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -55,6 +55,12 @@ impl Add for AdjustedOffset {
}
}

/// In-place subtraction (`-=`) of one `AdjustedOffset` from another.
impl SubAssign for AdjustedOffset {
    /// Subtracts the value wrapped by `rhs` from the value wrapped by `self`.
    ///
    /// NOTE(review): nothing here guards against `rhs` being larger than
    /// `self`; presumably callers guarantee that ordering — confirm.
    fn sub_assign(&mut self, rhs: Self) {
        let amount = rhs.0;
        self.0 -= amount;
    }
}

impl AdjustedOffset {
pub fn increment(&mut self, steps: usize) {
self.0 += steps;
Expand Down Expand Up @@ -267,6 +273,48 @@ impl DenormalizedLocation {
}
}

/// A collection of [`AdjustedRange`]s kept sorted by position, in which no
/// two stored ranges overlap or touch.
///
/// That ordering is what allows lookups to use binary search, so every
/// mutation has to preserve it.
#[derive(Debug, Default)]
pub(crate) struct RangeSet(Vec<AdjustedRange>);

impl RangeSet {
    /// Creates an empty set.
    pub(crate) fn new() -> Self {
        Default::default()
    }

    /// Adds `range` to the set, coalescing it with *every* stored range it
    /// overlaps or touches.
    ///
    /// A pushed range can bridge several stored ranges at once (for example
    /// pushing `2..6` into `{0..3, 5..8}`). All of them are folded into a
    /// single entry; merging with only one of them would leave overlapping
    /// entries behind and break the ordering the binary search depends on.
    pub(crate) fn push(&mut self, range: AdjustedRange) {
        // Index of the first stored range that does not end strictly before
        // `range` starts.
        let first = self.0.partition_point(|probe| probe.end < range.start);
        // One past the last stored range that does not start strictly after
        // `range` ends. Clamped so an inverted `range` (start > end) degrades
        // to a plain insert instead of producing an invalid slice.
        let last = self
            .0
            .partition_point(|probe| probe.start <= range.end)
            .max(first);

        // Fold the new range together with every entry in the overlapping
        // run; when the run is empty this is just `range` itself.
        let merged = self.0[first..last]
            .iter()
            .fold(range, |hull, existing| {
                AdjustedRange::span_between(existing, &hull)
            });

        // Replace the whole run with the single merged entry.
        self.0.drain(first..last);
        self.0.insert(first, merged);
    }

    /// Returns `true` when a single stored range covers `range` entirely
    /// (equal bounds count as covered).
    pub(crate) fn completely_contains(&self, range: &AdjustedRange) -> bool {
        match self.overlaps_impl(range) {
            Err(_) => false,
            Ok(index) => {
                let potential_container = &self.0[index];
                potential_container.start <= range.start && potential_container.end >= range.end
            }
        }
    }

    /// Binary-searches for a stored range that overlaps or touches `range`.
    ///
    /// Returns `Ok(index)` of such a range, or `Err(index)` with the position
    /// at which `range` could be inserted while keeping the set sorted.
    fn overlaps_impl(&self, range: &AdjustedRange) -> Result<usize, usize> {
        self.0.binary_search_by(|probe| {
            if probe.end < range.start {
                Ordering::Less
            } else if probe.start > range.end {
                Ordering::Greater
            } else {
                Ordering::Equal
            }
        })
    }
}

#[cfg(test)]
mod tests {
use super::{AdjustedOffset, AdjustedPoint, AdjustedRange, DenormalizedLocation};
Expand Down Expand Up @@ -296,4 +344,88 @@ mod tests {
}
}
}

/// Pushing a range that overlaps a stored one must leave a single merged range.
#[test]
fn test_range_set_merges_overlapping_ranges() {
    let mut ranges = super::RangeSet::new();
    ranges.push(AdjustedRange::new(
        AdjustedOffset::from(0),
        AdjustedOffset::from(5),
    ));
    ranges.push(AdjustedRange::new(
        AdjustedOffset::from(3),
        AdjustedOffset::from(8),
    ));

    // 0..5 and 3..8 share 3..5, so they collapse into 0..8.
    assert_eq!(ranges.0.len(), 1);
    let merged = &ranges.0[0];
    assert_eq!(merged.start, AdjustedOffset::from(0));
    assert_eq!(merged.end, AdjustedOffset::from(8));
}

/// Ranges that merely touch (one ends where the other starts) are merged,
/// regardless of the order in which they are pushed.
#[test]
fn test_range_set_merges_adjacent_ranges() {
    // Lower range first, then the range that starts where it ends.
    {
        let mut ascending = super::RangeSet::new();
        ascending.push(AdjustedRange::new(
            AdjustedOffset::from(0),
            AdjustedOffset::from(5),
        ));
        ascending.push(AdjustedRange::new(
            AdjustedOffset::from(5),
            AdjustedOffset::from(8),
        ));

        assert_eq!(ascending.0.len(), 1);
        let merged = &ascending.0[0];
        assert_eq!(merged.start, AdjustedOffset::from(0));
        assert_eq!(merged.end, AdjustedOffset::from(8));
    }

    // Same two ranges pushed in the opposite order.
    {
        let mut descending = super::RangeSet::new();
        descending.push(AdjustedRange::new(
            AdjustedOffset::from(5),
            AdjustedOffset::from(8),
        ));
        descending.push(AdjustedRange::new(
            AdjustedOffset::from(0),
            AdjustedOffset::from(5),
        ));

        assert_eq!(descending.0.len(), 1);
        let merged = &descending.0[0];
        assert_eq!(merged.start, AdjustedOffset::from(0));
        assert_eq!(merged.end, AdjustedOffset::from(8));
    }
}

/// Two ranges with a gap between them stay as two distinct, ordered entries.
#[test]
fn test_range_set_keeps_non_overlapping_ranges_separate() {
    let mut ranges = super::RangeSet::new();
    ranges.push(AdjustedRange::new(
        AdjustedOffset::from(0),
        AdjustedOffset::from(3),
    ));
    ranges.push(AdjustedRange::new(
        AdjustedOffset::from(5),
        AdjustedOffset::from(8),
    ));

    assert_eq!(ranges.0.len(), 2);

    // 0..3 comes first and is untouched.
    let lower = &ranges.0[0];
    assert_eq!(lower.start, AdjustedOffset::from(0));
    assert_eq!(lower.end, AdjustedOffset::from(3));

    // 5..8 follows and is untouched.
    let upper = &ranges.0[1];
    assert_eq!(upper.start, AdjustedOffset::from(5));
    assert_eq!(upper.end, AdjustedOffset::from(8));
}

/// Exercises `completely_contains` against a set holding the single range 0..10.
#[test]
fn test_range_set_completely_contains() {
    let mut ranges = super::RangeSet::new();
    ranges.push(AdjustedRange::new(
        AdjustedOffset::from(0),
        AdjustedOffset::from(10),
    ));

    // Strictly inside the stored range.
    let inner = AdjustedRange::new(AdjustedOffset::from(2), AdjustedOffset::from(8));
    assert!(ranges.completely_contains(&inner));

    // Starts inside the stored range but runs past its end.
    let straddling = AdjustedRange::new(AdjustedOffset::from(5), AdjustedOffset::from(12));
    assert!(!ranges.completely_contains(&straddling));

    // Entirely beyond the stored range.
    let disjoint = AdjustedRange::new(AdjustedOffset::from(15), AdjustedOffset::from(20));
    assert!(!ranges.completely_contains(&disjoint));

    // Identical bounds count as contained.
    let identical = AdjustedRange::new(AdjustedOffset::from(0), AdjustedOffset::from(10));
    assert!(ranges.completely_contains(&identical));
}
}
16 changes: 12 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,13 @@ mod tests {
#[test]
fn test_lint_valid_string() -> Result<()> {
let config = Config::default();
let linter = LinterBuilder.configure(config).build()?;
let mut linter = LinterBuilder.configure(config).build()?;
linter
.config
.rule_registry
.deactivate_all_but("Rule001HeadingCase");

let valid_mdx = "# Hello, world!\n\nThis is valid MDX document.";
let valid_mdx = "# Hello, world!\n\nThis is a valid document.";
let result = linter.lint(&LintTarget::String(valid_mdx.to_string()))?;

assert!(
Expand All @@ -174,9 +178,13 @@ mod tests {
#[test]
fn test_lint_invalid_string() -> Result<()> {
let config = Config::default();
let linter = LinterBuilder.configure(config).build()?;
let mut linter = LinterBuilder.configure(config).build()?;
linter
.config
.rule_registry
.deactivate_all_but("Rule001HeadingCase");

let invalid_mdx = "# Incorrect Heading\n\nThis is an invalid MDX document.";
let invalid_mdx = "# Incorrect Heading\n\nThis is an invalid document.";
let result = linter.lint(&LintTarget::String(invalid_mdx.to_string()))?;

assert!(
Expand Down
10 changes: 10 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::{
io::{BufWriter, Write},
path::PathBuf,
process,
time::Instant,
};

use anyhow::{Context, Result};
Expand Down Expand Up @@ -108,6 +109,8 @@ fn get_diagnostics(targets: &[String], linter: &Linter) -> Result<Vec<LintOutput
}

fn execute() -> Result<Result<()>> {
let start = Instant::now();

let args = Args::parse();

let log_level = setup_logging(&args)?;
Expand Down Expand Up @@ -155,6 +158,13 @@ fn execute() -> Result<Result<()>> {

if !args.silent {
args.format.format(&diagnostics, &mut stdout)?;
let duration = start.elapsed().as_secs();
writeln!(
stdout,
"🕚 Done in {} second{}",
duration,
if duration == 1 { "" } else { "s" }
)?;
}

stdout.flush()?;
Expand Down
5 changes: 4 additions & 1 deletion src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{any::Any, collections::HashMap, error::Error, fmt::Display};

use anyhow::{anyhow, Result};
use itertools::Itertools;
use log::{debug, warn};
use log::{debug, trace, warn};
use markdown::{mdast::Node, to_mdast, unist, Constructs, ParseOptions};
use regex::Regex;

Expand All @@ -28,6 +28,9 @@ pub(crate) struct ParseResult {
pub(crate) fn parse(input: &str) -> Result<ParseResult> {
let (content, rope, content_start_offset, frontmatter) = process_raw_content_string(input);
let ast = parse_internal(content)?;

trace!("AST: {:#?}", ast);

Ok(ParseResult {
ast,
rope,
Expand Down
52 changes: 45 additions & 7 deletions src/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,17 @@ use crate::{

mod rule001_heading_case;
mod rule002_admonition_types;
mod rule003_spelling;

use rule001_heading_case::Rule001HeadingCase;
use rule002_admonition_types::Rule002AdmonitionTypes;
use rule003_spelling::Rule003Spelling;

/// Returns one boxed, default-constructed instance of each rule type
/// (heading case, admonition types, spelling), in rule-number order.
fn get_all_rules() -> Vec<Box<dyn Rule>> {
    let heading_case: Box<dyn Rule> = Box::new(Rule001HeadingCase::default());
    let admonition_types: Box<dyn Rule> = Box::new(Rule002AdmonitionTypes::default());
    let spelling: Box<dyn Rule> = Box::new(Rule003Spelling::default());

    vec![heading_case, admonition_types, spelling]
}

Expand Down Expand Up @@ -62,10 +65,18 @@ pub struct RuleSettings(toml::Value);

#[derive(Default)]
pub struct RegexSettings {
/// Regex should only be matched against beginning of string.
pub match_beginning: bool,
pub beginning: Option<RegexBeginning>,
/// Regex should only match if it matches up to the end of the word.
pub match_word_boundary_at_end: bool,
pub ending: Option<RegexEnding>,
}

/// Where a configured regex pattern is required to start matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegexBeginning {
    /// Anchor the pattern to the very start of the input: a leading `^` is
    /// added unless the pattern already starts with one.
    VeryBeginning,
    /// Require the match to start at a word boundary: `(?:^|\s|\b)` is
    /// prepended unless the pattern already starts with `\b`, `\s`, or `^`.
    WordBoundary,
}

/// Where a configured regex pattern is required to stop matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegexEnding {
    /// Require the match to end at a word boundary: `(?:\s|\b|$|[.,!?'"-])`
    /// is appended unless the pattern already ends with `\b`, `\s`, or `$`.
    WordBoundary,
}

impl RuleSettings {
Expand Down Expand Up @@ -129,11 +140,33 @@ impl RuleSettings {
if let toml::Value::String(pattern) = value {
let mut pattern = pattern.to_string();
if let Some(settings) = settings {
if settings.match_beginning && !pattern.starts_with('^') {
pattern = format!("^{}", pattern);
match settings.beginning {
Some(RegexBeginning::VeryBeginning) => {
if !pattern.starts_with('^') {
pattern = format!("^{}", pattern);
}
}
Some(RegexBeginning::WordBoundary) => {
if !pattern.starts_with("\\b")
&& !pattern.starts_with("\\s")
&& !pattern.starts_with("^")
{
pattern = format!("(?:^|\\s|\\b){}", pattern);
}
}
None => {}
}
if settings.match_word_boundary_at_end && !pattern.ends_with("\\b") {
pattern = format!("{}\\b", pattern);
#[allow(clippy::single_match)]
match settings.ending {
Some(RegexEnding::WordBoundary) => {
if !pattern.ends_with("\\b")
&& !pattern.ends_with("\\s")
&& !pattern.ends_with("$")
{
pattern = format!(r#"{}(?:\s|\b|$|[.,!?'"-])"#, pattern);
}
}
None => {}
}
}

Expand Down Expand Up @@ -263,6 +296,11 @@ impl RuleRegistry {
self.rules.iter().any(|rule| rule.name() == rule_name)
}

/// Test-only helper that removes every rule from the registry except those
/// whose `name()` equals `rule_name`.
#[cfg(test)]
pub fn deactivate_all_but(&mut self, rule_name: &str) {
    self.rules
        .retain(|candidate| candidate.name() == rule_name);
}

pub fn setup(&mut self, settings: &HashMap<String, RuleSettings>) -> Result<()> {
match self.state {
RuleRegistryState::PreSetup => {
Expand Down
Loading

0 comments on commit 0c69b0d

Please sign in to comment.