Skip to content

Commit

Permalink
levenshtein: LevenshteinState, usize input distance
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinKavik committed Mar 19, 2020
1 parent 04e38f9 commit a58d63f
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 26 deletions.
69 changes: 54 additions & 15 deletions src/automaton/levenshtein.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,10 @@ impl Levenshtein {
#[inline]
pub fn new(
query: &str,
distance: u32,
distance: usize,
) -> Result<Levenshtein, LevenshteinError> {
let lev = DynamicLevenshtein {
query: query.to_owned(),
dist: distance as usize,
};
let lev =
DynamicLevenshtein { query: query.to_owned(), dist: distance };
let dfa = DfaBuilder::new(lev.clone()).build()?;
Ok(Levenshtein { prog: lev, dfa })
}
Expand Down Expand Up @@ -165,27 +163,58 @@ impl DynamicLevenshtein {
}
}

/// Levenshtein automaton state.
///
/// It is useful for obtaining edit distance while searching.
/// See examples in documentation for `Map::search_with_state`
/// or `Set::search_with_state`.
///
/// This is only defined when the `levenshtein` crate feature is enabled.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct LevenshteinState {
/// Internal state index.
pub state_idx: usize,
/// Levenshtein edit distance.
pub distance: Option<usize>,
}

impl Automaton for Levenshtein {
type State = Option<usize>;
type State = Option<LevenshteinState>;

#[inline]
fn start(&self) -> Option<usize> {
Some(0)
fn start(&self) -> Option<LevenshteinState> {
Some(LevenshteinState {
state_idx: 0,
distance: self.dfa.states[0].distance,
})
}

#[inline]
fn is_match(&self, state: &Option<usize>) -> bool {
state.map(|state| self.dfa.states[state].is_match).unwrap_or(false)
fn is_match(&self, state: &Option<LevenshteinState>) -> bool {
state
.map(|state| self.dfa.states[state.state_idx].is_match)
.unwrap_or(false)
}

#[inline]
fn can_match(&self, state: &Option<usize>) -> bool {
fn can_match(&self, state: &Option<LevenshteinState>) -> bool {
state.is_some()
}

#[inline]
fn accept(&self, state: &Option<usize>, byte: u8) -> Option<usize> {
state.and_then(|state| self.dfa.states[state].next[byte as usize])
fn accept(
&self,
state: &Option<LevenshteinState>,
byte: u8,
) -> Option<LevenshteinState> {
state.and_then(|state| {
self.dfa.states[state.state_idx].next[byte as usize].map(
|next_state_idx| LevenshteinState {
state_idx: next_state_idx,
distance: self.dfa.states[next_state_idx].distance,
},
)
})
}
}

Expand All @@ -197,12 +226,14 @@ struct Dfa {
struct State {
next: [Option<usize>; 256],
is_match: bool,
distance: Option<usize>,
}

impl fmt::Debug for State {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "State {{")?;
writeln!(f, " is_match: {:?}", self.is_match)?;
writeln!(f, " distance: {:?}", self.distance)?;
for i in 0..256 {
if let Some(si) = self.next[i] {
writeln!(f, " {:?}: {:?}", i, si)?;
Expand Down Expand Up @@ -273,7 +304,11 @@ impl DfaBuilder {
Entry::Occupied(v) => (*v.get(), true),
Entry::Vacant(v) => {
let is_match = self.lev.is_match(lev_state);
self.dfa.states.push(State { next: [None; 256], is_match });
self.dfa.states.push(State {
next: [None; 256],
is_match,
distance: lev_state.last().copied(),
});
(*v.insert(self.dfa.states.len() - 1), false)
}
})
Expand Down Expand Up @@ -334,7 +369,11 @@ impl DfaBuilder {
}

fn new_state(&mut self, is_match: bool) -> usize {
self.dfa.states.push(State { next: [None; 256], is_match });
self.dfa.states.push(State {
next: [None; 256],
is_match,
distance: None,
});
self.dfa.states.len() - 1
}
}
2 changes: 1 addition & 1 deletion src/automaton/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#[cfg(feature = "levenshtein")]
pub use self::levenshtein::{Levenshtein, LevenshteinError};
pub use self::levenshtein::{Levenshtein, LevenshteinError, LevenshteinState};

#[cfg(feature = "levenshtein")]
mod levenshtein;
Expand Down
16 changes: 11 additions & 5 deletions src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ An implementation of fuzzy search using Levenshtein automata can be used
to search maps:
```rust
use fst::automaton::Levenshtein;
use fst::automaton::{Levenshtein, LevenshteinState};
use fst::{IntoStreamer, Streamer, Map};
# fn main() { example().unwrap(); }
Expand All @@ -341,11 +341,17 @@ fn example() -> Result<(), Box<dyn std::error::Error>> {
while let Some((k, v, s)) = stream.next() {
kvs.push((String::from_utf8(k.to_vec())?, v, s));
}
// Currently, there isn't much interesting that you can do with the states.
assert_eq!(kvs, vec![
("foo".to_string(), 1, Some(183)),
("foob".to_string(), 2, Some(123)),
("fozb".to_string(), 4, Some(83)),
("foo".to_string(), 1, Some(LevenshteinState {
state_idx: 183, distance: Some(0)
})),
("foob".to_string(), 2, Some(LevenshteinState {
state_idx: 123, distance: Some(1)
})),
("fozb".to_string(), 4, Some(LevenshteinState {
state_idx: 83, distance: Some(2)
})),
]);
Ok(())
Expand Down
16 changes: 11 additions & 5 deletions src/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ An implementation of fuzzy search using Levenshtein automata can be used
to search sets:
```rust
use fst::automaton::Levenshtein;
use fst::automaton::{Levenshtein, LevenshteinState};
use fst::{IntoStreamer, Streamer, Set};
# fn main() { example().unwrap(); }
Expand All @@ -229,11 +229,17 @@ fn example() -> Result<(), Box<dyn std::error::Error>> {
while let Some((v, s)) = stream.next() {
vs.push((String::from_utf8(v.to_vec())?, s));
}
// Currently, there isn't much interesting that you can do with the states.
assert_eq!(vs, vec![
("foo".to_string(), Some(183)),
("foob".to_string(), Some(123)),
("fozb".to_string(), Some(83)),
("foo".to_string(), Some(LevenshteinState {
state_idx: 183, distance: Some(0)
})),
("foob".to_string(), Some(LevenshteinState {
state_idx: 123, distance: Some(1)
})),
("fozb".to_string(), Some(LevenshteinState {
state_idx: 83, distance: Some(2)
})),
]);
Ok(())
Expand Down

0 comments on commit a58d63f

Please sign in to comment.