diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 5b2dd8e..648d27f 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -453,6 +453,34 @@ impl> Fst { self.as_ref().get_key_into(value, key) } + /// find the longest key that is prefix of the given value. + /// + /// If the key exists, then `Some((value, key_len))` is returned, where + /// `value` is the value associated with the key, and `key_len` is the + /// length of the found key. Otherwise `None` is returned. + /// + /// This can be used to e.g. build tokenizing functions. + #[inline] + pub fn find_longest_prefix(&self, value: &[u8]) -> Option<(u64, usize)> { + let mut node = self.root(); + let mut out = Output::zero(); + let mut last_match = None; + for (i, &b) in input.iter().enumerate() { + if let Some(trans_index) = node.find_input(b) { + let t = node.transition(trans_index); + node = self.node(t.addr); + if node.is_final() { + last_match = + Some((out.cat(node.final_output()).value(), i + 1)); + } + out = out.cat(t.out); + } else { + return last_match; + } + } + last_match + } + /// Return a lexicographically ordered stream of all key-value pairs in /// this fst. #[inline]