1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
/*!

Build a deterministic finite automaton (DFA) that computes
the levenshtein distance from a given string.


# Example

```rust
# extern crate levenshtein_automata;
use levenshtein_automata::{LevenshteinAutomatonBuilder, Distance};
# fn main() {

// Building this factory is not free.
let lev_automaton_builder = LevenshteinAutomatonBuilder::new(2, true);

// We can now build an entire dfa.
let dfa = lev_automaton_builder.build_dfa("Levenshtein");

let mut state = dfa.initial_state();
    for &b in "Levenshtain".as_bytes() {
    state = dfa.transition(state, b);
}

assert_eq!(dfa.distance(state), Distance::Exact(1));
# }
```

The implementation is based on the following paper
**Fast String Correction with Levenshtein-Automata (2002)** by by Klaus Schulz and Stoyan Mihov.
I also tried to explain it in the following [blog post](https://fulmicoton.com/posts/levenshtein/).

!*/

#![cfg_attr(test, feature(test))]

#[cfg(test)]
extern crate test;

#[cfg(test)]
mod bench;
#[cfg(test)]
mod tests;

mod alphabet;
mod dfa;
mod index;
mod levenshtein_nfa;
mod parametric_dfa;

pub use self::dfa::{DFA, SINK_STATE};
use self::index::Index;
pub use self::levenshtein_nfa::Distance;
use self::levenshtein_nfa::LevenshteinNFA;
use self::parametric_dfa::ParametricDFA;

/// Builder for Levenshtein Automata.
///
/// It wraps a precomputed datastructure that allows to
/// produce small (but not minimal) DFA.
pub struct LevenshteinAutomatonBuilder {
    parametric_dfa: ParametricDFA,
}

impl LevenshteinAutomatonBuilder {
    /// Creates a Levenshtein automaton builder.
    /// The builder
    ///
    /// * `max_distance` - maximum distance considered by the automaton.
    /// * `transposition_cost_one` - assign a distance of 1 for transposition
    ///
    /// Building this automaton builder is computationally intensive.
    /// While it takes only a few milliseconds for `d=2`, it grows exponentially with
    /// `d`. It is only reasonable to `d <= 5`.
    pub fn new(max_distance: u8, transposition_cost_one: bool) -> LevenshteinAutomatonBuilder {
        let levenshtein_nfa = LevenshteinNFA::levenshtein(max_distance, transposition_cost_one);
        let parametric_dfa = ParametricDFA::from_nfa(&levenshtein_nfa);
        LevenshteinAutomatonBuilder {
            parametric_dfa: parametric_dfa,
        }
    }

    /// Builds a Finite Determinstic Automaton to compute
    /// the levenshtein distance to a fixed given `query`.
    ///
    /// There is no guarantee that the resulting DFA is minimal
    /// but its number of states is guaranteed to be smaller
    /// than `C * (query.len() + 1)` in which C is a constant that depends
    /// on the distance as well as whether transposition are supported
    /// or not.
    ///
    /// For instance for `d=2` and with transposition, `C=68`.
    pub fn build_dfa(&self, query: &str) -> DFA {
        self.parametric_dfa.build_dfa(query, false)
    }

    /// Builds a Finite Determinstic Automaton that computes
    /// the prefix levenshtein distance to a given `query`.
    ///
    /// Given a test string, the resulting distance is defined as
    ///
    /// ```formula
    ///     min( levenshtein(&test_string[..i], query } for i in 0..test_string.len() )
    /// ```
    ///
    /// Which translates as *the minimum distance of the prefixes of `test_strings`*.
    ///
    /// See also [.build_dfa(...)](./struct.LevenshteinAutomatonBuilder.html#method.build_dfa).
    pub fn build_prefix_dfa(&self, query: &str) -> DFA {
        self.parametric_dfa.build_dfa(query, true)
    }
}