The ProVal team was stopped at the end of August 2012, and reborn into a new team Toccata

## Knuth-Morris-Pratt string searching algorithm

Authors: Jean-Christophe Filliâtre

Topics: Array Data Structure

Tools: Why3

```
(*                                                                        *)
(* Proof of the Knuth-Morris-Pratt Algorithm.                             *)
(*                                                                        *)
(* Jean-Christophe FilliÃ¢tre (LRI, UniversitÃ© Paris Sud)                  *)
(* November 1998                                                          *)
(*                                                                        *)

module KnuthMorrisPratt

use import int.Int
use import ref.Ref
use import array.Array

type char

predicate matches (a1: array char) (i1: int)
(a2: array char) (i2: int) (n: int) =
0 <= i1 <= length a1 - n /\
0 <= i2 <= length a2 - n /\
forall i: int. 0 <= i < n -> a1[i1 + i] = a2[i2 + i]

lemma matches_empty:
forall a1 a2: array char, i1 i2: int.
0 <= i1 <= length a1 ->
0 <= i2 <= length a2 ->
matches a1 i1 a2 i2 0

lemma matches_right_extension:
forall a1 a2: array char, i1 i2 n: int.
matches a1 i1 a2 i2 n ->
i1 <= length a1 - n - 1 ->
i2 <= length a2 - n - 1 ->
a1[i1 + n] = a2[i2 + n] ->
matches a1 i1 a2 i2 (n + 1)

forall a1 a2: array char, i1 i2 n: int.
0 < n -> a1[i1] <> a2[i2] -> not (matches a1 i1 a2 i2 n)

forall a1 a2: array char, i1 i2 i n: int.
0 < n ->
0 <= i < n ->
a1[i1 + i] <> a2[i2 + i] -> not (matches a1 i1 a2 i2 n)

lemma matches_right_weakening:
forall a1 a2: array char, i1 i2 n n': int.
matches a1 i1 a2 i2 n -> n' < n -> matches a1 i1 a2 i2 n'

lemma matches_left_weakening:
forall a1 a2: array char, i1 i2 n n': int.
matches a1 (i1 - (n - n')) a2 (i2 - (n - n')) n ->
n' < n -> matches a1 i1 a2 i2 n'

lemma matches_sym:
forall a1 a2: array char, i1 i2 n: int.
matches a1 i1 a2 i2 n -> matches a2 i2 a1 i1 n

lemma matches_trans:
forall a1 a2 a3: array char, i1 i2 i3 n: int.
matches a1 i1 a2 i2 n -> matches a2 i2 a3 i3 n -> matches a1 i1 a3 i3 n

predicate is_next (p: array char) (j n: int) =
0 <= n < j /\
matches p (j - n) p 0 n /\
forall z: int. n < z < j -> not (matches p (j - z) p 0 z)

lemma next_iteration:
forall p a: array char, i j n: int.
0 < j < length p ->
j <= i <= length a ->
matches a (i - j) p 0 j -> is_next p j n -> matches a (i - n) p 0 n

lemma next_is_maximal:
forall p a: array char, i j n k: int.
0 < j < length p ->
j <= i <= length a ->
i - j < k < i - n ->
matches a (i - j) p 0 j ->
is_next p j n -> not (matches a k p 0 (length p))

lemma next_1_0:
forall p: array char. 1 <= length p -> is_next p 1 0

(* We first compute a table next with the procedure initnext.
* That table only depends on the pattern. *)

let initnext (p: array char)
requires { 1 <= length p }
ensures  { length result = length p &&
forall j:int. 0 < j < p.length -> is_next p j result[j] }
= let m = length p in
let next = make m 0 in
if 1 < m then begin
next[1] <- 0;
let i = ref 1 in
let j = ref 0 in
while !i < m - 1 do
invariant { 0 <= !j < !i <= m }
invariant { matches p (!i - !j) p 0 !j }
invariant { forall z:int.
!j+1 < z < !i+1 -> not matches p (!i + 1 - z) p 0 z }
invariant { forall k:int. 0 < k <= !i -> is_next p k next[k] }
variant { m - !i, !j }
if p[!i] = p[!j] then begin
i := !i + 1; j := !j + 1; next[!i] <- !j
end else
if !j = 0 then begin i := !i + 1; next[!i] <- 0 end else j := next[!j]
done
end;
next

(* The algorithm looks for an occurrence of the pattern p in a text a
* which is an array of length N.
* The function kmp returns an index i within 0..N-1 if there is an occurrence
* at the position i and N otherwise. *)

predicate first_occur (p a: array char) (r: int) =
(0 <= r < length a -> matches a r p 0 (length p)) /\
(forall k: int. 0 <= k < r -> not (matches a k p 0 (length p)))

let kmp (p a: array char)
requires { 1 <= length p /\ 0 <= length a }
ensures  { first_occur p a result }
= let m = length p in
let n = length a in
let i = ref 0 in
let j = ref 0 in
let next = initnext p in
while !j < m && !i < n do
invariant { 0 <= !j <= m /\ !j <= !i <= n }
invariant { matches a (!i - !j) p 0 !j }
invariant { forall k:int. 0 <= k < !i - !j -> not (matches a k p 0 m) }
variant { n - !i, !j }
if a[!i] = p[!j] then begin
i := !i+1; j := !j+1
end else
if !j = 0 then i := !i+1 else j := next[!j]
done;
if !j = m then !i - m else !i

end
```