How to resolve the algorithm Levenshtein distance step by step in the OCaml programming language

Published on 12 May 2024 09:40 PM

How to resolve the algorithm Levenshtein distance step by step in the OCaml programming language

Table of Contents

Problem Statement

In information theory and computer science, the Levenshtein distance is a metric for measuring the amount of difference between two sequences (i.e. an edit distance). The Levenshtein distance between two strings is defined as the minimum number of edits needed to transform one string into the other, with the allowable edit operations being insertion, deletion, or substitution of a single character.

The Levenshtein distance between "kitten" and "sitting" is 3, since the following three edits change one into the other, and there isn't a way to do it with fewer than three edits:

The Levenshtein distance between   "rosettacode",   "raisethysword"   is   8. The distance between two strings is same as that when both strings are reversed.

Implements a Levenshtein distance function, or uses a library function, to show the Levenshtein distance between   "kitten"   and   "sitting".

Let's start with the solution:

Step by Step solution about How to resolve the algorithm Levenshtein distance step by step in the OCaml programming language

Source code in the ocaml programming language

let minimum a b c =
  min a (min b c)

let levenshtein_distance s t =
  let m = String.length s
  and n = String.length t in
  (* for all i and j, d.(i).(j) will hold the Levenshtein distance between
     the first i characters of s and the first j characters of t *)
  let d = Array.make_matrix (m+1) (n+1) 0 in

  for i = 0 to m do
    d.(i).(0) <- i  (* the distance of any first string to an empty second string *)
  done;
  for j = 0 to n do
    d.(0).(j) <- j  (* the distance of any second string to an empty first string *)
  done;

  for j = 1 to n do
    for i = 1 to m do

      if s.[i-1] = t.[j-1] then
        d.(i).(j) <- d.(i-1).(j-1)  (* no operation required *)
      else
        d.(i).(j) <- minimum
                       (d.(i-1).(j) + 1)   (* a deletion *)
                       (d.(i).(j-1) + 1)   (* an insertion *)
                       (d.(i-1).(j-1) + 1) (* a substitution *)
    done;
  done;

  d.(m).(n)
;;

let test s t =
  Printf.printf " %s -> %s = %d\n" s t (levenshtein_distance s t);
;;

let () =
  test "kitten" "sitting";
  test "rosettacode" "raisethysword";
;;


let levenshtein s t =
   let rec dist i j = match (i,j) with
      | (i,0) -> i
      | (0,j) -> j
      | (i,j) ->
         if s.[i-1] = t.[j-1] then dist (i-1) (j-1)
         else let d1, d2, d3 = dist (i-1) j, dist i (j-1), dist (i-1) (j-1) in
         1 + min d1 (min d2 d3)
   in
   dist (String.length s) (String.length t)

let test s t =
  Printf.printf " %s -> %s = %d\n" s t (levenshtein s t)

let () =
  test "kitten" "sitting";
  test "rosettacode" "raisethysword";


  

You may also check:How to resolve the algorithm Sorting algorithms/Gnome sort step by step in the E programming language
You may also check:How to resolve the algorithm Grayscale image step by step in the Fōrmulæ programming language
You may also check:How to resolve the algorithm Boolean values step by step in the Emacs Lisp programming language
You may also check:How to resolve the algorithm Idoneal numbers step by step in the PL/M programming language
You may also check:How to resolve the algorithm Singly-linked list/Element insertion step by step in the Scala programming language