How to resolve the algorithm Soundex step by step in the JavaScript programming language

Published on 12 May 2024 09:40 PM

How to resolve the algorithm Soundex step by step in the JavaScript programming language

Table of Contents

Problem Statement

Soundex is an algorithm for creating indices for words based on their pronunciation.

The goal is for homophones to be encoded to the same representation so that they can be matched despite minor differences in spelling   (from the   soundex   Wikipedia article). There is a major issue in many of the implementations concerning the separation of two consonants that have the same soundex code! According to the official Rules [[1]]. So check for instance if Ashcraft is coded to A-261.

Let's start with the solution:

Step by Step solution about How to resolve the algorithm Soundex step by step in the JavaScript programming language

First Soundex Function:

This function takes a string as input and generates a 4-digit Soundex code. It works as follows:

  1. It converts the input string to lowercase, splits it into a character array, and removes the first character from the array.
  2. It creates a mapping of letters to digits: vowels ("aeiou") map to an empty string, and consonants are grouped into four categories (b/f/p/v, c/g/j/k/q/s/x/z, d/t, l, m/n, r) and mapped to the corresponding digits (1-6).
  3. It iterates through the character array, replacing each consonant with its corresponding digit. It removes any duplicate digits that occur consecutively.
  4. It inserts the first character of the input string back into the code.
  5. It truncates the code to 4 characters and converts it to uppercase.

Second Soundex Function:

This function provides a more detailed implementation of the Soundex algorithm with two modes: Simple Soundex and NARA Soundex. It offers several enhancements:

  1. Character Code Mapping: It uses a more complex mapping of characters to digits, as defined by the Soundex algorithm.
  2. Alphabetic Character Check: It ensures that only alphabetic characters are considered in the input string.
  3. Handling of Letters with Diacritics: It removes diacritics from characters before processing.
  4. NARA Soundex: When NARA Soundex mode is enabled, it omits the digit 7 from the character mapping and replaces any occurrences of 7 with an empty string.
  5. Curry Function: It uses a curry function to create two variants of the Soundex function: one for Simple Soundex and one for NARA Soundex.

Testing:

Both functions include extensive test cases to verify their accuracy. They compare the generated Soundex codes with expected values for various input strings. Any discrepancies are reported for debugging purposes.

Usage:

The provided Soundex functions can be used to generate Soundex codes for names, addresses, or other text data. They are commonly used in indexing, searching, and comparing text records for efficient retrieval.

Source code in the javascript programming language

var soundex = function (s) {
     var a = s.toLowerCase().split('')
         f = a.shift(),
         r = '',
         codes = {
             a: '', e: '', i: '', o: '', u: '',
             b: 1, f: 1, p: 1, v: 1,
             c: 2, g: 2, j: 2, k: 2, q: 2, s: 2, x: 2, z: 2,
             d: 3, t: 3,
             l: 4,
             m: 5, n: 5,
             r: 6
         };
 
     r = f +
         a
         .map(function (v, i, a) { return codes[v] })
         .filter(function (v, i, a) { return ((i === 0) ? v !== codes[f] : v !== a[i - 1]); })
         .join('');
 
     return (r + '000').slice(0, 4).toUpperCase();
};

var tests = {
  "Soundex":     "S532",
  "Example":     "E251",
  "Sownteks":    "S532",
  "Ekzampul":    "E251",
  "Euler":       "E460",
  "Gauss":       "G200",
  "Hilbert":     "H416",
  "Knuth":       "K530",
  "Lloyd":       "L300",
  "Lukasiewicz": "L222",
  "Ellery":      "E460",
  "Ghosh":       "G200",
  "Heilbronn":   "H416",
  "Kant":        "K530",
  "Ladd":        "L300",
  "Lissajous":   "L222",
  "Wheaton":     "W350",
  "Ashcraft":    "A226",
  "Burroughs":   "B622",
  "Burrows":     "B620",
  "O'Hara":      "O600"
  };

for (var i in tests)
  if (tests.hasOwnProperty(i)) {
    console.log(
      i +
      '    \t' +
      tests[i] +
      '\t' +
      soundex(i) +
      '\t' +
      (soundex(i) === tests[i])
    );
}

// Soundex     S532  S532  true
// Example     E251  E251  true
// Sownteks    S532  S532  true
// Ekzampul    E251  E251  true
// Euler       E460  E460  true
// Gauss       G200  G200  true
// Hilbert     H416  H416  true
// Knuth       K530  K530  true
// Lloyd       L300  L300  true
// Lukasiewicz L222  L222  true
// Ellery      E460  E460  true
// Ghosh       G200  G200  true
// Heilbronn   H416  H416  true
// Kant        K530  K530  true
// Ladd        L300  L300  true
// Lissajous   L222  L222  true
// Wheaton     W350  W350  true
// Ashcraft    A226  A226  true
// Burroughs   B622  B622  true
// Burrows     B620  B620  true
// O'Hara      O600  O600  true


function soundex(t) {
  t = t.toUpperCase().replace(/[^A-Z]/g, '');
  return (t[0] || '0') + t.replace(/[HW]/g, '')
    .replace(/[BFPV]/g, '1')
    .replace(/[CGJKQSXZ]/g, '2')
    .replace(/[DT]/g, '3')
    .replace(/[L]/g, '4')
    .replace(/[MN]/g, '5')
    .replace(/[R]/g, '6')
    .replace(/(.)\1+/g, '$1')
    .substr(1)
    .replace(/[AEOIUHWY]/g, '')
    .concat('000')
    .substr(0, 3);
}

// tests
[ ["Example", "E251"], ["Sownteks", "S532"], ["Lloyd", "L300"], ["12346", "0000"],
 ["4-H", "H000"], ["Ashcraft", "A261"], ["Ashcroft", "A261"], ["auerbach", "A612"],
 ["bar", "B600"], ["barre", "B600"], ["Baragwanath", "B625"], ["Burroughs", "B620"],
 ["Burrows", "B620"], ["C.I.A.", "C000"], ["coöp", "C100"], ["D-day", "D000"],
 ["d jay", "D200"], ["de la Rosa", "D462"], ["Donnell", "D540"], ["Dracula", "D624"],
 ["Drakula", "D624"], ["Du Pont", "D153"], ["Ekzampul", "E251"], ["example", "E251"],
 ["Ellery", "E460"], ["Euler", "E460"], ["F.B.I.", "F000"], ["Gauss", "G200"],
 ["Ghosh", "G200"], ["Gutierrez", "G362"], ["he", "H000"], ["Heilbronn", "H416"],
 ["Hilbert", "H416"], ["Jackson", "J250"], ["Johnny", "J500"], ["Jonny", "J500"],
 ["Kant", "K530"], ["Knuth", "K530"], ["Ladd", "L300"], ["Lloyd", "L300"],
 ["Lee", "L000"], ["Lissajous", "L222"], ["Lukasiewicz", "L222"], ["naïve", "N100"],
 ["Miller", "M460"], ["Moses", "M220"], ["Moskowitz", "M232"], ["Moskovitz", "M213"],
 ["O'Conner", "O256"], ["O'Connor", "O256"], ["O'Hara", "O600"], ["O'Mally", "O540"],
 ["Peters", "P362"], ["Peterson", "P362"], ["Pfister", "P236"], ["R2-D2", "R300"],
 ["rÄ≈sumÅ∙", "R250"], ["Robert", "R163"], ["Rupert", "R163"], ["Rubin", "R150"],
 ["Soundex", "S532"], ["sownteks", "S532"], ["Swhgler", "S460"], ["'til", "T400"],
 ["Tymczak", "T522"], ["Uhrbach", "U612"], ["Van de Graaff", "V532"],
 ["VanDeusen", "V532"], ["Washington", "W252"], ["Wheaton", "W350"],
 ["Williams", "W452"], ["Woolcock", "W422"]
].forEach(function(v) {
  var a = v[0], t = v[1], d = soundex(a);
   if (d !== t) {
    console.log('soundex("' + a + '") was ' + d + ' should be ' + t);
  }
});


(() => {
    'use strict';

    // Simple Soundex or NARA Soundex (if blnNara = true)

    // soundex :: Bool -> String -> String
    const soundex = (blnNara, name) => {

        // code :: Char -> Char
        const code = c => ['AEIOU', 'BFPV', 'CGJKQSXZ', 'DT', 'L', 'MN', 'R', 'HW']
            .reduce((a, x, i) =>
                a ? a : (x.indexOf(c) !== -1 ? i.toString() : a), '');

        // isAlpha :: Char -> Boolean
        const isAlpha = c => {
            const d = c.charCodeAt(0);
            return d > 64 && d < 91;
        };

        const s = name.toUpperCase()
            .split('')
            .filter(isAlpha);

        return (s[0] || '0') +
            s.map(code)
            .join('')
            .replace(/7/g, blnNara ? '' : '7')
            .replace(/(.)\1+/g, '$1')
            .substr(1)
            .replace(/[07]/g, '')
            .concat('000')
            .substr(0, 3);
    };

    // curry :: ((a, b) -> c) -> a -> b -> c
    const curry = f => a => b => f(a, b),
        [simpleSoundex, naraSoundex] = [false, true]
        .map(bln => curry(soundex)(bln));

    // TEST
    return [
        ["Example", "E251"],
        ["Sownteks", "S532"],
        ["Lloyd", "L300"],
        ["12346", "0000"],
        ["4-H", "H000"],
        ["Ashcraft", "A261"],
        ["Ashcroft", "A261"],
        ["auerbach", "A612"],
        ["bar", "B600"],
        ["barre", "B600"],
        ["Baragwanath", "B625"],
        ["Burroughs", "B620"],
        ["Burrows", "B620"],
        ["C.I.A.", "C000"],
        ["coöp", "C100"],
        ["D-day", "D000"],
        ["d jay", "D200"],
        ["de la Rosa", "D462"],
        ["Donnell", "D540"],
        ["Dracula", "D624"],
        ["Drakula", "D624"],
        ["Du Pont", "D153"],
        ["Ekzampul", "E251"],
        ["example", "E251"],
        ["Ellery", "E460"],
        ["Euler", "E460"],
        ["F.B.I.", "F000"],
        ["Gauss", "G200"],
        ["Ghosh", "G200"],
        ["Gutierrez", "G362"],
        ["he", "H000"],
        ["Heilbronn", "H416"],
        ["Hilbert", "H416"],
        ["Jackson", "J250"],
        ["Johnny", "J500"],
        ["Jonny", "J500"],
        ["Kant", "K530"],
        ["Knuth", "K530"],
        ["Ladd", "L300"],
        ["Lloyd", "L300"],
        ["Lee", "L000"],
        ["Lissajous", "L222"],
        ["Lukasiewicz", "L222"],
        ["naïve", "N100"],
        ["Miller", "M460"],
        ["Moses", "M220"],
        ["Moskowitz", "M232"],
        ["Moskovitz", "M213"],
        ["O'Conner", "O256"],
        ["O'Connor", "O256"],
        ["O'Hara", "O600"],
        ["O'Mally", "O540"],
        ["Peters", "P362"],
        ["Peterson", "P362"],
        ["Pfister", "P236"],
        ["R2-D2", "R300"],
        ["rÄ≈sumÅ∙", "R250"],
        ["Robert", "R163"],
        ["Rupert", "R163"],
        ["Rubin", "R150"],
        ["Soundex", "S532"],
        ["sownteks", "S532"],
        ["Swhgler", "S460"],
        ["'til", "T400"],
        ["Tymczak", "T522"],
        ["Uhrbach", "U612"],
        ["Van de Graaff", "V532"],
        ["VanDeusen", "V532"],
        ["Washington", "W252"],
        ["Wheaton", "W350"],
        ["Williams", "W452"],
        ["Woolcock", "W422"]
    ].reduce((a, [name, naraCode]) => {
        const naraTest = naraSoundex(name),
            simpleTest = simpleSoundex(name);

        const logNara = naraTest !== naraCode ? (
                `${name} was ${naraTest} should be ${naraCode}`
            ) : '',
            logDelta = (naraTest !== simpleTest ? (
                `${name} -> NARA: ${naraTest} vs Simple: ${simpleTest}`
            ) : '');

        return logNara.length || logDelta.length ? (
            a + [logNara, logDelta].join('\n')
        ) : a;
    }, '');
})();


  

You may also check:How to resolve the algorithm Modular exponentiation step by step in the Julia programming language
You may also check:How to resolve the algorithm Classes step by step in the XBS programming language
You may also check:How to resolve the algorithm Align columns step by step in the Ada programming language
You may also check:How to resolve the algorithm Kronecker product based fractals step by step in the Nim programming language
You may also check:How to resolve the algorithm Two's complement step by step in the ARM Assembly programming language