How to resolve the algorithm K-means++ clustering step by step in the Phix programming language

Published on 12 May 2024 09:40 PM

How to resolve the algorithm K-means++ clustering step by step in the Phix programming language

Table of Contents

Problem Statement

The task is to implement the K-means++ algorithm. Produce a function which takes two arguments: the number of clusters K, and the dataset to classify. K is a positive integer and the dataset is a list of points in the Cartesian plane. The output is a list of clusters (related sets of points, according to the algorithm). For extra credit (in order): Extra credit is only awarded if the examples given demonstrate the feature in question. To earn credit for 1. and 2., visualize 6 clusters of 30,000 points in ℝ2. It is not necessary to provide visualization for spaces higher than ℝ2 but the examples should demonstrate features 3. and 4. if the solution has them.

Let's start with the solution:

Step by Step solution about How to resolve the algorithm K-means++ clustering step by step in the Phix programming language

Source code in the phix programming language

-- demo\rosetta\K_means_clustering.exw
--  Press F5 to restart
include pGUI.e

Ihandle dlg, canvas, timer
cdCanvas cddbuffer, cdcanvas

constant TITLE = "K-means++ clustering"

constant useGoInitialData = false       -- (not very well centered)

constant N = 30000,                     -- number of points
         K = 16                         -- number of clusters

sequence {Px, Py, Pc} @= repeat(0,N),   -- coordinates of points and their cluster
         {Cx, Cy} @= repeat(0,K)        -- coordinates of centroid of cluster

constant colours = {CD_RED, CD_DARK_RED, CD_BLUE, CD_DARK_BLUE, CD_CYAN, CD_DARK_CYAN, 
                    CD_GREEN, CD_DARK_GREEN, CD_MAGENTA, CD_DARK_MAGENTA, CD_YELLOW,
                    CD_DARK_YELLOW, CD_DARK_ORANGE, CD_INDIGO, CD_PURPLE, CD_DARK_GREY}
if length(colours)
 
function Centroid()
-- Find new centroids of points grouped with current centroids
bool change = false
    for c=1 to K do                         -- for each centroid...
        integer x=0, y=0, count:= 0;        -- find new centroid
        for i=1 to N do                     -- for all points
            if Pc[i] = c then               -- grouped with current centroid...
                x += Px[i]
                y += Py[i]
                count += 1
            end if
        end for
        if count!=0 then
            x = floor(x/count)
            y = floor(y/count)
            if Cx[c]!=x
            or Cy[c]!=y then
                Cx[c] = x
                Cy[c] = y
                change:= true
            end if
        end if
    end for
    return change
end function
 
function sq(atom x) return x*x end function

procedure Voronoi()             -- Group points with their nearest centroid
    integer d2,                 -- distance squared, 
            min_d2              -- minimum distance squared
    for i=1 to N do             -- for each point...
        min_d2 := #3FFFFFFF     -- find closest centroid
        for c=1 to K do
            d2 := sq(Px[i]-Cx[c]) + sq(Py[i]-Cy[c])
            if d2
                min_d2 := d2
                Pc[i] := c      -- update closest centroid
            end if
        end for
    end for
end procedure
 
function rand_xy()              -- Return random X,Y biased for polar coordinates
    atom d := rand(240)-1,              -- distance: 0..239
         a := rnd()*2*PI                -- angle:    0..2pi
    integer x:= floor(d*cos(a))+320,    -- rectangular coords centered on screen
            y:= floor(d*sin(a))+240     --     (that is, assuming 640x480)
    return {x,y}
end function

--This little bit is copied from/based on Go:
constant k = K,
         nPoints = N,
         xBox = 300,
         yBox = 200,
         stdv = 30

function genECData()
    sequence orig = repeat({0,0}, k),
             data = repeat({0,0,0}, nPoints)
    integer n = 0, nk = k
    for i=1 to k do
        integer x := rand(xBox)+320,
                y := rand(yBox)+240
        orig[i] = {x, y}
        for j=1 to floor((nPoints-n)/nk) do
            n += 1
            atom d := rand(stdv)-1,             -- distance: 0..239
                 a := rnd()*2*PI                -- angle:    0..2pi
            integer nx:= floor(d*cos(a))+x, -- rectangular coords centered on screen
                    ny:= floor(d*sin(a))+y  --     (that is, assuming 640x480)
            data[n] = {nx,ny,i}
        end for
        nk -= 1
    end for
    if n!=nPoints then ?9/0 end if
    return {orig, data}
end function
--
 
integer iteration = 0

function redraw_cb(Ihandle /*ih*/, integer /*posx*/, integer /*posy*/)
    integer {w, h} = IupGetIntInt(canvas, "DRAWSIZE")
    cdCanvasActivate(cddbuffer)
    if iteration=0 then
        if useGoInitialData then
            sequence {origins,data} = genECData()
            {Px, Py, Pc} = columnize(data)
            {Cx, Cy} = columnize(origins)
        else
            for i=1 to N do {Px[i],Py[i]} = rand_xy() end for   -- random set of points
            for i=1 to K do {Cx[i],Cy[i]} = rand_xy() end for   -- random set of cluster centroids
        end if
    end if
    sequence {r,g,b} @ = repeat(0,w*h)
    Voronoi()
    bool change := Centroid()
    for i=1 to N do
        integer idx = Px[i]+(Py[i]-1)*w
        {r[idx],g[idx],b[idx]} = cdDecodeColor(colours[Pc[i]])
    end for
    for i=1 to K do
        integer idx = Cx[i]+(Cy[i]-1)*w
        {r[idx],g[idx],b[idx]} = cdDecodeColor(CD_WHITE)
    end for
    cdCanvasPutImageRectRGB(cddbuffer, w, h, {r,g,b})
    cdCanvasFlush(cddbuffer)
    if change then
        iteration += 1
        IupSetStrAttribute(dlg, "TITLE", "%s (iteration %d)",{TITLE,iteration})
    else
        IupSetInt(timer,"RUN",0)                -- (stop timer)
        IupSetStrAttribute(dlg, "TITLE", TITLE)
    end if
    return IUP_DEFAULT
end function

function timer_cb(Ihandle /*ih*/)
    IupUpdate(canvas)
    return IUP_IGNORE
end function

function map_cb(Ihandle ih)
    cdcanvas = cdCreateCanvas(CD_IUP, ih)
    cddbuffer = cdCreateCanvas(CD_DBUFFER, cdcanvas)
    return IUP_DEFAULT
end function

function esc_close(Ihandle /*ih*/, atom c)
    if c=K_ESC then return IUP_CLOSE end if
    if c=K_F5 then
        iteration = 0
        IupSetInt(timer,"RUN",1)                -- (restart timer)
    end if
    return IUP_CONTINUE
end function

procedure main()
    IupOpen()

    canvas = IupCanvas(NULL)
    IupSetAttribute(canvas, "RASTERSIZE", "640x480")
    IupSetCallback(canvas, "MAP_CB", Icallback("map_cb"))
    IupSetCallback(canvas, "ACTION", Icallback("redraw_cb"))

    timer = IupTimer(Icallback("timer_cb"), 100)

    dlg = IupDialog(canvas,"DIALOGFRAME=YES")
    IupSetAttribute(dlg, "TITLE", TITLE)
    IupSetCallback(dlg, "K_ANY",     Icallback("esc_close"))

    IupShow(dlg)
    IupSetAttribute(canvas, "RASTERSIZE", NULL)
    IupMainLoop()
    IupClose()
end procedure

main()


  

You may also check:How to resolve the algorithm Loops/For step by step in the Brat programming language
You may also check:How to resolve the algorithm Identity matrix step by step in the MATLAB / Octave programming language
You may also check:How to resolve the algorithm Sum of a series step by step in the Phix programming language
You may also check:How to resolve the algorithm Naming conventions step by step in the Arturo programming language
You may also check:How to resolve the algorithm HTTP step by step in the Seed7 programming language