How to resolve the algorithm Averages/Median step by step in the AArch64 Assembly programming language
How to resolve the algorithm Averages/Median step by step in the AArch64 Assembly programming language
Table of Contents
Problem Statement
Write a program to find the median value of a vector of floating-point numbers. The program need not handle the case where the vector is empty, but must handle the case where there are an even number of elements. In that case, return the average of the two middle values. There are several approaches to this. One is to sort the elements, and then pick the element(s) in the middle. Sorting would take at least O(n logn). Another approach would be to build a priority queue from the elements, and then extract half of the elements to get to the middle element(s). This would also take O(n logn). The best solution is to use the selection algorithm to find the median in O(n) time. Quickselect_algorithm
Let's start with the solution:
Step by Step solution about How to resolve the algorithm Averages/Median step by step in the AArch64 Assembly programming language
Source code in the aarch64 programming language
/* ARM assembly AARCH64 Raspberry PI 3B */
/* program averageMed64.s */
/* use quickselect look pseudo code in wikipedia quickselect */
/************************************/
/* Constantes */
/************************************/
/* for this file see task include a file in language AArch64 assembly*/
.include "../includeConstantesARM64.inc"
/*********************************/
/* Initialized data */
/*********************************/
.data
szMessResultValue: .asciz "Result : "
szCarriageReturn: .asciz "\n"
.align 4
TableNumber: .double 4.1, 5.6, 7.2, 1.7, 9.3, 4.4, 3.2
.equ NBELEMENTS, (. - TableNumber) / 8
TableNumber2: .double 4.1, 7.2, 1.7, 9.3, 4.4, 3.2
.equ NBELEMENTS2, (. - TableNumber2) / 8
/*********************************/
/* UnInitialized data */
/*********************************/
.bss
sZoneConv: .skip 24
sZoneConv1: .skip 24
/*********************************/
/* code section */
/*********************************/
.text
.global main
main: // entry of program
ldr x0,qAdrTableNumber // address number table
mov x1,#0 // index first item
mov x2,#NBELEMENTS -1 // index last item
bl searchMedian
ldr x0,qAdrTableNumber2 // address number table 2
mov x1,#0 // index first item
mov x2,#NBELEMENTS2 -1 // index last item
bl searchMedian
100: // standard end of the program
mov x0, #0 // return code
mov x8, #EXIT // request to exit program
svc #0 // perform the system call
qAdrszCarriageReturn: .quad szCarriageReturn
qAdrTableNumber: .quad TableNumber
qAdrTableNumber2: .quad TableNumber2
qAdrsZoneConv: .quad sZoneConv
qAdrszMessResultValue: .quad szMessResultValue
/***************************************************/
/* search median term in float array */
/***************************************************/
/* x0 contains the address of table */
/* x1 contains index of first item */
/* x2 contains index of last item */
searchMedian:
stp x1,lr,[sp,-16]! // save registers TODO: à revoir génération
stp x2,x3,[sp,-16]! // save registers
stp x4,x5,[sp,-16]! // save registers
mov x19,x0 // save array address
add x4,x1,x2
add x4,x4,#1 // sum numbers terms
tst x4,#1 // odd ?
bne 1f
lsr x3,x4,#1 // compute median index
bl select // call selection
fmov d0,x0 // save first result
sub x3,x3,#1 // second term
mov x0,x19
bl select // call selection
fmov d1,x0 // save 2ieme résult
fadd d0,d0,d1 // compute average two résults
mov x0,#2
fmov d1,x0
scvtf d1,d1 // conversion integer -> float
fdiv d0,d0,d1
b 2f
1: // even
lsr x3,x4,#1
bl select // call selection
fmov d0,x0
2:
ldr x0,qAdrsZoneConv // conversion float in decimal string
bl convertirFloat
mov x0,#3 // and display result
ldr x1,qAdrszMessResultValue
ldr x2,qAdrsZoneConv
ldr x3,qAdrszCarriageReturn
bl displayStrings
100: // end function
ldp x4,x5,[sp],16 // restaur 2 registers
ldp x2,x3,[sp],16 // restaur 2 registers
ldp x1,lr,[sp],16 // restaur 2 registers
ret
/***************************************************/
/* Appel récursif selection */
/***************************************************/
/* x0 contains the address of table */
/* x1 contains index of first item */
/* x2 contains index of last item */
/* x3 contains search index */
select:
stp x1,lr,[sp,-16]! // save registers
stp x2,x3,[sp,-16]! // save registers
stp x4,x5,[sp,-16]! // save registers
stp x6,x7,[sp,-16]! // save registers
mov x6,x3 // save search index
cmp x1,x2 // first = last ?
bne 1f
ldr x0,[x0,x1,lsl #3] // return value of first index
b 100f // yes -> end
1:
add x3,x1,x2
lsr x3,x3,#1 // compute median pivot
mov x4,x0 // save x0
mov x5,x2 // save x2
bl partition // cutting.quado 2 parts
cmp x6,x0 // pivot is ok ?
bne 2f
ldr x0,[x4,x0,lsl #3] // yes -> return value
b 100f
2:
bgt 3f
sub x2,x0,#1 // index partition - 1
mov x0,x4 // array address
mov x3,x6 // search index
bl select // select lower part
b 100f
3:
add x1,x0,#1 // index begin = index partition + 1
mov x0,x4 // array address
mov x2,x5 // last item
mov x3,x6 // search index
bl select // select higter part
100: // end function
ldp x6,x7,[sp],16 // restaur 2 registers
ldp x4,x5,[sp],16 // restaur 2 registers
ldp x2,x3,[sp],16 // restaur 2 registers
ldp x1,lr,[sp],16 // restaur 2 registers
ret // return to address lr x30
/******************************************************************/
/* Partition table elements */
/******************************************************************/
/* x0 contains the address of table */
/* x1 contains index of first item */
/* x2 contains index of last item */
/* x3 contains index of pivot */
partition:
stp x1,lr,[sp,-16]! // save registers
stp x2,x3,[sp,-16]! // save registers
stp x4,x5,[sp,-16]! // save registers
stp x6,x7,[sp,-16]! // save registers
ldr x4,[x0,x3,lsl #3] // load value of pivot
ldr x5,[x0,x2,lsl #3] // load value last index
str x5,[x0,x3,lsl #3] // swap value of pivot
str x4,[x0,x2,lsl #3] // and value last index
mov x3,x1 // init with first index
1: // begin loop
ldr x6,[x0,x3,lsl #3] // load value
cmp x6,x4 // compare loop value and pivot value
bge 2f
ldr x5,[x0,x1,lsl #3] // if < swap value table
str x6,[x0,x1,lsl #3]
str x5,[x0,x3,lsl #3]
add x1,x1,#1 // and increment index 1
2:
add x3,x3,#1 // increment index 2
cmp x3,x2 // end ?
blt 1b // no loop
ldr x5,[x0,x1,lsl #3] // swap value
str x4,[x0,x1,lsl #3]
str x5,[x0,x2,lsl #3]
mov x0,x1 // return index partition
100:
ldp x6,x7,[sp],16 // restaur 2 registers
ldp x4,x5,[sp],16 // restaur 2 registers
ldp x2,x3,[sp],16 // restaur 2 registers
ldp x1,lr,[sp],16 // restaur 2 registers
ret // return to address lr x30
/***************************************************/
/* display multi strings */
/* new version 24/05/2023 */
/***************************************************/
/* x0 contains number strings address */
/* x1 address string1 */
/* x2 address string2 */
/* x3 address string3 */
/* x4 address string4 */
/* x5 address string5 */
/* x6 address string5 */
/* x7 address string6 */
displayStrings: // INFO: displayStrings
stp x8,lr,[sp,-16]! // save registers
stp x2,fp,[sp,-16]! // save registers
add fp,sp,#32 // save paraméters address (4 registers saved * 8 bytes)
mov x8,x0 // save strings number
cmp x8,#0 // 0 string -> end
ble 100f
mov x0,x1 // string 1
bl affichageMess
cmp x8,#1 // number > 1
ble 100f
mov x0,x2
bl affichageMess
cmp x8,#2
ble 100f
mov x0,x3
bl affichageMess
cmp x8,#3
ble 100f
mov x0,x4
bl affichageMess
cmp x8,#4
ble 100f
mov x0,x5
bl affichageMess
cmp x8,#5
ble 100f
mov x0,x6
bl affichageMess
cmp x8,#6
ble 100f
mov x0,x7
bl affichageMess
100:
ldp x2,fp,[sp],16 // restaur registers
ldp x8,lr,[sp],16 // restaur registers
ret
/******************************************************************/
/* Conversion Float */
/******************************************************************/
/* d0 contains Float */
/* x0 contains address conversion area mini 20 charactèrs */
/* x0 return result length */
/* see https://blog.benoitblanchon.fr/lightweight-float-to-string/ */
convertirFloat:
stp x1,lr,[sp,-16]! // save registres
stp x2,x3,[sp,-16]! // save registres
stp x4,x5,[sp,-16]! // save registres
stp x6,x7,[sp,-16]! // save registres
stp x8,x9,[sp,-16]! // save registres
stp d1,d2,[sp,-16]! // save registres
mov x6,x0 // save area address
fmov x0,d0
mov x8,#0 // result length
mov x3,#'+'
strb w3,[x6] // signe + forcing
mov x2,x0
tbz x2,63,1f
mov x2,1
lsl x2,x2,63
bic x0,x0,x2
mov x3,#'-' // sign -
strb w3,[x6]
1:
adds x8,x8,#1 // next position
cmp x0,#0 // case 0 positive or negative
bne 2f
mov x3,#'0'
strb w3,[x6,x8] // store character 0
adds x8,x8,#1
strb wzr,[x6,x8] // store 0 final
mov x0,x8 // return length
b 100f
2:
ldr x2,iMaskExposant
mov x1,x0
and x1,x1,x2 // exposant
cmp x1,x2
bne 4f
tbz x0,51,3f // test bit 51 to zéro
mov x2,#'N' // case Nan. store byte no possible store integer
strb w2,[x6] // area no aligned
mov x2,#'a'
strb w2,[x6,#1]
mov x2,#'n'
strb w2,[x6,#2]
mov x2,#0 // 0 final
strb w2,[x6,#3]
mov x0,#3
b 100f
3: // case infini positive or négative
mov x2,#'I'
strb w2,[x6,x8]
adds x8,x8,#1
mov x2,#'n'
strb w2,[x6,x8]
adds x8,x8,#1
mov x2,#'f'
strb w2,[x6,x8]
adds x8,x8,#1
mov x2,#0
strb w2,[x6,x8]
mov x0,x8
b 100f
4:
bl normaliserFloat
mov x5,x0 // save exposant
fcvtzu d2,d0
fmov x0,d2 // part integer
scvtf d1,d2 // conversion float
fsub d1,d0,d1 // extraction part fractional
ldr d2,dConst1
fmul d1,d2,d1 // to crop it in full
fcvtzu d1,d1 // convertion integer
fmov x4,d1 // fract value
// conversion part integer to x0
mov x2,x6 // save address begin area
adds x6,x6,x8
mov x1,x6
bl conversion10
add x6,x6,x0
mov x3,#','
strb w3,[x6]
adds x6,x6,#1
mov x0,x4 // conversion part fractionnaire
mov x1,x6
bl conversion10SP
add x6,x6,x0
sub x6,x6,#1
// remove trailing zeros
5:
ldrb w0,[x6]
cmp w0,#'0'
bne 6f
sub x6,x6,#1
b 5b
6:
cmp w0,#','
bne 7f
sub x6,x6,#1
7:
cmp x5,#0 // if exposant = 0 no display
bne 8f
add x6,x6,#1
b 10f
8:
add x6,x6,#1
mov x3,#'E'
strb w3,[x6]
add x6,x6,#1
mov x0,x5 // conversion exposant
mov x3,x0
tbz x3,63,9f // exposant negative ?
neg x0,x0
mov x3,#'-'
strb w3,[x6]
adds x6,x6,#1
9:
mov x1,x6
bl conversion10
add x6,x6,x0
10:
strb wzr,[x6] // store 0 final
adds x6,x6,#1
mov x0,x6
subs x0,x0,x2 // retour de la longueur de la zone
subs x0,x0,#1 // sans le 0 final
100:
ldp d1,d2,[sp],16 // restaur registres
ldp x8,x9,[sp],16 // restaur registres
ldp x6,x7,[sp],16 // restaur registres
ldp x4,x5,[sp],16 // restaur registres
ldp x2,x3,[sp],16 // restaur registres
ldp x1,lr,[sp],16 // restaur registres
ret
iMaskExposant: .quad 0x7FF<<52
dConst1: .double 0f1E17
/***************************************************/
/* normaliser float */
/***************************************************/
/* x0 contain float value (always positive value and <> Nan) */
/* d0 return new value */
/* x0 return exposant */
normaliserFloat:
stp x1,lr,[sp,-16]! // save registers
fmov d0,x0 // value float
mov x0,#0 // exposant
ldr d1,dConstE7 // no normalisation for value < 1E7
fcmp d0,d1
blo 10f // if d0 < dConstE7
ldr d1,dConstE256
fcmp d0,d1
blo 1f
fdiv d0,d0,d1
adds x0,x0,#256
1:
ldr d1,dConstE128
fcmp d0,d1
blo 1f
fdiv d0,d0,d1
adds x0,x0,#128
1:
ldr d1,dConstE64
fcmp d0,d1
blo 1f
fdiv d0,d0,d1
adds x0,x0,#64
1:
ldr d1,dConstE32
fcmp d0,d1
blo 1f
fdiv d0,d0,d1
adds x0,x0,#32
1:
ldr d1,dConstE16
fcmp d0,d1
blo 2f
fdiv d0,d0,d1
adds x0,x0,#16
2:
ldr d1,dConstE8
fcmp d0,d1
blo 3f
fdiv d0,d0,d1
adds x0,x0,#8
3:
ldr d1,dConstE4
fcmp d0,d1
blo 4f
fdiv d0,d0,d1
adds x0,x0,#4
4:
ldr d1,dConstE2
fcmp d0,d1
blo 5f
fdiv d0,d0,d1
adds x0,x0,#2
5:
ldr d1,dConstE1
fcmp d0,d1
blo 10f
fdiv d0,d0,d1
adds x0,x0,#1
10:
ldr d1,dConstME5 // pas de normalisation pour les valeurs > 1E-5
fcmp d0,d1
bhi 100f // fin
ldr d1,dConstME255
fcmp d0,d1
bhi 11f
ldr d1,dConstE256
fmul d0,d0,d1
subs x0,x0,#256
11:
ldr d1,dConstME127
fcmp d0,d1
bhi 11f
ldr d1,dConstE128
fmul d0,d0,d1
subs x0,x0,#128
11:
ldr d1,dConstME63
fcmp d0,d1
bhi 11f
ldr d1,dConstE64
fmul d0,d0,d1
subs x0,x0,#64
11:
ldr d1,dConstME31
fcmp d0,d1
bhi 11f
ldr d1,dConstE32
fmul d0,d0,d1
subs x0,x0,#32
11:
ldr d1,dConstME15
fcmp d0,d1
bhi 12f
ldr d1,dConstE16
fmul d0,d0,d1
subs x0,x0,#16
12:
ldr d1,dConstME7
fcmp d0,d1
bhi 13f
ldr d1,dConstE8
fmul d0,d0,d1
subs x0,x0,#8
13:
ldr d1,dConstME3
fcmp d0,d1
bhi 14f
ldr d1,dConstE4
fmul d0,d0,d1
subs x0,x0,#4
14:
ldr d1,dConstME1
fcmp d0,d1
bhi 15f
ldr d1,dConstE2
fmul d0,d0,d1
subs x0,x0,#2
15:
ldr d1,dConstE0
fcmp d0,d1
bhi 100f
ldr d1,dConstE1
fmul d0,d0,d1
subs x0,x0,#1
100: // fin standard de la fonction
ldp x1,lr,[sp],16 // restaur registres
ret
.align 2
dConstE7: .double 0f1E7
dConstE256: .double 0f1E256
dConstE128: .double 0f1E128
dConstE64: .double 0f1E64
dConstE32: .double 0f1E32
dConstE16: .double 0f1E16
dConstE8: .double 0f1E8
dConstE4: .double 0f1E4
dConstE2: .double 0f1E2
dConstE1: .double 0f1E1
dConstME5: .double 0f1E-5
dConstME255: .double 0f1E-255
dConstME127: .double 0f1E-127
dConstME63: .double 0f1E-63
dConstME31: .double 0f1E-31
dConstME15: .double 0f1E-15
dConstME7: .double 0f1E-7
dConstME3: .double 0f1E-3
dConstME1: .double 0f1E-1
dConstE0: .double 0f1E0
/******************************************************************/
/* Décimal Conversion */
/******************************************************************/
/* x0 contain value et x1 address conversion area */
conversion10SP:
stp x1,lr,[sp,-16]! // save registers
stp x2,x3,[sp,-16]! // save registers
stp x4,x5,[sp,-16]! // save registers
mov x5,x1
mov x4,#16
mov x2,x0
mov x1,#10 // décimal conversion
1: // conversion loop
mov x0,x2 // copy begin number or quotient
udiv x2,x0,x1 // division by 10
msub x3,x1,x2,x0 // compute remainder
add x3,x3,#48 // compute digit
strb w3,[x5,x4] // store byte address area (x5) + offset (x4)
subs x4,x4,#1 // position precedente
bge 1b
strb wzr,[x5,16] // 0 final
100:
ldp x4,x5,[sp],16 // restaur registers
ldp x2,x3,[sp],16 // restaur registers
ldp x1,lr,[sp],16 // restaur registers
ret
/***************************************************/
/* ROUTINES INCLUDE */
/***************************************************/
/* for this file see task include a file in language AArch64 assembly*/
.include "../includeARM64.inc"
You may also check:How to resolve the algorithm Stack traces step by step in the D programming language
You may also check:How to resolve the algorithm Array concatenation step by step in the APL programming language
You may also check:How to resolve the algorithm Integer comparison step by step in the RPG programming language
You may also check:How to resolve the algorithm Sum of elements below main diagonal of matrix step by step in the Phix programming language
You may also check:How to resolve the algorithm User input/Text step by step in the Quackery programming language