How to resolve the algorithm SHA-256 step by step in the AArch64 Assembly programming language

Published on 12 May 2024 09:40 PM

How to resolve the algorithm SHA-256 step by step in the AArch64 Assembly programming language

Table of Contents

Problem Statement

SHA-256 is the recommended stronger alternative to SHA-1. See FIPS PUB 180-4 for implementation details. Either by using a dedicated library or implementing the algorithm in your language, show that the SHA-256 digest of the string "Rosetta code" is: 764faf5c61ac315f1497f9dfa542713965b785e5cc2f707d6468d7d1124cdfcf

Let's start with the solution:

Step-by-step solution: how to implement the SHA-256 algorithm in AArch64 assembly language

Source code in AArch64 assembly language

/* ARM assembly AARCH64 Raspberry PI 3B */
/*  program sha256_64.s   */

/*******************************************/
/* Constants file                          */
/*******************************************/
/* for this file, see the task "Include a file" for AArch64 assembly */
.include "../includeConstantesARM64.inc"

.equ LGHASH, 32                  // result length 


/*******************************************/
/* Structures                              */
/*******************************************/
/* Byte offsets of the eight 32-bit working variables a..h.      */
/* The symbols are absolute values: var_a = 0, var_b = 4, ...,   */
/* var_h = 28; each field is 4 bytes wide.                       */
    .struct  0
var_a:      .skip 4        // a
var_b:      .skip 4        // b
var_c:      .skip 4        // c
var_d:      .skip 4        // d
var_e:      .skip 4        // e
var_f:      .skip 4        // f
var_g:      .skip 4        // g
var_h:      .skip 4        // h

/*********************************/
/* Initialized data              */
/*********************************/
.data
/* NUL-terminated messages: the text to hash, test vectors and display strings */
szMessRosetta:        .asciz "Rosetta code"
szMessTest1:           .asciz "abc" 
szMessSup64:           .ascii "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                       .ascii "abcdefghijklmnopqrstuvwxyz"
                       .asciz "1234567890AZERTYUIOP"
szMessTest2:           .asciz "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
szMessFinPgm:          .asciz "Program End ok.\n"
szMessResult:          .asciz "Rosetta code => "
szCarriageReturn:   .asciz "\n"

/* The strings above have an arbitrary total length, so the 32-bit tables */
/* below are explicitly aligned on a 4-byte boundary: they are read with  */
/* 32-bit loads.                                                          */
.p2align 2
/* array of the 8 initial hash values H0..H7 */
tbConstHi:           .int 0x6A09E667       // H0
                     .int 0xBB67AE85       // H1
                     .int 0x3C6EF372       // H2
                     .int 0xA54FF53A       // H3
                     .int 0x510E527F       // H4
                     .int 0x9B05688C       // H5
                     .int 0x1F83D9AB       // H6
                     .int 0x5BE0CD19       // H7
/* array of the 64 round constants Kt (one per round t = 0..63) */
tbConstKt:
  .int 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
  .int 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
  .int 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
  .int 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
  .int 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
  .int 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
  .int 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
  .int 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2

/*********************************/
/* UnInitialized data            */
/*********************************/
.bss
.p2align 4                                       // 2^4 = 16-byte boundary
qNbBlocs:                    .space 8            // number of 64-byte blocks to process
sZoneConv:                   .space 24           // hexadecimal conversion buffer
sZoneTrav:                   .space 1000         // work area: copy of the message plus padding
.p2align 8                                       // 2^8 = 256-byte boundary
tbH:                         .space 8 * 4        // hash values H0..H7 (32 bits each)
tbabcdefgh:                  .space 8 * 4        // working variables a..h (32 bits each)
tbW:                         .space 64 * 4       // message schedule: 64 words W
/*********************************/
/*  code section                 */
/*********************************/
.text
.global main
main:                                       // program entry point

    ldr     x0, qAdrszMessRosetta           // message to hash
    // other test messages, enable one instead of the line above:
    //ldr   x0, qAdrszMessTest1
    //ldr   x0, qAdrszMessTest2
    //ldr   x0, qAdrszMessSup64
    bl      computeSHA256                   // compute the SHA-256 digest into tbH

    ldr     x0, qAdrszMessResult
    bl      affichageMess                   // print the result prefix

    ldr     x0, qAdrtbH
    bl      displaySHA1                     // print the digest words in hexadecimal

    ldr     x0, qAdrszMessFinPgm
    bl      affichageMess                   // print the end-of-program message

100:                                        // standard end of the program
    mov     x8, EXIT                        // request to exit program
    mov     x0, xzr                         // return code 0
    svc     0                               // perform the system call

/* literal pool: addresses of the data used through pc-relative loads */
qAdrszCarriageReturn:     .quad szCarriageReturn
qAdrszMessResult:         .quad szMessResult
qAdrszMessRosetta:        .quad szMessRosetta
qAdrszMessTest1:          .quad szMessTest1
qAdrszMessTest2:          .quad szMessTest2
qAdrsZoneTrav:            .quad sZoneTrav
qAdrsZoneConv:            .quad sZoneConv
qAdrszMessFinPgm:         .quad szMessFinPgm
qAdrszMessSup64:          .quad szMessSup64
/******************************************************************/
/*     compute SHA-256                                            */
/******************************************************************/
/* in  : x0 = address of the NUL-terminated message               */
/* out : x0 = address of the digest (tbH, eight 32-bit words      */
/*            H0..H7), or 0 when the message is too long for the  */
/*            work area (tbH is then left untouched)              */
/* x1 and lr are preserved; x2-x12 and the flags are clobbered.   */
/* Works in the static areas sZoneTrav, tbH, tbabcdefgh and tbW.  */
.equ SHA256_ZONE_SIZE, 1000                            // must match the size reserved for sZoneTrav
.equ SHA256_MSG_MAX,   (SHA256_ZONE_SIZE / 64) * 64 - 9 // longest message: whole blocks minus 0x80 byte and 8 length bytes
computeSHA256:
    stp x1,lr,[sp,-16]!      // save registers
    ldr x1,qAdrsZoneTrav     // x1 = work area address
    mov x2,#0                // x2 = number of bytes copied so far
debCopy:                     // copy the message into the work area
    ldrb w3,[x0,x2]
    cbz w3,sha256CopyEnd     // NUL terminator: x2 = message length in bytes
    cmp x2,#SHA256_MSG_MAX
    bhs sha256TooLong        // this byte plus the padding would overrun the work area
    strb w3,[x1,x2]
    add x2,x2,#1
    b debCopy
sha256CopyEnd:
    lsl x6,x2,#3             // message length in bits
    mov w3,#0x80             // the padding starts with a single 1 bit
    strb w3,[x1,x2]
    add x2,x2,#1             // length in bytes, 0x80 included
addZeroes:                   // append zero bytes until length = 56 modulo 64
    and x5,x2,#63
    cmp x5,#56
    beq storeLength          // yes -> end of zero padding
    strb wzr,[x1,x2]
    add x2,x2,#1
    b addZeroes
storeLength:
    rev x6,x6                // bit length as a 64-bit big-endian integer
    str x6,[x1,x2]           // it fills the last 8 bytes of the final block
    add x2,x2,#8             // x2 = padded length, a multiple of 64

    ldr x7,qAdrtbConstHi     // initial hash values address
    ldr x4,qAdrtbH           // hash area H address
    mov x5,#0
loopConst:                   // init array H with the initial hash values
    ldr w6,[x7,x5,lsl #2]
    str w6,[x4,x5,lsl #2]
    add x5,x5,#1
    cmp x5,#8
    blt loopConst
                             // split into blocks of 64 bytes
    lsr x4,x2,#6             // number of blocks
    ldr x0,qAdrqNbBlocs
    str x4,[x0]              // save the number of blocks
    mov x7,#0                // x7 = current block number
loopBlock:                   // begin loop over each block of 64 bytes
    mov x0,x7
    bl inversion             // byte-swap each word of the block (the machine is little endian)
    ldr x3,qAdrtbW           // x3 = working area W address
    mov x6,#0                // x6 = index t
    ldr x1,qAdrsZoneTrav
    add x2,x1,x7,lsl #6      // x2 = address of the block = work area + block number * 64

loopPrep:                    // loop to expand the block into the 64 words W
    cmp x6,#15
    bgt expand1
    ldr w0,[x2,x6,lsl #2]    // W[t] = message word t for t = 0..15
    str w0,[x3,x6,lsl #2]
    b expandEnd

expand1:                     // W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16]
    sub x8,x6,#2
    ldr w9,[x3,x8,lsl #2]
    ror w10,w9,#17           // s1(x) = ror 17 xor ror 19 xor shr 10
    ror w11,w9,#19
    eor w10,w10,w11
    lsr w11,w9,#10
    eor w10,w10,w11
    sub x8,x6,#7
    ldr w9,[x3,x8,lsl #2]
    add w9,w9,w10            // + W[t-7]
    sub x8,x6,#15
    ldr w10,[x3,x8,lsl #2]
    ror w11,w10,#7           // s0(x) = ror 7 xor ror 18 xor shr 3
    ror w12,w10,#18
    eor w11,w11,w12
    lsr w12,w10,#3
    eor w10,w11,w12
    add w9,w9,w10
    sub x8,x6,#16
    ldr w11,[x3,x8,lsl #2]
    add w9,w9,w11            // + W[t-16]

    str w9,[x3,x6,lsl #2]
expandEnd:
    add x6,x6,#1
    cmp x6,#64               // 64 words ?
    blt loopPrep             // no -> loop


    /* COMPUTING THE MESSAGE DIGEST */
    /* x3  working area W address  */
    /* x5  address of the constants K */
    /* x6  counter t */
    /* x7  block counter */
    /* x8  address of the variables a b c d e f g h  */
                             // init variables a b c d e f g h from H
    ldr x0,qAdrtbH
    ldr x8,qAdrtbabcdefgh
    mov x1,#0
loopInita:
    ldr w9,[x0,x1,lsl #2]
    str w9,[x8,x1,lsl #2]
    add x1,x1,#1
    cmp x1,#8
    blt loopInita

    ldr x5,qAdrtbConstKt
    mov x6,#0
loop64T:                      // begin loop of the 64 rounds
    ldr w9,[x8,#var_h]
    ldr w10,[x8,#var_e]       // compute T1
    ror w11,w10,#6            // Sigma1(e) = ror 6 xor ror 11 xor ror 25
    ror w12,w10,#11
    eor w11,w11,w12
    ror w12,w10,#25
    eor w11,w11,w12
    add w9,w9,w11             // h + Sigma1(e)
    ldr w0,[x8,#var_f]        // Ch(e,f,g) = (e and f) xor (not e and g)
    ldr w4,[x8,#var_g]
    and w11,w10,w0
    mvn w12,w10
    and w12,w12,w4
    eor w11,w11,w12
    add w9,w9,w11             // h + Sigma1(e) + Ch(e,f,g)
    ldr w0,[x5,x6,lsl #2]     // + Kt
    add w9,w9,w0
    ldr w0,[x3,x6,lsl #2]     // + Wt
    add w9,w9,w0              // w9 = T1
                              // compute T2
    ldr w10,[x8,#var_a]       // Sigma0(a) = ror 2 xor ror 13 xor ror 22
    ror w11,w10,#2
    ror w12,w10,#13
    eor w11,w11,w12
    ror w12,w10,#22
    eor w11,w11,w12
    ldr w2,[x8,#var_b]
    ldr w4,[x8,#var_c]
                              // Maj(a,b,c) = (a and b) xor (a and c) xor (b and c)
    and w12,w10,w2
    and w0,w10,w4
    eor w12,w12,w0
    and w0,w2,w4
    eor w12,w12,w0
    add w12,w12,w11           // w12 = T2
                              // rotate the variables: h=g g=f f=e e=d+T1 d=c c=b b=a a=T1+T2
    ldr w4,[x8,#var_g]
    str w4,[x8,#var_h]
    ldr w4,[x8,#var_f]
    str w4,[x8,#var_g]
    ldr w4,[x8,#var_e]
    str w4,[x8,#var_f]
    ldr w4,[x8,#var_d]
    add w4,w4,w9              // add T1
    str w4,[x8,#var_e]
    ldr w4,[x8,#var_c]
    str w4,[x8,#var_d]
    ldr w4,[x8,#var_b]
    str w4,[x8,#var_c]
    ldr w4,[x8,#var_a]
    str w4,[x8,#var_b]
    add w4,w9,w12             // T1 + T2
    str w4,[x8,#var_a]

    add x6,x6,#1              // increment t
    cmp x6,#64
    blt loop64T
                              // end of block: H[i] = H[i] + variable i
    ldr x0,qAdrtbH            // hash area H address
    mov x10,#0
loopStoreH:
    ldr w9,[x8,x10,lsl #2]
    ldr w3,[x0,x10,lsl #2]
    add w3,w3,w9
    str w3,[x0,x10,lsl #2]
    add x10,x10,#1
    cmp x10,#8
    blt loopStoreH
                              // next block
    add x7,x7,#1              // increment block number
    ldr x0,qAdrqNbBlocs
    ldr x4,[x0]               // reload the number of blocks
    cmp x7,x4                 // all blocks done ?

    blt loopBlock             // no -> loop on the next block

    ldr x0,qAdrtbH            // return the digest address
    b 100f
sha256TooLong:
    mov x0,#0                 // error: message longer than SHA256_MSG_MAX bytes

100:
    ldp x1,lr,[sp],16         // restore 2 registers
    ret                       // return to address lr x30
qAdrtbConstHi:            .quad tbConstHi
qAdrtbConstKt:            .quad tbConstKt
qAdrtbH:                  .quad tbH
qAdrtbW:                  .quad tbW
qAdrtbabcdefgh:           .quad tbabcdefgh
qAdrqNbBlocs:             .quad qNbBlocs
/******************************************************************/
/*     byte-swap the sixteen 32-bit words of one block            */
/******************************************************************/
/* x0 contains the block number; the block is modified in place   */
/* in the work area sZoneTrav. x1, x2, x3 and lr are restored.    */
inversion:
    stp x1,lr,[sp,-16]!            // save registers
    stp x2,x3,[sp,-16]!            // save registers
    ldr x1,qAdrsZoneTrav
    add x1,x1,x0,lsl #6            // x1 = start of the block (block number * 64)
    mov x2,#16                     // x2 = words left to process
1:                                 // start loop
    ldr w3,[x1]
    rev w3,w3                      // reverse the 4 bytes of the word
    str w3,[x1],#4                 // store it back and advance to the next word
    subs x2,x2,#1
    bne 1b
100:
    ldp x2,x3,[sp],16              // restore 2 registers
    ldp x1,lr,[sp],16              // restore 2 registers
    ret                            // return to address lr x30
/******************************************************************/
/*     display hash (LGHASH bytes, as 32-bit words in hexadecimal) */
/******************************************************************/
/* x0 contains the address of the hash                            */
/* The routine name is kept for its callers; the number of words  */
/* printed is LGHASH / 4. x1, x2, x3 and lr are restored.         */
/* x2 and x3 are live across the calls, so this relies on         */
/* conversion16_4W and affichageMess preserving them.             */
displaySHA1:
    stp x1,lr,[sp,-16]!            // save registers
    stp x2,x3,[sp,-16]!            // save registers
    mov x3,x0                      // x3 = hash address
    mov x2,#0                      // x2 = word index
1:
    ldr w0,[x3,x2,lsl #2]          // load one 32-bit word of the hash
    ldr x1,qAdrsZoneConv
    bl conversion16_4W             // writes 8 hexadecimal digits at sZoneConv, x1 is preserved
    strb wzr,[x1,#8]               // terminate the string explicitly: the conversion stores no final NUL
    ldr x0,qAdrsZoneConv
    bl affichageMess
    add x2,x2,#1
    cmp x2,#LGHASH / 4
    blt 1b                         // and loop
    ldr x0,qAdrszCarriageReturn
    bl affichageMess               // display the end of line
100:
    ldp x2,x3,[sp],16              // restore 2 registers
    ldp x1,lr,[sp],16              // restore 2 registers
    ret                            // return to address lr x30
/******************************************************************/
/*     hexadecimal conversion of a 32-bit value                   */
/******************************************************************/
/* x0 contains the value (low 32 bits are converted)              */
/* x1 contains the address of the receiving area                  */
/* Writes 8 uppercase hexadecimal digits, most significant first, */
/* and no terminating NUL. All registers used are restored.       */
conversion16_4W:
    stp x0,lr,[sp,-48]!        // save registers
    stp x1,x2,[sp,32]          // save registers
    stp x3,x4,[sp,16]          // save registers
    mov x3,x0                  // x3 = value to convert
    mov x2,#28                 // x2 = bit position of the current digit
1:                             // one hexadecimal digit per iteration
    lsr x0,x3,x2               // bring the digit to the low 4 bits
    and x0,x0,#0xF             // and isolate it
    add x4,x0,#48              // candidate '0'..'9'
    cmp x0,#10
    add x0,x0,#55              // candidate 'A'..'F'
    csel x0,x4,x0,lt           // digit below 10 -> decimal character
    strb w0,[x1],#1            // store the character and advance the address
    subs x2,x2,#4              // next digit, 4 bits lower
    bge 1b                     // position still >= 0 -> loop

100:                           // standard end of the function
    ldp x3,x4,[sp,16]          // restore 2 registers
    ldp x1,x2,[sp,32]          // restore 2 registers
    ldp x0,lr,[sp],48          // restore 2 registers
    ret
/********************************************************/
/*        File Include fonctions                        */
/********************************************************/
/* for this file see task include a file in language AArch64 assembly */
.include "../includeARM64.inc"

  

You may also check: How to resolve the algorithm Naming conventions step by step in the REXX programming language
You may also check: How to resolve the algorithm The Twelve Days of Christmas step by step in the Fortran programming language
You may also check: How to resolve the algorithm Egyptian division step by step in the Nim programming language
You may also check: How to resolve the algorithm Read a file character by character/UTF8 step by step in the Phix programming language
You may also check: How to resolve the algorithm Sequence of primes by trial division step by step in the Ruby programming language