1
%include "asm_io.inc"
;
; initialized data is put in the .data segment
;
segment .data
array: dd 180,32,455,499,388,480,239,346,257,84
fmt: dd ",%d",0

; uninitialized data is put in the .bss segment
;
segment .bss
 resd 10
;
; code is put in the .text segment
;
segment .text
        extern  printf
        global  asm_main
asm_main:
        enter   0,0               ; setup routine
        pusha

; The following is just example of how to print an array

        push dword 10
        push dword array
        call print_array
        add  esp,8                ; clean up stack

; don't delete anything following this comment
        popa
        mov     eax, 0            ; return back to C
        leave                     
        ret



segment .data
ListFormat    db   ",%u", 0

segment .text
        global  print_array
print_array:
        enter   0,0
        push    esi
        push    ebx

        xor     esi, esi                  ; esi = 0
        mov     ecx, [ebp+12]             ; ecx = n
        mov     ebx, [ebp+8]
        xor     edx, edx
        mov     dl, [ebx + esi]     ; ebx = address of array
        mov     eax,edx             
        call    print_int
        dec     ecx
        inc     esi

print_loop:
        xor     edx,edx
        mov     dl,[ebx + esi]
        push    ecx                       ; printf might change ecx!
        push    edx                       ; push array value
        push    dword ListFormat
        call    printf
        add     esp, 8              ; remove parameters (leave ecx!)
        inc     esi
        pop     ecx
        loop    print_loop
        call    print_nl

        pop     ebx
        pop     esi
        leave
        ret

So this code prints out 180,0,0,0,32,0,0,0,199,1 when I want to print out 180,32,455,499,388,480,239,346,257,84. I think that it's because this is designed to print byte words. I'm trying to print in double words and I'm guessing something in the print_array needs to be changed. I tried mov dl, [ebx+esi*4] but it still doesn't print the array that I want to print. Or does something else needs to be changed to print array of double words?

  • 1
    `dl` is a byte-sized register. You want `mov edx, [ebx + esi*4]` (assuming that `print_int` will use all the bits in `edx` and not just `dl`). – Michael Dec 17 '20 at 06:48

1 Answers1

2

You could leave it at changing the mov dl, [ebx+esi] instruction into mov edx, [ebx+esi*4], but that would be just half the fun!

1 Why not try to make a loop that can deal with the special case of the first value in the list that doesn't need the comma prefix? No more using print_int.
2 Also don't use the LOOP instruction. It's slow! The pair cmp jb (that can macro-fuse) is much better.
3 And replacing the prolog enter 0,0 and epilog leave codes by simply addressing the parameters via ESP relative addressing is simple enough.
4 Always consider the special cases! What if the array happens to be empty?

print_array:
        push    ebx
        push    esi
        mov     ebx, [esp+12]       ; Begin array
        mov     esi, [esp+16]       ; n
        test    esi, esi
        jz      done

        lea     esi, [ebx+esi*4]    ; End array
        mov     edx, ListFormat+1   ; "%u"
more:   mov     eax, [ebx]          ; Array dword value
        push    eax
        push    edx                 ; "%u" first time, ",%u" others
        call    printf
        add     esp, 8
        add     ebx, 4              ; To next dword in the array
        mov     edx, ListFormat     ; ",%u"
        cmp     ebx, esi            ; Current address < Last address ?
        jb      more                ; Yes
        call    print_nl

done:   pop     esi
        pop     ebx
        ret

Under the right conditions, keeping ESP fixed inside this loop can be worth doing. See Peter Cordes' comments below this answer.
Next is a version of this code that keeps ESP fixed inside the loop:

print_array:
        push    ebx
        push    esi
        mov     ebx, [esp+12]       ; Begin array
        mov     esi, [esp+16]       ; n
        test    esi, esi
        jz      done

        sub     esp, 8              ; Space for the printf args
        lea     esi, [ebx+esi*4]    ; End array
        mov     edx, ListFormat+1   ; "%u"
more:   mov     eax, [ebx]          ; Array dword value
        mov     [esp+4], eax
        mov     [esp], edx          ; "%u" first time, ",%u" others
        call    printf
        add     ebx, 4              ; To next dword in the array
        mov     edx, ListFormat     ; ",%u"
        cmp     ebx, esi            ; Current address < Last address ?
        jb      more                ; Yes
        call    print_nl
        add     esp, 8

done:   pop     esi
        pop     ebx
        ret
Sep Roland
  • 33,889
  • 7
  • 43
  • 76
  • 1
    If you're going to optimize it for fun, you might keep ESP fixed inside the loop, so you store args with `mov [esp+4], eax` and `mov [esp], edx`. The `add esp,8` goes after the loop, but you need a `sub esp,8` before it. (Or `sub esp, 8+12` to keep the stack aligned by 16 if this is Linux; other systems didn't break ABI compat by requiring 16-byte ESP alignment in 32-bit code.) But if you're going for efficient but compact overall, this is good. (And appropriate for code that calls an expensive func like printf). Neat idea to handle the varying format with pointer register instead of peeling. – Peter Cordes Dec 18 '20 at 06:47
  • @PeterCordes Thanks for correcting my typo! I get the mechanics about keeping `ESP` fixed inside the loop, but I'm not sure *how* this is an improvement. In your answer to [this question](https://stackoverflow.com/questions/54346690/why-is-it-better-to-use-the-ebp-than-the-esp-register-to-locate-parameters-on-th) you only name the availability of an extra register as an advantage despite the downsides to using `[ ESP+..]`. In the above code there's no register shortage. So what exactly is the improvement here? – Sep Roland Dec 18 '20 at 16:03
  • The improvement is one fewer instruction *inside* the loop, and everything else is equal. (The instructions are all single-uop on modern CPUs, unlike on early P6 where push was multiple. Still a stack-sync uop because we access ESP directly either way; that would be avoided, too, if we could store to the stack via EBP.) This trades larger code-size and static instruction count for lower *dynamic* instruction and uop count. If `printf` was a cheap function (despite using a crappy stack-args convention) and this loop was expected to run *many* iterations, it could be worth doing. – Peter Cordes Dec 18 '20 at 16:38