5

In x86 inline assembly I can write something like this:

asm ("cpuid"
            : "=a" (_eax),
              "=b" (_ebx),
              "=c" (_ecx),
              "=d" (_edx)
            : "a" (op));

So in the matching constraints, instead of just writing "=r" and letting the compiler choose the register, I can say which particular register I want to use ("=a", for example, to use %eax).

How can I do this for ARM assembly? The ARM GCC assembly cookbook http://www.ethernut.de/en/documents/arm-inline-asm.html states that I can, for example, use the constraint "r" for one of the general purpose registers R0-R15, or "w" for one of the VFP floating point registers S0-S31.

But how can I constrain an operand to, for example, exactly s1? Or to a particular general purpose register?

Ciro Santilli OurBigBook.com
  • 347,512
  • 102
  • 1,199
  • 985
Mat
  • 505
  • 1
  • 5
  • 7
  • What problem would it solve on ARM to know exactly which register a specific operand is placed in ? What instructions on ARM _implicitly_ modify registers (other than pre/post increment/decrement load/store operations, for which specific constraints exist) ? In this aspect, ARM and x86 are quite different ... what's necessary on x86 is not on ARM. – FrankH. May 21 '13 at 19:55
  • 3
    @FrankH. System calls? That's actually how I found this question. – Duc Jun 24 '16 at 21:51

2 Answers2

6

I don't think gcc for ARM allows you to use constraints to specify exactly which register to use. However, you can use explicit register variables to specify a register to store a variable in:

register int my_variable asm("r0");
Mike Seymour
  • 249,747
  • 28
  • 448
  • 644
2

Explicit register variables minimal runnable example

Here is an ARMv8 Linux C freestanding hello world exemplifying https://stackoverflow.com/a/3936064/9160762 with some disassembly analysis:

main.c

#include <inttypes.h>

/*
 * Freestanding AArch64 Linux entry point.
 *
 * Issues a write system call with the greeting, then an exit system call
 * whose status is 0 when write returned sizeof(msg) and 1 otherwise.
 * Every syscall argument is pinned to a specific register by declaring it
 * as an explicit register variable, so the plain "r" constraints below
 * resolve to exactly that register.
 */
void _start(void) {
    uint64_t exit_status;

    /* write */
    {
        char msg[] = "hello syscall v8\n";
        uint64_t syscall_return;
        /* Each local below is bound to the register named in its
         * __asm__("...") suffix. */
        register uint64_t x0 __asm__ ("x0") = 1; /* stdout */
        register char *x1 __asm__ ("x1") = msg;
        /* sizeof(msg) counts the terminating NUL too, so the length passed
         * is one more than the number of visible characters. */
        register uint64_t x2 __asm__ ("x2") = sizeof(msg);
        register uint64_t x8 __asm__ ("x8") = 64; /* syscall number */
        __asm__ __volatile__ (
            "svc 0;"
            /* "+r": x0 is read as the first argument and then overwritten
             * with the syscall's return value. */
            : "+r" (x0)
            : "r" (x1), "r" (x2), "r" (x8)
            /* The asm only receives a pointer to msg; the "memory" clobber
             * tells the compiler the pointed-to bytes may be read, so the
             * store that fills msg is not treated as dead. */
            : "memory"
        );
        syscall_return = x0;
        /* 0 if every requested byte was reported written, 1 otherwise. */
        exit_status = (syscall_return != sizeof(msg));
    }

    /* exit */
    {
        /* A fresh scope lets x0 and x8 be re-declared for the second call. */
        register uint64_t x0 __asm__ ("x0") = exit_status;
        register uint64_t x8 __asm__ ("x8") = 93;
        __asm__ __volatile__ (
            "svc 0;"
            : "+r" (x0)
            : "r" (x8)
            :
        );
    }
}

GitHub upstream.

Compile and run:

sudo apt-get install qemu-user gcc-aarch64-linux-gnu
aarch64-linux-gnu-gcc -O3 -std=c99 -ggdb3 -march=armv8-a -pedantic -Wall -Wextra \
  -ffreestanding -nostdlib -static -o main.out main.c
qemu-aarch64 main.out

Output:

hello syscall v8

Disassembly:

aarch64-linux-gnu-objdump -S main.out

Output:

main.out:     file format elf64-littleaarch64


Disassembly of section .text:

0000000000400110 <_start>:
void _start(void) {
    uint64_t exit_status;

    /* write */
    {
        char msg[] = "hello syscall v8\n";
  400110:   90000003    adrp    x3, 400000 <_start-0x110>
  400114:   91056063    add x3, x3, #0x158
void _start(void) {
  400118:   d10083ff    sub sp, sp, #0x20
        uint64_t syscall_return;
        register uint64_t x0 __asm__ ("x0") = 1; /* stdout */
  40011c:   d2800020    mov x0, #0x1                    // #1
        register char *x1 __asm__ ("x1") = msg;
  400120:   910023e1    add x1, sp, #0x8
        register uint64_t x2 __asm__ ("x2") = sizeof(msg);
  400124:   d2800242    mov x2, #0x12                   // #18
        char msg[] = "hello syscall v8\n";
  400128:   a9401464    ldp x4, x5, [x3]
        register uint64_t x8 __asm__ ("x8") = 64; /* syscall number */
  40012c:   d2800808    mov x8, #0x40                   // #64
        char msg[] = "hello syscall v8\n";
  400130:   79402063    ldrh    w3, [x3, #16]
  400134:   a90097e4    stp x4, x5, [sp, #8]
  400138:   790033e3    strh    w3, [sp, #24]
        __asm__ __volatile__ (
  40013c:   d4000001    svc #0x0
            : "+r" (x0)
            : "r" (x1), "r" (x2), "r" (x8)
            : "memory"
        );
        syscall_return = x0;
        exit_status = (syscall_return != sizeof(msg));
  400140:   eb02001f    cmp x0, x2
    }

    /* exit */
    {
        register uint64_t x0 __asm__ ("x0") = exit_status;
        register uint64_t x8 __asm__ ("x8") = 93;
  400144:   d2800ba8    mov x8, #0x5d                   // #93
        register uint64_t x0 __asm__ ("x0") = exit_status;
  400148:   9a9f07e0    cset    x0, ne  // ne = any
        __asm__ __volatile__ (
  40014c:   d4000001    svc #0x0
            : "+r" (x0)
            : "r" (x8)
            :
        );
    }
}
  400150:   910083ff    add sp, sp, #0x20
  400154:   d65f03c0    ret

Attempt without explicit register variables

Mostly for fun, I tried to reach the same result without using register variables, but I was not able to do it.

In any case, the code would be more complicated, so you are better off just using register variables.

Here is my best attempt:

main.c

#include <inttypes.h>

/*
 * Freestanding AArch64 Linux entry point, written without explicit
 * register variables.
 *
 * Same behavior as the register-variable approach: write the greeting,
 * then exit with status 0 if write returned sizeof(msg) and 1 otherwise.
 * Here the syscall registers are hard-coded inside the asm template, the
 * operands are copied into them with mov instructions, and those registers
 * are listed as clobbers.
 */
void _start(void) {
    uint64_t exit_status;

    /* write */
    {
        char msg[] = "hello syscall v8\n";
        uint64_t syscall_return;
        /* NOTE(review): this statement is not marked __volatile__; it has an
         * output operand, so it appears to be kept only because
         * syscall_return is used below -- confirm against the GCC docs. */
        __asm__ (
            "mov x0, 1;" /* stdout */
            "mov x1, %[msg];"
            "mov x2, %[len];"
            "mov x8, 64;" /* syscall number */
            "svc 0;"
            /* Copy the result out of x0 into whichever register the
             * compiler picked for the output operand. */
            "mov %[syscall_return], x0;"
            : [syscall_return] "=r" (syscall_return)
            /* "p": msg is supplied as an address operand;
             * "i": the length is supplied as an immediate constant. */
            : [msg] "p" (msg),
            [len] "i" (sizeof(msg))
            /* Registers written by the template are declared clobbered;
             * "memory" tells the compiler the asm may read the bytes that
             * msg points at. */
            : "x0", "x1", "x2", "x8", "memory"
        );
        /* 0 if every requested byte was reported written, 1 otherwise. */
        exit_status = (syscall_return != sizeof(msg));
    }

    /* exit */
    __asm__ (
        "mov x0, %[exit_status];"
        "mov x8, 93;" /* syscall number */
        "svc 0;"
        :
        : [exit_status] "r" (exit_status)
        : "x0", "x8"
    );
}

GitHub upstream.

Disassembly:

main.out:     file format elf64-littleaarch64


Disassembly of section .text:

0000000000400110 <_start>:
void _start(void) {
    uint64_t exit_status;

    /* write */
    {
        char msg[] = "hello syscall v8\n";
  400110:   90000000        adrp    x0, 400000 <_start-0x110>
  400114:   9105a000        add     x0, x0, #0x168
void _start(void) {
  400118:   d10083ff        sub     sp, sp, #0x20
        char msg[] = "hello syscall v8\n";
  40011c:   a9400c02        ldp     x2, x3, [x0]
  400120:   a9008fe2        stp     x2, x3, [sp, #8]
  400124:   79402000        ldrh    w0, [x0, #16]
        uint64_t syscall_return;
        __asm__ (
  400128:   910023e3        add     x3, sp, #0x8
        char msg[] = "hello syscall v8\n";
  40012c:   790033e0        strh    w0, [sp, #24]
        __asm__ (
  400130:   d2800020        mov     x0, #0x1                        // #1
  400134:   aa0303e1        mov     x1, x3
  400138:   d2800242        mov     x2, #0x12                       // #18
  40013c:   d2800808        mov     x8, #0x40                       // #64
  400140:   d4000001        svc     #0x0
  400144:   aa0003e3        mov     x3, x0
            : [syscall_return] "=r" (syscall_return)
            : [msg] "p" (msg),
            [len] "i" (sizeof(msg))
            : "x0", "x1", "x2", "x8", "memory"
        );
        exit_status = (syscall_return != sizeof(msg));
  400148:   f100487f        cmp     x3, #0x12
  40014c:   9a9f07e1        cset    x1, ne  // ne = any
    }

    /* exit */
    __asm__ (
  400150:   aa0103e0        mov     x0, x1
  400154:   d2800ba8        mov     x8, #0x5d                       // #93
  400158:   d4000001        svc     #0x0
        "svc 0;"
        :
        : [exit_status] "r" (exit_status)
        : "x0", "x8"
    );
}
  40015c:   910083ff        add     sp, sp, #0x20
  400160:   d65f03c0        ret

This was less efficient for the following reasons:

  • in the write call, the "p" constraint needs an intermediate register (x3) for the add to sp

  • I don't know how to get the syscall return status without an extra mov to an output register

  • the exit status gets moved one extra time through x1. With register variables, it is just calculated directly into x0.

Tested in Ubuntu 18.10, GCC 8.2.0, QEMU 2.12.

Ciro Santilli
  • 3,693
  • 1
  • 18
  • 44
  • This is unsafe: nothing tells the compiler that the memory pointed to by ` "r" (msg)` is also an input to the asm. With optimization, gcc will optimize away the apparently-dead store to the stack array. You need a `"memory"` clobber, or a dummy input like `"m"( *(char (*)[]) msg )` (a dereferenced pointer to an array: i.e. the whole array is the memory operand). Or since we know the length, `"m"( *(char (*)[len]) msg )`. – Peter Cordes Feb 23 '19 at 22:52
  • You don't need to waste instructions on `mov` inside the asm statement. The accepted answer should work: `register const char * foo asm("x1") = "blah blah";` will make `"r"(foo)` pick `x1`. IDK why you're giving this big example of the crappy sub-optimal way. – Peter Cordes Feb 23 '19 at 23:12
  • @PeterCordes ah, I hadn't thought about the fact that it will be less efficient. I was trying to avoid the register variables as they are ugly, but I don't see a solution in that case. I'll update this. – Ciro Santilli Feb 23 '19 at 23:35
  • This is *exactly* what `register ... asm` locals are for. In fact this is the only *supported* use-case. Some other stuff (like using them to read a register without an asm statement) also usually work, but the somewhat-recent change to the docs makes that unsupported. You could wrap the variable declarations + asm in a `{ }` scope in a larger function, or more usually just the body of an inline wrapper function. – Peter Cordes Feb 23 '19 at 23:38
  • @PeterCordes let's try again :-) – Ciro Santilli Feb 24 '19 at 12:54