Original C source code:
/* Index of the 64-bit word that holds bit number idx. */
#define BITSET_WORDIDX(idx) ((idx) >> 6)
/* Position of bit number idx inside its 64-bit word (0..63). */
#define BITSET_WORD_BITIDX(idx) ((idx) & 0x3F)

/*
 * Set bit `bitset_idx` of `bitset` and return the bit's previous value
 * (0 or 1).
 *
 * bitset      array of 64-bit words; the word at index bitset_idx / 64 is
 *             read and written, so the caller must make sure it exists.
 * bitset_idx  zero-based bit number counted across the whole array.
 *
 * The load, the bit update and the store are separate steps, so the
 * operation is not atomic.
 */
static inline __attribute__((always_inline)) unsigned int test_and_set_bit_nonatomic(
    uint64_t *restrict bitset, const unsigned int bitset_idx)
{
    uint64_t *const word = bitset + BITSET_WORDIDX(bitset_idx);
    uint64_t u64_bitset = *word;
    unsigned int cf_copy;

#if defined(__x86_64__) && defined(__GCC_ASM_FLAG_OUTPUTS__)
    /* btsq takes its bit index in a 64-bit register, hence the widening. */
    const uint64_t u64_idx = BITSET_WORD_BITIDX(bitset_idx);

    /*
     * __asm__ instead of asm: the plain keyword is not available in strict
     * ISO modes such as -std=c11.  The statement has no effect beyond its
     * two outputs, so it does not need to be volatile.
     */
    __asm__("btsq %2, %1"
            : "=@ccc"(cf_copy), "+r"(u64_bitset)
            : "r"(u64_idx));
#else
    /* Portable equivalent for targets without x86-64 flag-output asm. */
    const uint64_t mask = UINT64_C(1) << BITSET_WORD_BITIDX(bitset_idx);

    cf_copy = (u64_bitset & mask) != 0;
    u64_bitset |= mask;
#endif

    *word = u64_bitset;
    return cf_copy;
}
Assembly generated for a call to this function with gcc -Ofast -march=native -S -fverbose-asm (I rewrote the comments for better readability):
# %eax holds the "bitset_idx" argument: the function is "always_inline",
# so the argument is a register variable of the enclosing scope.
# %eax is never written in this excerpt.
movl %eax, %edx # tmp0 = bitset_idx
movl %eax, %ebp # tmp1 = bitset_idx
shrl $6, %edx # tmp0 >>= 6 (BITSET_WORDIDX: index of the 64-bit word)
andl $63, %ebp # tmp1 &= 0x3F (BITSET_WORD_BITIDX: bit position in the word)
# %r13 holds the "bitset" argument, likewise a register variable of the
# enclosing scope because the function is "always_inline".
# %r13 is never written in this excerpt.
leaq 0(%r13,%rdx,8), %rsi # bitset += tmp0 (scaled by 8 bytes per word)
movl %ebp, %edi # u64_idx = tmp1 (the copy the question is about)
movq (%rsi), %rdx # u64_bitset = *bitset
#APP
btsq %rdi, %rdx # inline asm from the C source: %rdi = u64_idx, %rdx = u64_bitset
#NO_APP
movq %rdx, (%rsi) # *bitset = u64_bitset
My question is: why does GCC insert "movl %ebp, %edi" (u64_idx = temp. register #1)? Since EBP already holds a temporary value, wouldn't it be more efficient to treat the EBP register itself as "u64_idx" from that point on?