Stefan Kanthak
2018-11-05 16:00:45 UTC
Hi @ll,
the following code snippets let GCC x86-64 generate rather
poor code (see <https://godbolt.org/z/hDj0W5>):
__int128 foo(__int128 n)
{
n <<= 1;
n += 1;
n |= 1;
return n;
}
__int128 bar(__int128 n)
{
n += n;
n += 1;
n |= 1;
return n;
}
With -O1: With -O2:
foo: foo:
mov rax, rdi mov rax, rdi
mov rdx, rsi mov rdx, rsi
shld rdx, rdi, 1 add rax, rdi
add rax, rdi shld rdx, rdi, 1
add rax, 1 add rax, 1
adc rdx, 0 adc rdx, 0
mov rcx, rdx or rax, 1
or rax, 1 mov rcx, rdx
mov rdx, rcx mov rdx, rcx
ret ret
bar: bar:
mov rax, rdi mov rax, rdi
mov rdx, rsi mov rdx, rsi
shld rdx, rdi, 1 add rax, rdi
add rax, rdi shld rdx, rdi, 1
add rax, 1 add rax, 1
adc rdx, 0 adc rdx, 0
mov rcx, rdx or rax, 1
or rax, 1 mov rcx, rdx
mov rdx, rcx mov rdx, rcx
ret ret
In all 4 examples, the last 4 instructions are superfluous!
1. the optimizer should be aware that both "n <<= 1;" and "n += n;"
yield an even number, and thus a following "n += 1;" can't
generate a carry.
More general: after "n <<= x;" addition of any value less than
1 << x can't generate a carry.
2. the optimzier should also be aware that addition of 1 to an even
number yields an odd number, and thus a following "n |= 1;" is
a no-op.
More general: after "n <<= x;", the addition of any value less
than 1 << x is equal to a logical or with the same value.
3. last, it should never create "mov rcx, rdx" followed by an
inverse "mov rdx, rcx".
I also wonder why a shld is created here: at least for "n += n;"
I expect a more straightforward
add rax, rax
adc rdx, rdx
regards
Stefan Kanthak
PS: of course GCC x86-32 exhibits the same flaws with int64_t!
the following code snippets let GCC x86-64 generate rather
poor code (see <https://godbolt.org/z/hDj0W5>):
__int128 foo(__int128 n)
{
n <<= 1;
n += 1;
n |= 1;
return n;
}
__int128 bar(__int128 n)
{
n += n;
n += 1;
n |= 1;
return n;
}
With -O1: With -O2:
foo: foo:
mov rax, rdi mov rax, rdi
mov rdx, rsi mov rdx, rsi
shld rdx, rdi, 1 add rax, rdi
add rax, rdi shld rdx, rdi, 1
add rax, 1 add rax, 1
adc rdx, 0 adc rdx, 0
mov rcx, rdx or rax, 1
or rax, 1 mov rcx, rdx
mov rdx, rcx mov rdx, rcx
ret ret
bar: bar:
mov rax, rdi mov rax, rdi
mov rdx, rsi mov rdx, rsi
shld rdx, rdi, 1 add rax, rdi
add rax, rdi shld rdx, rdi, 1
add rax, 1 add rax, 1
adc rdx, 0 adc rdx, 0
mov rcx, rdx or rax, 1
or rax, 1 mov rcx, rdx
mov rdx, rcx mov rdx, rcx
ret ret
In all 4 examples, the last 4 instructions are superfluous!
1. the optimizer should be aware that both "n <<= 1;" and "n += n;"
yield an even number, and thus a following "n += 1;" can't
generate a carry.
More general: after "n <<= x;" addition of any value less than
1 << x can't generate a carry.
2. the optimzier should also be aware that addition of 1 to an even
number yields an odd number, and thus a following "n |= 1;" is
a no-op.
More general: after "n <<= x;", the addition of any value less
than 1 << x is equal to a logical or with the same value.
3. last, it should never create "mov rcx, rdx" followed by an
inverse "mov rdx, rcx".
I also wonder why a shld is created here: at least for "n += n;"
I expect a more straightforward
add rax, rax
adc rdx, rdx
regards
Stefan Kanthak
PS: of course GCC x86-32 exhibits the same flaws with int64_t!