x86/crc32: improve crc32c_arch() code generation with clang

crc32c_arch() is affected by
https://github.com/llvm/llvm-project/issues/20571 where clang
unnecessarily spills the inputs to "rm"-constrained operands to the
stack.  Replace "rm" with ASM_INPUT_RM which partially works around this
by expanding to "r" when the compiler is clang.  This results in better
code generation with clang, though still not optimal.

Link: https://lore.kernel.org/r/20250210210741.471725-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
This commit is contained in:
Eric Biggers
2025-02-10 13:07:41 -08:00
parent 4ffd50862d
commit cf1ea3a7c1

View File

@@ -55,10 +55,10 @@ u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
for (num_longs = len / sizeof(unsigned long);
num_longs != 0; num_longs--, p += sizeof(unsigned long))
-asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
+asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
for (len %= sizeof(unsigned long); len; len--, p++)
-asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
+asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
return crc;
}