mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-09 10:11:52 -04:00
x86/percpu: Use C for percpu read/write accessors
The percpu code mostly uses inline assembly. Using segment qualifiers
makes it possible to use C code instead, which enables the compiler to perform
various optimizations (e.g. propagation of memory arguments). Convert
percpu read and write accessors to C code, so the memory argument can
be propagated to the instruction that uses this argument.
Some examples of propagations:
a) into sign/zero extensions:
the code improves from:
65 8a 05 00 00 00 00 mov %gs:0x0(%rip),%al
0f b6 c0 movzbl %al,%eax
to:
65 0f b6 05 00 00 00 movzbl %gs:0x0(%rip),%eax
00
and in a similar way for:
movzbl %gs:0x0(%rip),%edx
movzwl %gs:0x0(%rip),%esi
movzbl %gs:0x78(%rbx),%eax
movslq %gs:0x0(%rip),%rdx
movslq %gs:(%rdi),%rbx
b) into compares:
the code improves from:
65 8b 05 00 00 00 00 mov %gs:0x0(%rip),%eax
a9 00 00 0f 00 test $0xf0000,%eax
to:
65 f7 05 00 00 00 00 testl $0xf0000,%gs:0x0(%rip)
00 00 0f 00
and in a similar way for:
testl $0xf0000,%gs:0x0(%rip)
testb $0x1,%gs:0x0(%rip)
testl $0xff00,%gs:0x0(%rip)
cmpb $0x0,%gs:0x0(%rip)
cmp %gs:0x0(%rip),%r14d
cmpw $0x8,%gs:0x0(%rip)
cmpb $0x0,%gs:(%rax)
c) into other insns:
the code improves from:
1a355: 83 fa ff cmp $0xffffffff,%edx
1a358: 75 07 jne 1a361 <...>
1a35a: 65 8b 15 00 00 00 00 mov %gs:0x0(%rip),%edx
1a361:
to:
1a35a: 83 fa ff cmp $0xffffffff,%edx
1a35d: 65 0f 44 15 00 00 00 cmove %gs:0x0(%rip),%edx
1a364: 00
The above propagations result in the following code size
improvements for the current mainline kernel (with the default config),
compiled with:
# gcc (GCC) 12.3.1 20230508 (Red Hat 12.3.1-1)
    text    data    bss      dec filename
25508862 4386540 808388 30703790 vmlinux-vanilla.o
25500922 4386532 808388 30695842 vmlinux-new.o
Co-developed-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/20231004192404.31733-1-ubizjak@gmail.com
This commit is contained in:
@@ -400,13 +400,66 @@ do { \
|
||||
#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp)
|
||||
#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
|
||||
|
||||
#ifdef CONFIG_USE_X86_SEG_SUPPORT
|
||||
|
||||
#define __raw_cpu_read(qual, pcp) \
|
||||
({ \
|
||||
*(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)); \
|
||||
})
|
||||
|
||||
#define __raw_cpu_write(qual, pcp, val) \
|
||||
do { \
|
||||
*(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)) = (val); \
|
||||
} while (0)
|
||||
|
||||
#define raw_cpu_read_1(pcp) __raw_cpu_read(, pcp)
|
||||
#define raw_cpu_read_2(pcp) __raw_cpu_read(, pcp)
|
||||
#define raw_cpu_read_4(pcp) __raw_cpu_read(, pcp)
|
||||
#define raw_cpu_write_1(pcp, val) __raw_cpu_write(, pcp, val)
|
||||
#define raw_cpu_write_2(pcp, val) __raw_cpu_write(, pcp, val)
|
||||
#define raw_cpu_write_4(pcp, val) __raw_cpu_write(, pcp, val)
|
||||
|
||||
#define this_cpu_read_1(pcp) __raw_cpu_read(volatile, pcp)
|
||||
#define this_cpu_read_2(pcp) __raw_cpu_read(volatile, pcp)
|
||||
#define this_cpu_read_4(pcp) __raw_cpu_read(volatile, pcp)
|
||||
#define this_cpu_write_1(pcp, val) __raw_cpu_write(volatile, pcp, val)
|
||||
#define this_cpu_write_2(pcp, val) __raw_cpu_write(volatile, pcp, val)
|
||||
#define this_cpu_write_4(pcp, val) __raw_cpu_write(volatile, pcp, val)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define raw_cpu_read_8(pcp) __raw_cpu_read(, pcp)
|
||||
#define raw_cpu_write_8(pcp, val) __raw_cpu_write(, pcp, val)
|
||||
|
||||
#define this_cpu_read_8(pcp) __raw_cpu_read(volatile, pcp)
|
||||
#define this_cpu_write_8(pcp, val) __raw_cpu_write(volatile, pcp, val)
|
||||
#endif
|
||||
|
||||
#else /* CONFIG_USE_X86_SEG_SUPPORT */
|
||||
|
||||
#define raw_cpu_read_1(pcp) percpu_from_op(1, , "mov", pcp)
|
||||
#define raw_cpu_read_2(pcp) percpu_from_op(2, , "mov", pcp)
|
||||
#define raw_cpu_read_4(pcp) percpu_from_op(4, , "mov", pcp)
|
||||
|
||||
#define raw_cpu_write_1(pcp, val) percpu_to_op(1, , "mov", (pcp), val)
|
||||
#define raw_cpu_write_2(pcp, val) percpu_to_op(2, , "mov", (pcp), val)
|
||||
#define raw_cpu_write_4(pcp, val) percpu_to_op(4, , "mov", (pcp), val)
|
||||
|
||||
#define this_cpu_read_1(pcp) percpu_from_op(1, volatile, "mov", pcp)
|
||||
#define this_cpu_read_2(pcp) percpu_from_op(2, volatile, "mov", pcp)
|
||||
#define this_cpu_read_4(pcp) percpu_from_op(4, volatile, "mov", pcp)
|
||||
#define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), val)
|
||||
#define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), val)
|
||||
#define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, "mov", (pcp), val)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define raw_cpu_read_8(pcp) percpu_from_op(8, , "mov", pcp)
|
||||
#define raw_cpu_write_8(pcp, val) percpu_to_op(8, , "mov", (pcp), val)
|
||||
|
||||
#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
|
||||
#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_USE_X86_SEG_SUPPORT */
|
||||
|
||||
#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val)
|
||||
#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val)
|
||||
#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val)
|
||||
@@ -432,12 +485,6 @@ do { \
|
||||
#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
|
||||
#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
|
||||
|
||||
#define this_cpu_read_1(pcp) percpu_from_op(1, volatile, "mov", pcp)
|
||||
#define this_cpu_read_2(pcp) percpu_from_op(2, volatile, "mov", pcp)
|
||||
#define this_cpu_read_4(pcp) percpu_from_op(4, volatile, "mov", pcp)
|
||||
#define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), val)
|
||||
#define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), val)
|
||||
#define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, "mov", (pcp), val)
|
||||
#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val)
|
||||
#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val)
|
||||
#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val)
|
||||
@@ -476,8 +523,6 @@ do { \
|
||||
* 32 bit must fall back to generic operations.
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
#define raw_cpu_read_8(pcp) percpu_from_op(8, , "mov", pcp)
|
||||
#define raw_cpu_write_8(pcp, val) percpu_to_op(8, , "mov", (pcp), val)
|
||||
#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val)
|
||||
#define raw_cpu_and_8(pcp, val) percpu_to_op(8, , "and", (pcp), val)
|
||||
#define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val)
|
||||
@@ -486,8 +531,6 @@ do { \
|
||||
#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
|
||||
#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval)
|
||||
|
||||
#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
|
||||
#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
|
||||
#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
|
||||
#define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val)
|
||||
#define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
|
||||
|
||||
Reference in New Issue
Block a user