mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-16 03:11:11 -04:00
LoongArch: Align FPU register state to 32 bytes
Move fpr to the beginning of struct loongarch_fpu so it is naturally aligned to FPU_ALIGN (32 bytes), improving 256-bit SIMD (LASX) context switch performance. Also adjust process.c and fpu.S to work well with the new loongarch_fpu layout.

Signed-off-by: Lisa Robinson <lisa@bytefly.space>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
This commit is contained in:
committed by
Huacai Chen
parent
1829419bc3
commit
e3f4591f79
@@ -80,10 +80,10 @@ BUILD_FPR_ACCESS(32)
|
||||
BUILD_FPR_ACCESS(64)
|
||||
|
||||
struct loongarch_fpu {
|
||||
+ union fpureg fpr[NUM_FPU_REGS];
|
||||
uint64_t fcc; /* 8x8 */
|
||||
uint32_t fcsr;
|
||||
uint32_t ftop;
|
||||
- union fpureg fpr[NUM_FPU_REGS];
|
||||
};
|
||||
|
||||
struct loongarch_lbt {
|
||||
|
||||
@@ -97,7 +97,7 @@
|
||||
.endm
|
||||
|
||||
#ifdef CONFIG_32BIT
|
||||
- .macro sc_save_fcc thread tmp0 tmp1
|
||||
+ .macro sc_save_fcc base tmp0 tmp1
|
||||
movcf2gr \tmp0, $fcc0
|
||||
move \tmp1, \tmp0
|
||||
movcf2gr \tmp0, $fcc1
|
||||
@@ -106,7 +106,7 @@
|
||||
bstrins.w \tmp1, \tmp0, 23, 16
|
||||
movcf2gr \tmp0, $fcc3
|
||||
bstrins.w \tmp1, \tmp0, 31, 24
|
||||
- EX st.w \tmp1, \thread, THREAD_FCC
|
||||
+ EX st.w \tmp1, \base, 0
|
||||
movcf2gr \tmp0, $fcc4
|
||||
move \tmp1, \tmp0
|
||||
movcf2gr \tmp0, $fcc5
|
||||
@@ -115,11 +115,11 @@
|
||||
bstrins.w \tmp1, \tmp0, 23, 16
|
||||
movcf2gr \tmp0, $fcc7
|
||||
bstrins.w \tmp1, \tmp0, 31, 24
|
||||
- EX st.w \tmp1, \thread, (THREAD_FCC + 4)
|
||||
+ EX st.w \tmp1, \base, 4
|
||||
.endm
|
||||
|
||||
- .macro sc_restore_fcc thread tmp0 tmp1
|
||||
- EX ld.w \tmp0, \thread, THREAD_FCC
|
||||
+ .macro sc_restore_fcc base tmp0 tmp1
|
||||
+ EX ld.w \tmp0, \base, 0
|
||||
bstrpick.w \tmp1, \tmp0, 7, 0
|
||||
movgr2cf $fcc0, \tmp1
|
||||
bstrpick.w \tmp1, \tmp0, 15, 8
|
||||
@@ -128,7 +128,7 @@
|
||||
movgr2cf $fcc2, \tmp1
|
||||
bstrpick.w \tmp1, \tmp0, 31, 24
|
||||
movgr2cf $fcc3, \tmp1
|
||||
- EX ld.w \tmp0, \thread, (THREAD_FCC + 4)
|
||||
+ EX ld.w \tmp0, \base, 4
|
||||
bstrpick.w \tmp1, \tmp0, 7, 0
|
||||
movgr2cf $fcc4, \tmp1
|
||||
bstrpick.w \tmp1, \tmp0, 15, 8
|
||||
|
||||
@@ -135,6 +135,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+ dst->thread.fpu.fcsr = src->thread.fpu.fcsr;
|
||||
|
||||
if (!used_math())
|
||||
memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user