diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h index d1e8591a5919..66bc168bc6b5 100644 --- a/include/linux/irq-entry-common.h +++ b/include/linux/irq-entry-common.h @@ -304,6 +304,8 @@ static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs) */ static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs) { + lockdep_assert_irqs_disabled(); + instrumentation_begin(); irqentry_exit_to_user_mode_prepare(regs); instrumentation_end(); @@ -356,6 +358,138 @@ void dynamic_irqentry_exit_cond_resched(void); #define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() #endif /* CONFIG_PREEMPT_DYNAMIC */ +/** + * irqentry_enter_from_kernel_mode - Establish state before invoking the irq handler + * @regs: Pointer to current's pt_regs + * + * Invoked from architecture specific entry code with interrupts disabled. + * Can only be called when the interrupt entry came from kernel mode. The + * calling code must be non-instrumentable. When the function returns all + * state is correct and the subsequent functions can be instrumented. + * + * The function establishes state (lockdep, RCU (context tracking), tracing) and + * is provided for architectures which require a strict split between entry from + * kernel and user mode and therefore cannot use irqentry_enter() which handles + * both entry modes. + * + * Returns: An opaque object that must be passed to irqentry_exit_to_kernel_mode(). + */ +static __always_inline irqentry_state_t irqentry_enter_from_kernel_mode(struct pt_regs *regs) +{ + irqentry_state_t ret = { + .exit_rcu = false, + }; + + /* + * If this entry hit the idle task invoke ct_irq_enter() whether + * RCU is watching or not. + * + * Interrupts can nest when the first interrupt invokes softirq + * processing on return which enables interrupts. 
+ * + * Scheduler ticks in the idle task can mark quiescent state and + * terminate a grace period, if and only if the timer interrupt is + * not nested into another interrupt. + * + * Checking for rcu_is_watching() here would prevent the nesting + * interrupt to invoke ct_irq_enter(). If that nested interrupt is + * the tick then rcu_flavor_sched_clock_irq() would wrongfully + * assume that it is the first interrupt and eventually claim + * quiescent state and end grace periods prematurely. + * + * Unconditionally invoke ct_irq_enter() so RCU state stays + * consistent. + * + * TINY_RCU does not support EQS, so let the compiler eliminate + * this part when enabled. + */ + if (!IS_ENABLED(CONFIG_TINY_RCU) && + (is_idle_task(current) || arch_in_rcu_eqs())) { + /* + * If RCU is not watching then the same careful + * sequence vs. lockdep and tracing is required + * as in irqentry_enter_from_user_mode(). + */ + lockdep_hardirqs_off(CALLER_ADDR0); + ct_irq_enter(); + instrumentation_begin(); + kmsan_unpoison_entry_regs(regs); + trace_hardirqs_off_finish(); + instrumentation_end(); + + ret.exit_rcu = true; + return ret; + } + + /* + * If RCU is watching then RCU only wants to check whether it needs + * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick() + * already contains a warning when RCU is not watching, so no point + * in having another one here. + */ + lockdep_hardirqs_off(CALLER_ADDR0); + instrumentation_begin(); + kmsan_unpoison_entry_regs(regs); + rcu_irq_enter_check_tick(); + trace_hardirqs_off_finish(); + instrumentation_end(); + + return ret; +} + +/** + * irqentry_exit_to_kernel_mode - Run preempt checks and establish state after + * invoking the interrupt handler + * @regs: Pointer to current's pt_regs + * @state: Return value from matching call to irqentry_enter_from_kernel_mode() + * + * This is the counterpart of irqentry_enter_from_kernel_mode() and runs the + * necessary preemption check if possible and required. 
It returns to the caller + * with interrupts disabled and the correct state vs. tracing, lockdep and RCU + * required to return to the interrupted context. + * + * It is the last action before returning to the low level ASM code which just + * needs to return. + */ +static __always_inline void irqentry_exit_to_kernel_mode(struct pt_regs *regs, + irqentry_state_t state) +{ + lockdep_assert_irqs_disabled(); + + if (!regs_irqs_disabled(regs)) { + /* + * If RCU was not watching on entry this needs to be done + * carefully and needs the same ordering of lockdep/tracing + * and RCU as the return to user mode path. + */ + if (state.exit_rcu) { + instrumentation_begin(); + /* Tell the tracer that IRET will enable interrupts */ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(); + instrumentation_end(); + ct_irq_exit(); + lockdep_hardirqs_on(CALLER_ADDR0); + return; + } + + instrumentation_begin(); + if (IS_ENABLED(CONFIG_PREEMPTION)) + irqentry_exit_cond_resched(); + + /* Covers both tracing and lockdep */ + trace_hardirqs_on(); + instrumentation_end(); + } else { + /* + * IRQ flags state is correct already. Just tell RCU if it + * was not watching on entry. + */ + if (state.exit_rcu) + ct_irq_exit(); + } +} + /** * irqentry_enter - Handle state tracking on ordinary interrupt entries * @regs: Pointer to pt_regs of interrupted context diff --git a/kernel/entry/common.c b/kernel/entry/common.c index b5e05d87ba39..1034be02eae8 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -105,70 +105,16 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs, noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) { - irqentry_state_t ret = { - .exit_rcu = false, - }; - if (user_mode(regs)) { + irqentry_state_t ret = { + .exit_rcu = false, + }; + irqentry_enter_from_user_mode(regs); return ret; } - /* - * If this entry hit the idle task invoke ct_irq_enter() whether - * RCU is watching or not. 
- * - * Interrupts can nest when the first interrupt invokes softirq - * processing on return which enables interrupts. - * - * Scheduler ticks in the idle task can mark quiescent state and - * terminate a grace period, if and only if the timer interrupt is - * not nested into another interrupt. - * - * Checking for rcu_is_watching() here would prevent the nesting - * interrupt to invoke ct_irq_enter(). If that nested interrupt is - * the tick then rcu_flavor_sched_clock_irq() would wrongfully - * assume that it is the first interrupt and eventually claim - * quiescent state and end grace periods prematurely. - * - * Unconditionally invoke ct_irq_enter() so RCU state stays - * consistent. - * - * TINY_RCU does not support EQS, so let the compiler eliminate - * this part when enabled. - */ - if (!IS_ENABLED(CONFIG_TINY_RCU) && - (is_idle_task(current) || arch_in_rcu_eqs())) { - /* - * If RCU is not watching then the same careful - * sequence vs. lockdep and tracing is required - * as in irqentry_enter_from_user_mode(). - */ - lockdep_hardirqs_off(CALLER_ADDR0); - ct_irq_enter(); - instrumentation_begin(); - kmsan_unpoison_entry_regs(regs); - trace_hardirqs_off_finish(); - instrumentation_end(); - - ret.exit_rcu = true; - return ret; - } - - /* - * If RCU is watching then RCU only wants to check whether it needs - * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick() - * already contains a warning when RCU is not watching, so no point - * in having another one here. 
- */ - lockdep_hardirqs_off(CALLER_ADDR0); - instrumentation_begin(); - kmsan_unpoison_entry_regs(regs); - rcu_irq_enter_check_tick(); - trace_hardirqs_off_finish(); - instrumentation_end(); - - return ret; + return irqentry_enter_from_kernel_mode(regs); } /** @@ -212,43 +158,10 @@ void dynamic_irqentry_exit_cond_resched(void) noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) { - lockdep_assert_irqs_disabled(); - - /* Check whether this returns to user mode */ - if (user_mode(regs)) { + if (user_mode(regs)) irqentry_exit_to_user_mode(regs); - } else if (!regs_irqs_disabled(regs)) { - /* - * If RCU was not watching on entry this needs to be done - * carefully and needs the same ordering of lockdep/tracing - * and RCU as the return to user mode path. - */ - if (state.exit_rcu) { - instrumentation_begin(); - /* Tell the tracer that IRET will enable interrupts */ - trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(); - instrumentation_end(); - ct_irq_exit(); - lockdep_hardirqs_on(CALLER_ADDR0); - return; - } - - instrumentation_begin(); - if (IS_ENABLED(CONFIG_PREEMPTION)) - irqentry_exit_cond_resched(); - - /* Covers both tracing and lockdep */ - trace_hardirqs_on(); - instrumentation_end(); - } else { - /* - * IRQ flags state is correct already. Just tell RCU if it - * was not watching on entry. - */ - if (state.exit_rcu) - ct_irq_exit(); - } + else + irqentry_exit_to_kernel_mode(regs, state); } irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)