Merge tag 'x86-urgent-2025-12-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:

 - Fix FPU core dumps on certain CPU models

 - Fix htmldocs build warning

 - Export TLB tracing event name via header

 - Remove unused constant from <linux/mm_types.h>

 - Fix comments

 - Fix whitespace noise in documentation

 - Fix a variable-length structure's definition to un-confuse UBSAN

 - Fix posted MSI interrupts irq_retrigger() bug

 - Fix asm build failure with older GCC versions

* tag 'x86-urgent-2025-12-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/bug: Fix old GCC compile fails
  x86/msi: Make irq_retrigger() functional for posted MSI
  x86/platform/uv: Fix UBSAN array-index-out-of-bounds
  mm: Remove tlb_flush_reason::NR_TLB_FLUSH_REASONS from <linux/mm_types.h>
  x86/mm/tlb/trace: Export the TLB_REMOTE_WRONG_CPU enum in <trace/events/tlb.h>
  x86/sgx: Remove unmatched quote in __sgx_encl_extend function comment
  x86/boot/Documentation: Fix whitespace noise in boot.rst
  x86/fpu: Fix FPU state core dump truncation on CPUs with no extended xfeatures
  x86/boot/Documentation: Fix htmldocs build warning due to malformed table in boot.rst
Linus Torvalds
2025-12-21 14:41:29 -08:00
10 changed files with 138 additions and 108 deletions

View File

@@ -95,26 +95,26 @@ Memory Layout
The traditional memory map for the kernel loader, used for Image or
zImage kernels, typically looks like::

        |                        |
0A0000  +------------------------+
        | Reserved for BIOS      |  Do not use. Reserved for BIOS EBDA.
09A000  +------------------------+
        | Command line           |
        | Stack/heap             |  For use by the kernel real-mode code.
098000  +------------------------+
        | Kernel setup           |  The kernel real-mode code.
090200  +------------------------+
        | Kernel boot sector     |  The kernel legacy boot sector.
090000  +------------------------+
        | Protected-mode kernel  |  The bulk of the kernel image.
010000  +------------------------+
        | Boot loader            |  <- Boot sector entry point 0000:7C00
001000  +------------------------+
        | Reserved for MBR/BIOS  |
000800  +------------------------+
        | Typically used by MBR  |
000600  +------------------------+
        | BIOS use only          |
000000  +------------------------+

When using bzImage, the protected-mode kernel was relocated to
@@ -142,27 +142,27 @@ above the 0x9A000 point; too many BIOSes will break above that point.
For a modern bzImage kernel with boot protocol version >= 2.02, a
memory layout like the following is suggested::

        ~                        ~
        | Protected-mode kernel  |
100000  +------------------------+
        | I/O memory hole        |
0A0000  +------------------------+
        | Reserved for BIOS      |  Leave as much as possible unused
        ~                        ~
        | Command line           |  (Can also be below the X+10000 mark)
X+10000 +------------------------+
        | Stack/heap             |  For use by the kernel real-mode code.
X+08000 +------------------------+
        | Kernel setup           |  The kernel real-mode code.
        | Kernel boot sector     |  The kernel legacy boot sector.
X       +------------------------+
        | Boot loader            |  <- Boot sector entry point 0000:7C00
001000  +------------------------+
        | Reserved for MBR/BIOS  |
000800  +------------------------+
        | Typically used by MBR  |
000600  +------------------------+
        | BIOS use only          |
000000  +------------------------+

... where the address X is as low as the design of the boot loader permits.
@@ -433,7 +433,7 @@ Protocol: 2.00+
Assigned boot loader IDs:

-== =======================================
+==== =======================================
0x0  LILO
     (0x00 reserved for pre-2.00 bootloader)
0x1  Loadlin
@@ -456,7 +456,7 @@ Protocol: 2.00+
     <http://sebastian-plotz.blogspot.de>
0x12 OVMF UEFI virtualization stack
0x13 barebox
-== =======================================
+==== =======================================

Please contact <hpa@zytor.com> if you need a bootloader ID value assigned.
@@ -809,12 +809,12 @@ Protocol: 2.09+
as follow::

        struct setup_data {
                __u64 next;
                __u32 type;
                __u32 len;
                __u8 data[];
        }

Where, the next is a 64-bit physical pointer to the next node of
linked list, the next field of the last node is 0; the type is used
to identify the contents of data; the len is the length of data
@@ -835,10 +835,10 @@ Protocol: 2.09+
protocol 2.15::

        struct setup_indirect {
                __u32 type;
                __u32 reserved;  /* Reserved, must be set to zero. */
                __u64 len;
                __u64 addr;
        };

The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
@@ -850,15 +850,15 @@ Protocol: 2.09+
In this case setup_data and setup_indirect will look like this::

        struct setup_data {
                .next = 0,  /* or <addr_of_next_setup_data_struct> */
                .type = SETUP_INDIRECT,
                .len = sizeof(setup_indirect),
                .data[sizeof(setup_indirect)] = (struct setup_indirect) {
                        .type = SETUP_INDIRECT | SETUP_E820_EXT,
                        .reserved = 0,
                        .len = <len_of_SETUP_E820_EXT_data>,
                        .addr = <addr_of_SETUP_E820_EXT_data>,
                },
        }
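
For illustration only (editor-added sketch, not part of the patch or of
boot.rst): a minimal consumer-side walk of the setup_data list that also
chases a SETUP_INDIRECT entry, assuming identity-mapped physical memory
and a placeholder consume() handler::

        #include <stdint.h>

        #define SETUP_INDIRECT (1U << 31)   /* boot protocol type flag */

        struct setup_data {
                uint64_t next;              /* phys addr of next node, 0 = end */
                uint32_t type;
                uint32_t len;
                uint8_t  data[];
        };

        struct setup_indirect {
                uint32_t type;              /* SETUP_INDIRECT | SETUP_* */
                uint32_t reserved;          /* must be zero */
                uint64_t len;
                uint64_t addr;
        };

        /* Placeholder handler, purely for the sketch. */
        static void consume(uint32_t type, const void *payload, uint64_t len)
        {
                (void)type; (void)payload; (void)len;
        }

        static void walk_setup_data(uint64_t pa)
        {
                while (pa) {
                        const struct setup_data *sd = (const void *)(uintptr_t)pa;

                        if (sd->type & SETUP_INDIRECT) {
                                const struct setup_indirect *si = (const void *)sd->data;

                                /* The real payload lives at si->addr, si->len bytes. */
                                consume(si->type & ~SETUP_INDIRECT,
                                        (const void *)(uintptr_t)si->addr, si->len);
                        } else {
                                consume(sd->type, sd->data, sd->len);
                        }
                        pa = sd->next;
                }
        }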
.. note::
@@ -897,11 +897,11 @@ Offset/size: 0x260/4
The kernel runtime start address is determined by the following algorithm::

        if (relocatable_kernel) {
                if (load_address < pref_address)
                        load_address = pref_address;
                runtime_start = align_up(load_address, kernel_alignment);
        } else {
                runtime_start = pref_address;
        }

Hence the necessary memory window location and size can be estimated by
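
For illustration (editor-added, with made-up numbers), the algorithm above
behaves like this::

        pref_address = 0x1000000 (16 MiB), kernel_alignment = 0x200000 (2 MiB)

        load_address = 0x0100000  ->  bumped up to pref_address = 0x1000000
                                  ->  runtime_start = 0x1000000 (already aligned)

        load_address = 0x1234000  ->  runtime_start = align_up(0x1234000, 0x200000)
                                                    = 0x1400000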
@@ -975,22 +975,22 @@ after kernel_info_var_len_data label. Each chunk of variable size data has to
be prefixed with header/magic and its size, e.g.::

        kernel_info:
                .ascii  "LToP"          /* Header, Linux top (structure). */
                .long   kernel_info_var_len_data - kernel_info
                .long   kernel_info_end - kernel_info
                .long   0x01234567      /* Some fixed size data for the bootloaders. */
        kernel_info_var_len_data:
        example_struct:                 /* Some variable size data for the bootloaders. */
                .ascii  "0123"          /* Header/Magic. */
                .long   example_struct_end - example_struct
                .ascii  "Struct"
                .long   0x89012345
        example_struct_end:
        example_strings:                /* Some variable size data for the bootloaders. */
                .ascii  "ABCD"          /* Header/Magic. */
                .long   example_strings_end - example_strings
                .asciz  "String_0"
                .asciz  "String_1"
        example_strings_end:
        kernel_info_end:
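
An editor-added sketch (hypothetical names, not part of the patch) of how a
boot loader might walk these variable size chunks, given a pointer to the
in-memory kernel_info blob; each chunk's size field is counted from the
chunk's own magic, as in the assembly above::

        #include <stdint.h>
        #include <string.h>

        struct ki_header {
                char     magic[4];      /* "LToP" */
                uint32_t var_len_off;   /* kernel_info_var_len_data - kernel_info */
                uint32_t end_off;       /* kernel_info_end - kernel_info */
                /* fixed size data for the boot loaders follows */
        };

        /* Placeholder: dispatch on the chunk's 4-byte magic. */
        static void handle_chunk(const uint8_t *chunk, uint32_t size)
        {
                (void)chunk; (void)size;
        }

        static void walk_kernel_info(const uint8_t *ki)
        {
                const struct ki_header *hdr = (const void *)ki;
                uint32_t pos = hdr->var_len_off;

                if (memcmp(hdr->magic, "LToP", 4))
                        return;                 /* not a kernel_info blob */

                while (pos + 8 <= hdr->end_off) {
                        const uint8_t *chunk = ki + pos;
                        uint32_t size;

                        memcpy(&size, chunk + 4, sizeof(size));
                        if (size < 8 || pos + size > hdr->end_off)
                                break;          /* malformed chunk */
                        handle_chunk(chunk, size);
                        pos += size;
                }
        }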
@@ -1132,53 +1132,53 @@ Such a boot loader should enter the following fields in the header::
        unsigned long base_ptr; /* base address for real-mode segment */

        if (setup_sects == 0)
                setup_sects = 4;

        if (protocol >= 0x0200) {
                type_of_loader = <type code>;

                if (loading_initrd) {
                        ramdisk_image = <initrd_address>;
                        ramdisk_size = <initrd_size>;
                }

                if (protocol >= 0x0202 && loadflags & 0x01)
                        heap_end = 0xe000;
                else
                        heap_end = 0x9800;

                if (protocol >= 0x0201) {
                        heap_end_ptr = heap_end - 0x200;
                        loadflags |= 0x80; /* CAN_USE_HEAP */
                }

                if (protocol >= 0x0202) {
                        cmd_line_ptr = base_ptr + heap_end;
                        strcpy(cmd_line_ptr, cmdline);
                } else {
                        cmd_line_magic = 0xA33F;
                        cmd_line_offset = heap_end;
                        setup_move_size = heap_end + strlen(cmdline) + 1;
                        strcpy(base_ptr + cmd_line_offset, cmdline);
                }
        } else {
                /* Very old kernel */
                heap_end = 0x9800;

                cmd_line_magic = 0xA33F;
                cmd_line_offset = heap_end;

                /* A very old kernel MUST have its real-mode code loaded at 0x90000 */
                if (base_ptr != 0x90000) {
                        /* Copy the real-mode kernel */
                        memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
                        base_ptr = 0x90000; /* Relocated */
                }

                strcpy(0x90000 + cmd_line_offset, cmdline);

                /* It is recommended to clear memory up to the 32K mark */
                memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
        }

View File

@@ -15,7 +15,7 @@ extern void __WARN_trap(struct bug_entry *bug, ...);
/*
 * Despite that some emulators terminate on UD2, we use it for WARN().
 */
-#define ASM_UD2         _ASM_BYTES(0x0f, 0x0b)
+#define ASM_UD2         __ASM_FORM(ud2)
#define INSN_UD2        0x0b0f
#define LEN_UD2         2
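
For context, an editor-added standalone illustration (not the kernel's actual
WARN() plumbing): both the old raw-byte form and the new mnemonic form encode
the same two-byte 0x0f 0x0b instruction (see INSN_UD2/LEN_UD2 above), and a
trap site boils down to emitting that instruction inline:

        /* Hypothetical example, not kernel code: place a ud2 so the #UD trap
         * handler can recognize this address and resume execution past it
         * (LEN_UD2 bytes further on). */
        static inline void warn_site(void)
        {
                asm volatile ("ud2");
        }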

View File

@@ -87,4 +87,11 @@ static inline void panic_if_irq_remap(const char *msg)
}
#endif /* CONFIG_IRQ_REMAP */

+#ifdef CONFIG_X86_POSTED_MSI
+void intel_ack_posted_msi_irq(struct irq_data *irqd);
+#else
+#define intel_ack_posted_msi_irq NULL
+#endif

#endif /* __X86_IRQ_REMAPPING_H */

View File

@@ -122,7 +122,7 @@ struct uv_systab {
        struct {
                u32 type:8;     /* type of entry */
                u32 offset:24;  /* byte offset from struct start to entry */
-       } entry[1];     /* additional entries follow */
+       } entry[];      /* additional entries follow */
};

extern struct uv_systab *uv_systab;
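
The reason this silences UBSAN, sketched with a hypothetical structure
(editor-added, not the UV code):

        /* With a one-element trailing array, tab->entry[i] is out of bounds
         * for any i >= 1 as far as the declared type is concerned, so a
         * UBSAN bounds-checked build warns even though additional entries
         * really are laid out after the header in memory.  A C99 flexible
         * array member carries no declared bound, so the same access is
         * well defined. */
        struct demo_tab {
                unsigned int size;              /* total size of the table */
                unsigned int nentries;          /* number of trailing entries */
                struct {
                        unsigned int type;
                        unsigned int offset;
                } entry[];                      /* was entry[1] in the buggy pattern */
        };

        static unsigned int demo_entry_type(const struct demo_tab *tab, unsigned int i)
        {
                return tab->entry[i].type;      /* clean with entry[], flagged with entry[1] */
        }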

View File

@@ -242,7 +242,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
/*
 * If the caller requires measurement of the page as a proof for the content,
 * use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
- * operation until the entire page is measured."
+ * operation until the entire page is measured.
 */
static int __sgx_encl_extend(struct sgx_encl *encl,
                             struct sgx_epc_page *epc_page)

View File

@@ -1946,7 +1946,7 @@ static int dump_xsave_layout_desc(struct coredump_params *cprm)
                };

                if (!dump_emit(cprm, &xc, sizeof(xc)))
-                       return 0;
+                       return -1;

                num_records++;
        }
@@ -1984,7 +1984,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm)
                return 1;

        num_records = dump_xsave_layout_desc(cprm);
-       if (!num_records)
+       if (num_records < 0)
                return 1;

        /* Total size should be equal to the number of records */
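
An editor-added standalone sketch (hypothetical names) of the fixed
convention: a signed return distinguishes a write failure from a legitimate
count of zero records, which is exactly what a CPU without extended
xfeatures produces:

        #include <stdbool.h>

        static int emit_records(const bool *present, int nr, bool (*emit)(int idx))
        {
                int num_records = 0;

                for (int i = 0; i < nr; i++) {
                        if (!present[i])
                                continue;
                        if (!emit(i))
                                return -1;      /* write failure */
                        num_records++;
                }
                return num_records;             /* >= 0 is valid, including 0 */
        }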

View File

@@ -397,6 +397,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
/* Posted Interrupt Descriptors for coalesced MSIs to be posted */
DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active);

void intel_posted_msi_init(void)
{
@@ -414,6 +415,25 @@ void intel_posted_msi_init(void)
        this_cpu_write(posted_msi_pi_desc.ndst, destination);
}

+void intel_ack_posted_msi_irq(struct irq_data *irqd)
+{
+       irq_move_irq(irqd);
+
+       /*
+        * Handle the rare case that irq_retrigger() raised the actual
+        * assigned vector on the target CPU, which means that it was not
+        * invoked via the posted MSI handler below. In that case APIC EOI
+        * is required as otherwise the ISR entry becomes stale and lower
+        * priority interrupts are never going to be delivered after that.
+        *
+        * If the posted handler invoked the device interrupt handler then
+        * the EOI would be premature because it would acknowledge the
+        * posted vector.
+        */
+       if (unlikely(!__this_cpu_read(posted_msi_handler_active)))
+               apic_eoi();
+}
+
static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
{
        unsigned long pir_copy[NR_PIR_WORDS];
@@ -446,6 +466,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
        pid = this_cpu_ptr(&posted_msi_pi_desc);

+       /* Mark the handler active for intel_ack_posted_msi_irq() */
+       __this_cpu_write(posted_msi_handler_active, true);

        inc_irq_stat(posted_msi_notification_count);
        irq_enter();
@@ -474,6 +496,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
        apic_eoi();
        irq_exit();
+       __this_cpu_write(posted_msi_handler_active, false);

        set_irq_regs(old_regs);
}
#endif /* X86_POSTED_MSI */

View File

@@ -1303,17 +1303,17 @@ static struct irq_chip intel_ir_chip = {
 *    irq_enter();
 *        handle_edge_irq()
 *            irq_chip_ack_parent()
- *                irq_move_irq();                 // No EOI
+ *                intel_ack_posted_msi_irq();     // No EOI
 *            handle_irq_event()
 *                driver_handler()
 *        handle_edge_irq()
 *            irq_chip_ack_parent()
- *                irq_move_irq();                 // No EOI
+ *                intel_ack_posted_msi_irq();     // No EOI
 *            handle_irq_event()
 *                driver_handler()
 *        handle_edge_irq()
 *            irq_chip_ack_parent()
- *                irq_move_irq();                 // No EOI
+ *                intel_ack_posted_msi_irq();     // No EOI
 *            handle_irq_event()
 *                driver_handler()
 *    apic_eoi()
@@ -1322,7 +1322,7 @@ static struct irq_chip intel_ir_chip = {
 */
static struct irq_chip intel_ir_chip_post_msi = {
        .name                   = "INTEL-IR-POST",
-       .irq_ack                = irq_move_irq,
+       .irq_ack                = intel_ack_posted_msi_irq,
        .irq_set_affinity       = intel_ir_set_affinity,
        .irq_compose_msi_msg    = intel_ir_compose_msi_msg,
        .irq_set_vcpu_affinity  = intel_ir_set_vcpu_affinity,

View File

@@ -1631,7 +1631,6 @@ enum tlb_flush_reason {
        TLB_LOCAL_MM_SHOOTDOWN,
        TLB_REMOTE_SEND_IPI,
        TLB_REMOTE_WRONG_CPU,
-       NR_TLB_FLUSH_REASONS,
};

/**

View File

@@ -12,8 +12,9 @@
        EM(  TLB_FLUSH_ON_TASK_SWITCH,  "flush on task switch" )        \
        EM(  TLB_REMOTE_SHOOTDOWN,      "remote shootdown" )            \
        EM(  TLB_LOCAL_SHOOTDOWN,       "local shootdown" )             \
-       EM(  TLB_LOCAL_MM_SHOOTDOWN,    "local mm shootdown" )          \
-       EMe( TLB_REMOTE_SEND_IPI,       "remote ipi send" )
+       EM(  TLB_LOCAL_MM_SHOOTDOWN,    "local MM shootdown" )          \
+       EM(  TLB_REMOTE_SEND_IPI,       "remote IPI send" )             \
+       EMe( TLB_REMOTE_WRONG_CPU,      "remote wrong CPU" )

/*
 * First define the enums in TLB_FLUSH_REASON to be exported to userspace
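
For context (editor-added sketch of the usual trace-header idiom, not part of
this patch): the EM()/EMe() entries are expanded twice, once to export the
enum values to user space tooling and once to build the value/string pairs
used by __print_symbolic() in the event's format string:

        /* Pass 1: export the enum values. */
        #undef EM
        #undef EMe
        #define EM(a, b)        TRACE_DEFINE_ENUM(a);
        #define EMe(a, b)       TRACE_DEFINE_ENUM(a);

        TLB_FLUSH_REASON

        /* Pass 2: build { value, "string" } pairs for __print_symbolic(). */
        #undef EM
        #undef EMe
        #define EM(a, b)        { a, b },
        #define EMe(a, b)       { a, b }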