Merge tag 'x86-urgent-2025-12-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:

 - Fix FPU core dumps on certain CPU models

 - Fix htmldocs build warning

 - Export TLB tracing event name via header

 - Remove unused constant from <linux/mm_types.h>

 - Fix comments

 - Fix whitespace noise in documentation

 - Fix variadic structure's definition to un-confuse UBSAN

 - Fix posted MSI interrupts irq_retrigger() bug

 - Fix asm build failure with older GCC builds

* tag 'x86-urgent-2025-12-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/bug: Fix old GCC compile fails
  x86/msi: Make irq_retrigger() functional for posted MSI
  x86/platform/uv: Fix UBSAN array-index-out-of-bounds
  mm: Remove tlb_flush_reason::NR_TLB_FLUSH_REASONS from <linux/mm_types.h>
  x86/mm/tlb/trace: Export the TLB_REMOTE_WRONG_CPU enum in <trace/events/tlb.h>
  x86/sgx: Remove unmatched quote in __sgx_encl_extend function comment
  x86/boot/Documentation: Fix whitespace noise in boot.rst
  x86/fpu: Fix FPU state core dump truncation on CPUs with no extended xfeatures
  x86/boot/Documentation: Fix htmldocs build warning due to malformed table in boot.rst
This commit is contained in:
Linus Torvalds
2025-12-21 14:41:29 -08:00
10 changed files with 138 additions and 108 deletions

View File

@@ -95,26 +95,26 @@ Memory Layout
The traditional memory map for the kernel loader, used for Image or
zImage kernels, typically looks like::
| |
| |
0A0000 +------------------------+
| Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
| Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
09A000 +------------------------+
| Command line |
| Stack/heap | For use by the kernel real-mode code.
| Command line |
| Stack/heap | For use by the kernel real-mode code.
098000 +------------------------+
| Kernel setup | The kernel real-mode code.
| Kernel setup | The kernel real-mode code.
090200 +------------------------+
| Kernel boot sector | The kernel legacy boot sector.
| Kernel boot sector | The kernel legacy boot sector.
090000 +------------------------+
| Protected-mode kernel | The bulk of the kernel image.
| Protected-mode kernel | The bulk of the kernel image.
010000 +------------------------+
| Boot loader | <- Boot sector entry point 0000:7C00
| Boot loader | <- Boot sector entry point 0000:7C00
001000 +------------------------+
| Reserved for MBR/BIOS |
| Reserved for MBR/BIOS |
000800 +------------------------+
| Typically used by MBR |
| Typically used by MBR |
000600 +------------------------+
| BIOS use only |
| BIOS use only |
000000 +------------------------+
When using bzImage, the protected-mode kernel was relocated to
@@ -142,27 +142,27 @@ above the 0x9A000 point; too many BIOSes will break above that point.
For a modern bzImage kernel with boot protocol version >= 2.02, a
memory layout like the following is suggested::
~ ~
| Protected-mode kernel |
~ ~
| Protected-mode kernel |
100000 +------------------------+
| I/O memory hole |
| I/O memory hole |
0A0000 +------------------------+
| Reserved for BIOS | Leave as much as possible unused
~ ~
| Command line | (Can also be below the X+10000 mark)
| Reserved for BIOS | Leave as much as possible unused
~ ~
| Command line | (Can also be below the X+10000 mark)
X+10000 +------------------------+
| Stack/heap | For use by the kernel real-mode code.
| Stack/heap | For use by the kernel real-mode code.
X+08000 +------------------------+
| Kernel setup | The kernel real-mode code.
| Kernel boot sector | The kernel legacy boot sector.
| Kernel setup | The kernel real-mode code.
| Kernel boot sector | The kernel legacy boot sector.
X +------------------------+
| Boot loader | <- Boot sector entry point 0000:7C00
| Boot loader | <- Boot sector entry point 0000:7C00
001000 +------------------------+
| Reserved for MBR/BIOS |
| Reserved for MBR/BIOS |
000800 +------------------------+
| Typically used by MBR |
| Typically used by MBR |
000600 +------------------------+
| BIOS use only |
| BIOS use only |
000000 +------------------------+
... where the address X is as low as the design of the boot loader permits.
@@ -433,7 +433,7 @@ Protocol: 2.00+
Assigned boot loader IDs:
== =======================================
==== =======================================
0x0 LILO
(0x00 reserved for pre-2.00 bootloader)
0x1 Loadlin
@@ -456,7 +456,7 @@ Protocol: 2.00+
<http://sebastian-plotz.blogspot.de>
0x12 OVMF UEFI virtualization stack
0x13 barebox
== =======================================
==== =======================================
Please contact <hpa@zytor.com> if you need a bootloader ID value assigned.
@@ -809,12 +809,12 @@ Protocol: 2.09+
as follows::
struct setup_data {
__u64 next;
__u32 type;
__u32 len;
__u8 data[];
__u64 next;
__u32 type;
__u32 len;
__u8 data[];
}
Where, the next is a 64-bit physical pointer to the next node of
linked list, the next field of the last node is 0; the type is used
to identify the contents of data; the len is the length of data
@@ -835,10 +835,10 @@ Protocol: 2.09+
protocol 2.15::
struct setup_indirect {
__u32 type;
__u32 reserved; /* Reserved, must be set to zero. */
__u64 len;
__u64 addr;
__u32 type;
__u32 reserved; /* Reserved, must be set to zero. */
__u64 len;
__u64 addr;
};
The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
@@ -850,15 +850,15 @@ Protocol: 2.09+
In this case setup_data and setup_indirect will look like this::
struct setup_data {
.next = 0, /* or <addr_of_next_setup_data_struct> */
.type = SETUP_INDIRECT,
.len = sizeof(setup_indirect),
.data[sizeof(setup_indirect)] = (struct setup_indirect) {
.type = SETUP_INDIRECT | SETUP_E820_EXT,
.reserved = 0,
.len = <len_of_SETUP_E820_EXT_data>,
.addr = <addr_of_SETUP_E820_EXT_data>,
},
.next = 0, /* or <addr_of_next_setup_data_struct> */
.type = SETUP_INDIRECT,
.len = sizeof(setup_indirect),
.data[sizeof(setup_indirect)] = (struct setup_indirect) {
.type = SETUP_INDIRECT | SETUP_E820_EXT,
.reserved = 0,
.len = <len_of_SETUP_E820_EXT_data>,
.addr = <addr_of_SETUP_E820_EXT_data>,
},
}
.. note::
@@ -897,11 +897,11 @@ Offset/size: 0x260/4
The kernel runtime start address is determined by the following algorithm::
if (relocatable_kernel) {
if (load_address < pref_address)
load_address = pref_address;
runtime_start = align_up(load_address, kernel_alignment);
if (load_address < pref_address)
load_address = pref_address;
runtime_start = align_up(load_address, kernel_alignment);
} else {
runtime_start = pref_address;
runtime_start = pref_address;
}
Hence the necessary memory window location and size can be estimated by
@@ -975,22 +975,22 @@ after kernel_info_var_len_data label. Each chunk of variable size data has to
be prefixed with header/magic and its size, e.g.::
kernel_info:
.ascii "LToP" /* Header, Linux top (structure). */
.long kernel_info_var_len_data - kernel_info
.long kernel_info_end - kernel_info
.long 0x01234567 /* Some fixed size data for the bootloaders. */
.ascii "LToP" /* Header, Linux top (structure). */
.long kernel_info_var_len_data - kernel_info
.long kernel_info_end - kernel_info
.long 0x01234567 /* Some fixed size data for the bootloaders. */
kernel_info_var_len_data:
example_struct: /* Some variable size data for the bootloaders. */
.ascii "0123" /* Header/Magic. */
.long example_struct_end - example_struct
.ascii "Struct"
.long 0x89012345
.ascii "0123" /* Header/Magic. */
.long example_struct_end - example_struct
.ascii "Struct"
.long 0x89012345
example_struct_end:
example_strings: /* Some variable size data for the bootloaders. */
.ascii "ABCD" /* Header/Magic. */
.long example_strings_end - example_strings
.asciz "String_0"
.asciz "String_1"
.ascii "ABCD" /* Header/Magic. */
.long example_strings_end - example_strings
.asciz "String_0"
.asciz "String_1"
example_strings_end:
kernel_info_end:
@@ -1132,53 +1132,53 @@ Such a boot loader should enter the following fields in the header::
unsigned long base_ptr; /* base address for real-mode segment */
if (setup_sects == 0)
setup_sects = 4;
setup_sects = 4;
if (protocol >= 0x0200) {
type_of_loader = <type code>;
if (loading_initrd) {
ramdisk_image = <initrd_address>;
ramdisk_size = <initrd_size>;
}
type_of_loader = <type code>;
if (loading_initrd) {
ramdisk_image = <initrd_address>;
ramdisk_size = <initrd_size>;
}
if (protocol >= 0x0202 && loadflags & 0x01)
heap_end = 0xe000;
else
heap_end = 0x9800;
if (protocol >= 0x0202 && loadflags & 0x01)
heap_end = 0xe000;
else
heap_end = 0x9800;
if (protocol >= 0x0201) {
heap_end_ptr = heap_end - 0x200;
loadflags |= 0x80; /* CAN_USE_HEAP */
}
if (protocol >= 0x0201) {
heap_end_ptr = heap_end - 0x200;
loadflags |= 0x80; /* CAN_USE_HEAP */
}
if (protocol >= 0x0202) {
cmd_line_ptr = base_ptr + heap_end;
strcpy(cmd_line_ptr, cmdline);
} else {
cmd_line_magic = 0xA33F;
cmd_line_offset = heap_end;
setup_move_size = heap_end + strlen(cmdline) + 1;
strcpy(base_ptr + cmd_line_offset, cmdline);
}
if (protocol >= 0x0202) {
cmd_line_ptr = base_ptr + heap_end;
strcpy(cmd_line_ptr, cmdline);
} else {
cmd_line_magic = 0xA33F;
cmd_line_offset = heap_end;
setup_move_size = heap_end + strlen(cmdline) + 1;
strcpy(base_ptr + cmd_line_offset, cmdline);
}
} else {
/* Very old kernel */
/* Very old kernel */
heap_end = 0x9800;
heap_end = 0x9800;
cmd_line_magic = 0xA33F;
cmd_line_offset = heap_end;
cmd_line_magic = 0xA33F;
cmd_line_offset = heap_end;
/* A very old kernel MUST have its real-mode code loaded at 0x90000 */
if (base_ptr != 0x90000) {
/* Copy the real-mode kernel */
memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
base_ptr = 0x90000; /* Relocated */
}
/* A very old kernel MUST have its real-mode code loaded at 0x90000 */
if (base_ptr != 0x90000) {
/* Copy the real-mode kernel */
memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
base_ptr = 0x90000; /* Relocated */
}
strcpy(0x90000 + cmd_line_offset, cmdline);
strcpy(0x90000 + cmd_line_offset, cmdline);
/* It is recommended to clear memory up to the 32K mark */
memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
/* It is recommended to clear memory up to the 32K mark */
memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
}

View File

@@ -15,7 +15,7 @@ extern void __WARN_trap(struct bug_entry *bug, ...);
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
*/
#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b)
#define ASM_UD2 __ASM_FORM(ud2)
#define INSN_UD2 0x0b0f
#define LEN_UD2 2

View File

@@ -87,4 +87,11 @@ static inline void panic_if_irq_remap(const char *msg)
}
#endif /* CONFIG_IRQ_REMAP */
#ifdef CONFIG_X86_POSTED_MSI
void intel_ack_posted_msi_irq(struct irq_data *irqd);
#else
#define intel_ack_posted_msi_irq NULL
#endif
#endif /* __X86_IRQ_REMAPPING_H */

View File

@@ -122,7 +122,7 @@ struct uv_systab {
struct {
u32 type:8; /* type of entry */
u32 offset:24; /* byte offset from struct start to entry */
} entry[1]; /* additional entries follow */
} entry[]; /* additional entries follow */
};
extern struct uv_systab *uv_systab;

View File

@@ -242,7 +242,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
/*
* If the caller requires measurement of the page as a proof for the content,
* use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
* operation until the entire page is measured."
* operation until the entire page is measured.
*/
static int __sgx_encl_extend(struct sgx_encl *encl,
struct sgx_epc_page *epc_page)

View File

@@ -1946,7 +1946,7 @@ static int dump_xsave_layout_desc(struct coredump_params *cprm)
};
if (!dump_emit(cprm, &xc, sizeof(xc)))
return 0;
return -1;
num_records++;
}
@@ -1984,7 +1984,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm)
return 1;
num_records = dump_xsave_layout_desc(cprm);
if (!num_records)
if (num_records < 0)
return 1;
/* Total size should be equal to the number of records */

View File

@@ -397,6 +397,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
/* Posted Interrupt Descriptors for coalesced MSIs to be posted */
DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active);
void intel_posted_msi_init(void)
{
@@ -414,6 +415,25 @@ void intel_posted_msi_init(void)
this_cpu_write(posted_msi_pi_desc.ndst, destination);
}
/*
 * irq_ack() callback of the posted MSI interrupt chip.
 *
 * Runs irq_move_irq() and then issues an APIC EOI only when the interrupt
 * was not delivered through the posted MSI notification handler.
 */
void intel_ack_posted_msi_irq(struct irq_data *irqd)
{
	bool via_posted_handler;

	irq_move_irq(irqd);

	/*
	 * sysvec_posted_msi_notification() sets this per-CPU flag before it
	 * dispatches the device handlers and clears it again afterwards.
	 */
	via_posted_handler = __this_cpu_read(posted_msi_handler_active);

	/*
	 * Flag set: the posted handler invoked the device interrupt handler
	 * and sends its own EOI once it is done. An EOI here would be
	 * premature because it would acknowledge the posted vector.
	 *
	 * Flag clear: the rare case that irq_retrigger() raised the actual
	 * assigned vector on the target CPU, bypassing the posted handler.
	 * The EOI is mandatory then, as otherwise the ISR entry becomes
	 * stale and lower priority interrupts are never delivered again.
	 */
	if (unlikely(!via_posted_handler))
		apic_eoi();
}
static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
{
unsigned long pir_copy[NR_PIR_WORDS];
@@ -446,6 +466,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
pid = this_cpu_ptr(&posted_msi_pi_desc);
/* Mark the handler active for intel_ack_posted_msi_irq() */
__this_cpu_write(posted_msi_handler_active, true);
inc_irq_stat(posted_msi_notification_count);
irq_enter();
@@ -474,6 +496,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
apic_eoi();
irq_exit();
__this_cpu_write(posted_msi_handler_active, false);
set_irq_regs(old_regs);
}
#endif /* X86_POSTED_MSI */

View File

@@ -1303,17 +1303,17 @@ static struct irq_chip intel_ir_chip = {
* irq_enter();
* handle_edge_irq()
* irq_chip_ack_parent()
* irq_move_irq(); // No EOI
* intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
* irq_move_irq(); // No EOI
* intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
* irq_move_irq(); // No EOI
* intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* apic_eoi()
@@ -1322,7 +1322,7 @@ static struct irq_chip intel_ir_chip = {
*/
static struct irq_chip intel_ir_chip_post_msi = {
.name = "INTEL-IR-POST",
.irq_ack = irq_move_irq,
.irq_ack = intel_ack_posted_msi_irq,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,

View File

@@ -1631,7 +1631,6 @@ enum tlb_flush_reason {
TLB_LOCAL_MM_SHOOTDOWN,
TLB_REMOTE_SEND_IPI,
TLB_REMOTE_WRONG_CPU,
NR_TLB_FLUSH_REASONS,
};
/**

View File

@@ -12,8 +12,9 @@
EM( TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" ) \
EM( TLB_REMOTE_SHOOTDOWN, "remote shootdown" ) \
EM( TLB_LOCAL_SHOOTDOWN, "local shootdown" ) \
EM( TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" ) \
EMe( TLB_REMOTE_SEND_IPI, "remote ipi send" )
EM( TLB_LOCAL_MM_SHOOTDOWN, "local MM shootdown" ) \
EM( TLB_REMOTE_SEND_IPI, "remote IPI send" ) \
EMe( TLB_REMOTE_WRONG_CPU, "remote wrong CPU" )
/*
* First define the enums in TLB_FLUSH_REASON to be exported to userspace