From cf8771ca4cdbd78232338652b98a7c5c9e0a6184 Mon Sep 17 00:00:00 2001
From: Tobias Huschle
Date: Fri, 6 Mar 2026 17:16:30 +0100
Subject: [PATCH 1/4] mm/page_table_check: Pass mm_struct to pxx_user_accessible_page()

Unlike other architectures, s390 has no means to distinguish kernel from
user page table entries - neither the entry itself nor the address can be
used for that. It is only the mm_struct that indicates whether an entry
in question is mapped into user space.

So pass the mm_struct to the pxx_user_accessible_page() callbacks.

[agordeev@linux.ibm.com: rephrased commit message, removed braces]
Acked-by: Madhavan Srinivasan
Reviewed-by: Gerald Schaefer
Reviewed-by: Andrew Morton
Reviewed-by: Ritesh Harjani (IBM) #powerpc
Signed-off-by: Tobias Huschle
Signed-off-by: Alexander Gordeev
Link: https://lore.kernel.org/r/ca77f3489453c2fe01b25e50e53b778929e0dfc5.1772812343.git.agordeev@linux.ibm.com
Signed-off-by: Vasily Gorbik
---
 arch/arm64/include/asm/pgtable.h             |  6 +++---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  2 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 +++++-----
 arch/powerpc/include/asm/nohash/pgtable.h    |  2 +-
 arch/powerpc/include/asm/pgtable.h           |  4 ++--
 arch/riscv/include/asm/pgtable.h             |  6 +++---
 arch/x86/include/asm/pgtable.h               |  6 +++---
 mm/page_table_check.c                        | 15 ++++++---------
 8 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b3e58735c49b..ccf0e0638767 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1263,17 +1263,17 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
 #endif
 
 #ifdef CONFIG_PAGE_TABLE_CHECK
-static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr)
+static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte)
 {
 	return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte));
 }
 
-static inline bool pmd_user_accessible_page(pmd_t pmd, unsigned long addr)
+static inline bool pmd_user_accessible_page(struct mm_struct *mm, unsigned long addr, pmd_t pmd)
 {
 	return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
 }
 
-static inline bool pud_user_accessible_page(pud_t pud, unsigned long addr)
+static inline bool pud_user_accessible_page(struct mm_struct *mm, unsigned long addr, pud_t pud)
 {
 	return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud));
 }
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 001e28f9eabc..75195bb44d06 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -438,7 +438,7 @@ static inline bool pte_access_permitted(pte_t pte, bool write)
 	return true;
 }
 
-static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr)
+static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte)
 {
 	return pte_present(pte) && !is_kernel_addr(addr);
 }
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 1a91762b455d..a56df313b585 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -549,7 +549,7 @@ static inline bool pte_access_permitted(pte_t pte, bool write)
 	return arch_pte_access_permitted(pte_val(pte), write, 0);
 }
 
-static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr)
+static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte)
 {
 	return pte_present(pte) && pte_user(pte);
 }
@@ -925,9 +925,9 @@ static inline bool pud_access_permitted(pud_t pud, bool write)
 }
 
 #define pud_user_accessible_page pud_user_accessible_page
-static inline bool pud_user_accessible_page(pud_t pud, unsigned long addr)
+static inline bool pud_user_accessible_page(struct mm_struct *mm, unsigned long addr, pud_t pud)
 {
-	return pud_leaf(pud) && pte_user_accessible_page(pud_pte(pud), addr);
+	return pud_leaf(pud) && pte_user_accessible_page(mm, addr, pud_pte(pud));
 }
 
 #define __p4d_raw(x)	((p4d_t) { __pgd_raw(x) })
@@ -1096,9 +1096,9 @@ static inline bool pmd_access_permitted(pmd_t pmd, bool write)
 }
 
 #define pmd_user_accessible_page pmd_user_accessible_page
-static inline bool pmd_user_accessible_page(pmd_t pmd, unsigned long addr)
+static inline bool pmd_user_accessible_page(struct mm_struct *mm, unsigned long addr, pmd_t pmd)
 {
-	return pmd_leaf(pmd) && pte_user_accessible_page(pmd_pte(pmd), addr);
+	return pmd_leaf(pmd) && pte_user_accessible_page(mm, addr, pmd_pte(pmd));
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index e6da5eaccff6..0665d0abe89f 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -249,7 +249,7 @@ static inline bool pte_access_permitted(pte_t pte, bool write)
 	return true;
 }
 
-static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr)
+static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte)
 {
 	return pte_present(pte) && !is_kernel_addr(addr);
 }
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index dcd3a88caaf6..29ed509cd235 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -205,11 +205,11 @@ static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
 #endif /* CONFIG_PPC64 */
 
 #ifndef pmd_user_accessible_page
-#define pmd_user_accessible_page(pmd, addr)	false
+#define pmd_user_accessible_page(mm, addr, pmd)	false
 #endif
 
 #ifndef pud_user_accessible_page
-#define pud_user_accessible_page(pud, addr)	false
+#define pud_user_accessible_page(mm, addr, pud)	false
 #endif
 
 #endif /* __ASSEMBLER__ */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 08d1ca047104..affe46cf3bc5 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -984,17 +984,17 @@ static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
 }
 
 #ifdef CONFIG_PAGE_TABLE_CHECK
-static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr)
+static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte)
 {
 	return pte_present(pte) && pte_user(pte);
 }
 
-static inline bool pmd_user_accessible_page(pmd_t pmd, unsigned long addr)
+static inline bool pmd_user_accessible_page(struct mm_struct *mm, unsigned long addr, pmd_t pmd)
 {
 	return pmd_leaf(pmd) && pmd_user(pmd);
 }
 
-static inline bool pud_user_accessible_page(pud_t pud, unsigned long addr)
+static inline bool pud_user_accessible_page(struct mm_struct *mm, unsigned long addr, pud_t pud)
 {
 	return pud_leaf(pud) && pud_user(pud);
 }
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1662c5a8f445..f9353d5c7464 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1680,17 +1680,17 @@ static inline bool arch_has_hw_nonleaf_pmd_young(void)
 #endif
 
 #ifdef CONFIG_PAGE_TABLE_CHECK
-static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr)
+static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER);
 }
 
-static inline bool pmd_user_accessible_page(pmd_t pmd, unsigned long addr)
+static inline bool pmd_user_accessible_page(struct mm_struct *mm, unsigned long addr, pmd_t pmd)
 {
 	return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER);
 }
 
-static inline bool pud_user_accessible_page(pud_t pud, unsigned long addr)
+static inline bool pud_user_accessible_page(struct mm_struct *mm, unsigned long addr, pud_t pud)
 {
 	return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER);
 }
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 2708c2b3ac1f..53a8997ec043 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -151,9 +151,8 @@ void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
 	if (&init_mm == mm)
 		return;
 
-	if (pte_user_accessible_page(pte, addr)) {
+	if (pte_user_accessible_page(mm, addr, pte))
 		page_table_check_clear(pte_pfn(pte), PAGE_SIZE >> PAGE_SHIFT);
-	}
 }
 EXPORT_SYMBOL(__page_table_check_pte_clear);
 
@@ -163,9 +162,8 @@ void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
 	if (&init_mm == mm)
 		return;
 
-	if (pmd_user_accessible_page(pmd, addr)) {
+	if (pmd_user_accessible_page(mm, addr, pmd))
 		page_table_check_clear(pmd_pfn(pmd), PMD_SIZE >> PAGE_SHIFT);
-	}
 }
 EXPORT_SYMBOL(__page_table_check_pmd_clear);
 
@@ -175,9 +173,8 @@ void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
 	if (&init_mm == mm)
 		return;
 
-	if (pud_user_accessible_page(pud, addr)) {
+	if (pud_user_accessible_page(mm, addr, pud))
 		page_table_check_clear(pud_pfn(pud), PUD_SIZE >> PAGE_SHIFT);
-	}
 }
 EXPORT_SYMBOL(__page_table_check_pud_clear);
 
@@ -211,7 +208,7 @@ void __page_table_check_ptes_set(struct mm_struct *mm, unsigned long addr,
 	for (i = 0; i < nr; i++)
 		__page_table_check_pte_clear(mm, addr + PAGE_SIZE * i, ptep_get(ptep + i));
 
-	if (pte_user_accessible_page(pte, addr))
+	if (pte_user_accessible_page(mm, addr, pte))
 		page_table_check_set(pte_pfn(pte), nr, pte_write(pte));
 }
 EXPORT_SYMBOL(__page_table_check_ptes_set);
@@ -241,7 +238,7 @@ void __page_table_check_pmds_set(struct mm_struct *mm, unsigned long addr,
 	for (i = 0; i < nr; i++)
 		__page_table_check_pmd_clear(mm, addr + PMD_SIZE * i, *(pmdp + i));
 
-	if (pmd_user_accessible_page(pmd, addr))
+	if (pmd_user_accessible_page(mm, addr, pmd))
 		page_table_check_set(pmd_pfn(pmd), stride * nr, pmd_write(pmd));
 }
 EXPORT_SYMBOL(__page_table_check_pmds_set);
@@ -257,7 +254,7 @@ void __page_table_check_puds_set(struct mm_struct *mm, unsigned long addr,
 	for (i = 0; i < nr; i++)
 		__page_table_check_pud_clear(mm, addr + PUD_SIZE * i, *(pudp + i));
 
-	if (pud_user_accessible_page(pud, addr))
+	if (pud_user_accessible_page(mm, addr, pud))
 		page_table_check_set(pud_pfn(pud), stride * nr, pud_write(pud));
 }
 EXPORT_SYMBOL(__page_table_check_puds_set);
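
The new calling convention can be illustrated with a small stand-alone mock of the hook and
its generic caller (user-space C; the struct layouts, the _PAGE_PRESENT value and the printf
reporting are simplified stand-ins, not the kernel's definitions). The point of the reordered
arguments is that on s390 neither the entry bits nor the address identify a user mapping, so
the owning mm is the only thing an implementation can rely on, and the generic code filters
out init_mm before the hook is ever reached:

  /* ptc_mock.c - build with: cc -Wall -o ptc_mock ptc_mock.c
   * Simplified stand-in types; an illustration, not kernel code.
   */
  #include <stdbool.h>
  #include <stdio.h>

  #define _PAGE_PRESENT	0x001UL		/* illustrative bit value */

  struct mm_struct { const char *name; };
  typedef struct { unsigned long val; } pte_t;

  static struct mm_struct init_mm = { "init_mm" };

  /* s390-style hook: the entry carries no user/kernel bit, so beyond a
   * presence check the decision is left to the mm-based filter below. */
  static bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr,
  				     pte_t pte)
  {
  	(void)mm;
  	(void)addr;
  	return pte.val & _PAGE_PRESENT;
  }

  /* Caller shaped like __page_table_check_pte_clear(): kernel page tables
   * (init_mm) are skipped first, then the architecture hook is consulted. */
  static void page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
  				       pte_t pte)
  {
  	if (mm == &init_mm)
  		return;
  	if (pte_user_accessible_page(mm, addr, pte))
  		printf("account pte clear at %#lx in %s\n", addr, mm->name);
  }

  int main(void)
  {
  	struct mm_struct user_mm = { "user_mm" };
  	pte_t pte = { _PAGE_PRESENT };

  	page_table_check_pte_clear(&init_mm, 0x1000, pte);	/* ignored */
  	page_table_check_pte_clear(&user_mm, 0x1000, pte);	/* accounted */
  	return 0;
  }

Running the mock prints one accounting line for the user_mm case only, mirroring how the real
checker ignores kernel page tables.
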
From 2f34c2e609540c6bf806261e2f9a9d8f77b388ef Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Fri, 6 Mar 2026 17:16:31 +0100
Subject: [PATCH 2/4] s390/pgtable: Use set_pmd_bit() to invalidate PMD entry

Commit 3a5a8d343e1c ("mm: fix race between __split_huge_pmd_locked() and
GUP-fast") failed to follow the convention and used direct PMD entry
modification instead of set_pmd_bit().

Reviewed-by: Gerald Schaefer
Signed-off-by: Alexander Gordeev
Link: https://lore.kernel.org/r/a9248694a38cc898d3f0628f59b8abb57d56a416.1772812343.git.agordeev@linux.ibm.com
Signed-off-by: Vasily Gorbik
---
 arch/s390/include/asm/pgtable.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 1c3c3be93be9..04ec9fee6498 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1744,10 +1744,10 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
 static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma,
 				    unsigned long addr, pmd_t *pmdp)
 {
-	pmd_t pmd;
+	pmd_t pmd = *pmdp;
 
-	VM_WARN_ON_ONCE(!pmd_present(*pmdp));
-	pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+	VM_WARN_ON_ONCE(!pmd_present(pmd));
+	pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
 
 	return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
 }
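
For reference, a rough user-space sketch of the convention the fix restores: PMD bits are
toggled through the set_pmd_bit()/clear_pmd_bit() helpers, which take a pgprot mask, instead
of OR-ing flags into the raw value at the call site. The type definitions and the
_SEGMENT_ENTRY_INVALID value below are simplified assumptions for illustration only:

  /* set_pmd_bit_mock.c - build with: cc -Wall -o set_pmd_bit_mock set_pmd_bit_mock.c
   * Simplified illustration of the helper convention; not kernel code.
   */
  #include <stdio.h>

  typedef struct { unsigned long pmd; } pmd_t;
  typedef struct { unsigned long pgprot; } pgprot_t;

  #define __pmd(x)	((pmd_t) { (x) })
  #define __pgprot(x)	((pgprot_t) { (x) })
  #define pmd_val(x)	((x).pmd)

  #define _SEGMENT_ENTRY_INVALID	0x20UL	/* illustrative value only */

  static pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
  {
  	return __pmd(pmd_val(pmd) | prot.pgprot);
  }

  static pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
  {
  	return __pmd(pmd_val(pmd) & ~prot.pgprot);
  }

  /* pmdp_invalidate()-style sequence: read the entry once, then mark it
   * invalid through the helper rather than open-coding the OR on *pmdp. */
  static pmd_t invalidate(pmd_t *pmdp)
  {
  	pmd_t pmd = *pmdp;

  	return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
  }

  int main(void)
  {
  	pmd_t pmd = __pmd(0x1000);

  	pmd = invalidate(&pmd);
  	printf("invalidated: %#lx\n", pmd_val(pmd));
  	pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
  	printf("valid again: %#lx\n", pmd_val(pmd));
  	return 0;
  }

The same pair of helpers is what the next patch uses to drop _SEGMENT_ENTRY_READ from the
transient entry in pmdp_invalidate().
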
From 7b4dde5e40ad68d68a6b3bdc9621cb4cdce54e43 Mon Sep 17 00:00:00 2001
From: Tobias Huschle
Date: Fri, 6 Mar 2026 17:16:32 +0100
Subject: [PATCH 3/4] s390/pgtable: Add s390 support for page table check

Add page table check hooks into the routines that modify user page tables.

Unlike other architectures, s390 has no means to distinguish between
kernel and user page table entries. Rely on the fact that the page table
check infrastructure itself operates on non-init_mm memory spaces only.
Use the provided mm_struct to verify that the memory space is indeed not
init_mm (i.e. not the kernel memory space). That check is expected to
have succeeded already (on some code paths even twice). If the passed
memory space turns out to be init_mm after all, that would be an
unexpected semantic change in generic code, so VM_BUG_ON() in that case.

Unset the _SEGMENT_ENTRY_READ bit to indicate that pmdp_invalidate() was
applied to a huge PMD and that the entry is going to be updated by
set_pmd_at() shortly. The pmd_user_accessible_page() hook should skip
such entries until then, otherwise the page table accounting falls apart
and BUG_ON() is hit as a result.

The invalidated huge PMD entry should not be confused with a PROT_NONE
entry as reported by pmd_protnone(), even though the entry
characteristics match exactly: _SEGMENT_ENTRY_LARGE is set while
_SEGMENT_ENTRY_READ is unset. Since the pmd_protnone() implementation
depends on the NUMA_BALANCING configuration option, it should not be
used in the pmd_user_accessible_page() check, which is expected to be
CONFIG_NUMA_BALANCING-agnostic.

Nevertheless, an invalidated huge PMD is technically still a
pmd_protnone() entry and should not break other code paths once
_SEGMENT_ENTRY_READ is unset. As of now, all pmd_protnone() checks are
done under page table locks or on the GUP-fast and HMM code paths, which
are expected to be safe against concurrent page table updates.

An alternative approach would be to use the last remaining unused PMD
entry bit 0x800 to indicate that pmdp_invalidate() was called on a PMD.
That would avoid collisions with the pmd_protnone() handling code paths,
but saving the bit is the preferable way to go.

Reviewed-by: Gerald Schaefer
Signed-off-by: Tobias Huschle
Co-developed-by: Alexander Gordeev
Signed-off-by: Alexander Gordeev
Link: https://lore.kernel.org/r/4db8a681205bd555298d62441cdcfca43317a35a.1772812343.git.agordeev@linux.ibm.com
Signed-off-by: Vasily Gorbik
---
 arch/s390/Kconfig               |  1 +
 arch/s390/include/asm/pgtable.h | 54 ++++++++++++++++++++++++++++++---
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index edc927d9e85a..7bda45d30455 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -154,6 +154,7 @@ config S390
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
 	select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_SUPPORTS_PAGE_TABLE_CHECK
 	select ARCH_SUPPORTS_PER_VMA_LOCK
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 04ec9fee6498..67f5df20a57e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -16,8 +16,10 @@
 #include
 #include
 #include
+#include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -1190,6 +1192,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	/* At this point the reference through the mapping is still present */
 	if (mm_is_protected(mm) && pte_present(res))
 		WARN_ON_ONCE(uv_convert_from_secure_pte(res));
+	page_table_check_pte_clear(mm, addr, res);
 
 	return res;
 }
@@ -1208,6 +1211,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 	/* At this point the reference through the mapping is still present */
 	if (mm_is_protected(vma->vm_mm) && pte_present(res))
 		WARN_ON_ONCE(uv_convert_from_secure_pte(res));
+	page_table_check_pte_clear(vma->vm_mm, addr, res);
 
 	return res;
 }
@@ -1231,6 +1235,9 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 	} else {
 		res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 	}
+
+	page_table_check_pte_clear(mm, addr, res);
+
 	/* Nothing to do */
 	if (!mm_is_protected(mm) || !pte_present(res))
 		return res;
@@ -1327,6 +1334,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
 {
 	if (pte_present(entry))
 		entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED));
+	page_table_check_ptes_set(mm, addr, ptep, entry, nr);
 	for (;;) {
 		set_pte(ptep, entry);
 		if (--nr == 0)
@@ -1703,6 +1711,7 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t entry)
 {
+	page_table_check_pmd_set(mm, addr, pmdp, entry);
 	set_pmd(pmdp, entry);
 }
 
@@ -1717,7 +1726,11 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pmd_t *pmdp)
 {
-	return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	pmd_t pmd;
+
+	pmd = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	page_table_check_pmd_clear(mm, addr, pmd);
+	return pmd;
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
@@ -1725,12 +1738,17 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
 						 unsigned long addr,
 						 pmd_t *pmdp, int full)
 {
+	pmd_t pmd;
+
 	if (full) {
-		pmd_t pmd = *pmdp;
+		pmd = *pmdp;
 		set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+		page_table_check_pmd_clear(vma->vm_mm, addr, pmd);
 		return pmd;
 	}
-	return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	pmd = pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	page_table_check_pmd_clear(vma->vm_mm, addr, pmd);
+	return pmd;
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
@@ -1748,7 +1766,12 @@ static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma,
 	VM_WARN_ON_ONCE(!pmd_present(pmd));
 	pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
 
-	return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
+#ifdef CONFIG_PAGE_TABLE_CHECK
+	pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_READ));
+#endif
+	page_table_check_pmd_set(vma->vm_mm, addr, pmdp, pmd);
+	pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
+	return pmd;
 }
 
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -1783,6 +1806,29 @@ static inline int has_transparent_hugepage(void)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte)
+{
+	VM_BUG_ON(mm == &init_mm);
+
+	return pte_present(pte);
+}
+
+static inline bool pmd_user_accessible_page(struct mm_struct *mm, unsigned long addr, pmd_t pmd)
+{
+	VM_BUG_ON(mm == &init_mm);
+
+	return pmd_leaf(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+
+static inline bool pud_user_accessible_page(struct mm_struct *mm, unsigned long addr, pud_t pud)
+{
+	VM_BUG_ON(mm == &init_mm);
+
+	return pud_leaf(pud);
+}
+#endif
+
 /*
  * 64 bit swap entry format:
  * A page-table entry has some bits we have to treat in a special way.
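
The interaction described in the commit message can be illustrated with a stand-alone mock of
the accounting (plain C, heavily simplified; the bit values and the "at most one writable
mapping" rule below are approximations of what mm/page_table_check.c enforces, not copies of
it). During a THP split, pmdp_invalidate() is itself reported as a "set" of the transient
entry and the same pfns are then mapped again at PTE level; clearing _SEGMENT_ENTRY_READ makes
pmd_user_accessible_page() skip the transient entry so the pfns are not accounted twice:

  /* ptc_split_mock.c - build with: cc -Wall -o ptc_split_mock ptc_split_mock.c
   * Simplified illustration; bit values and the accounting rule are
   * approximations, not the kernel's real ones.
   */
  #include <assert.h>
  #include <stdbool.h>
  #include <stdio.h>

  #define _SEGMENT_ENTRY_LARGE	0x400UL		/* illustrative values */
  #define _SEGMENT_ENTRY_READ	0x002UL
  #define _SEGMENT_ENTRY_INVALID	0x020UL

  typedef struct { unsigned long val; } pmd_t;

  static int writable_maps;	/* stand-in for the per-pfn counter */

  static bool pmd_user_accessible_page(pmd_t pmd)
  {
  	/* Invalidated huge PMDs have READ cleared and are skipped. */
  	return (pmd.val & _SEGMENT_ENTRY_LARGE) && (pmd.val & _SEGMENT_ENTRY_READ);
  }

  static void account_set(bool user_accessible)
  {
  	if (!user_accessible)
  		return;
  	writable_maps++;
  	/* Stand-in for the checker's BUG_ON: an anonymous page must not
  	 * end up with more than one writable user mapping. */
  	assert(writable_maps <= 1);
  }

  static void account_clear(bool user_accessible)
  {
  	if (user_accessible)
  		writable_maps--;
  }

  int main(void)
  {
  	pmd_t huge = { _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_READ };
  	pmd_t invalidated = { _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_INVALID };

  	/* Huge PMD is installed over the pfn range. */
  	account_set(pmd_user_accessible_page(huge));

  	/* pmdp_invalidate(): the old entry is cleared and the transient
  	 * entry is reported as a set; with READ dropped it is skipped. */
  	account_clear(pmd_user_accessible_page(huge));
  	account_set(pmd_user_accessible_page(invalidated));

  	/* The split now maps the same pfn at PTE level. Had the transient
  	 * PMD been counted above, this would be the second writable
  	 * mapping and the assert (the checker's BUG_ON) would fire. */
  	account_set(true);

  	printf("writable mappings accounted: %d\n", writable_maps);
  	return 0;
  }
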
From 07c4e7a6f6b1d1ac871ae93c203b20144b709ec5 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Fri, 6 Mar 2026 17:16:33 +0100
Subject: [PATCH 4/4] s390: Enable page table check for debug_defconfig

Reviewed-by: Gerald Schaefer
Signed-off-by: Alexander Gordeev
Link: https://lore.kernel.org/r/975007c27f8563e46d66a1fbb4b14ae6a4147edd.1772812343.git.agordeev@linux.ibm.com
Signed-off-by: Vasily Gorbik
---
 arch/s390/configs/debug_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 98fd0a2f51c6..12cdaaefb6db 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -929,3 +929,5 @@ CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_BPF=m
+CONFIG_PAGE_TABLE_CHECK=y
+CONFIG_PAGE_TABLE_CHECK_ENFORCED=y
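
A brief usage note on the two options above: CONFIG_PAGE_TABLE_CHECK only builds the checker,
which then still has to be requested with the page_table_check=on kernel parameter, while
CONFIG_PAGE_TABLE_CHECK_ENFORCED activates it unconditionally, which is what a debug config
wants. A trivial sketch of that gating (simplified; the real decision in mm/page_table_check.c
is made from an early boot parameter and cached, not computed like this):

  /* ptc_gate_mock.c - build with: cc -Wall -o ptc_gate_mock ptc_gate_mock.c
   * Simplified sketch of the config/boot-parameter gating; not kernel code.
   */
  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>

  /* Mirrors what the debug_defconfig above selects. */
  #define CONFIG_PAGE_TABLE_CHECK_ENFORCED 1

  static bool page_table_check_enabled(const char *cmdline)
  {
  	/* ENFORCED: always on; otherwise "page_table_check=on" is needed. */
  	if (CONFIG_PAGE_TABLE_CHECK_ENFORCED)
  		return true;
  	return cmdline && strstr(cmdline, "page_table_check=on");
  }

  int main(void)
  {
  	printf("enabled: %d\n", page_table_check_enabled(""));
  	return 0;
  }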