Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:
 "These are the arm64 updates for 6.19.

  The biggest part is the Arm MPAM driver under drivers/resctrl/.
  There's a patch touching mm/ to handle spurious faults for huge pmd
  (similar to the pte version). The corresponding arm64 part allows us
  to avoid the TLB maintenance if a (huge) page is reused after a write
  fault. There's EFI refactoring to allow calling runtime services with
  preemption enabled, and the rest is the usual perf/PMU updates plus
  several cleanups and typo fixes.

  Summary:

  Core features:

   - Basic Arm MPAM (Memory system resource Partitioning And Monitoring)
     driver under drivers/resctrl/ which makes use of the fs/resctrl/ API

  Perf and PMU:

   - Avoid cycle counter on multi-threaded CPUs

   - Extend CSPMU device probing and add additional filtering support
     for NVIDIA implementations

   - Add support for the PMUs on the NoC S3 interconnect

   - Add additional compatible strings for new Cortex and C1 CPUs

   - Add support for data source filtering to the SPE driver

   - Add support for i.MX8QM and the "DB" (system interconnect) PMU in
     the imx DDR PMU driver

  Memory management:

   - Avoid broadcast TLBI if page reused in write fault

   - Elide TLB invalidation if the old PTE was not valid

   - Drop redundant cpu_set_*_tcr_t0sz() macros

   - Propagate pgtable_alloc() errors outside of __create_pgd_mapping()

   - Propagate return value from __change_memory_common()

  ACPI and EFI:

   - Call EFI runtime services without disabling preemption

   - Remove unused ACPI function

  Miscellaneous:

   - ptrace support to disable streaming on SME-only systems

   - Improve sysreg generation to include a 'Prefix' descriptor

   - Replace __ASSEMBLY__ with __ASSEMBLER__

   - Align register dumps in the kselftest zt-test

   - Remove some no longer used macros/functions

   - Various spelling corrections"
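
For context on the spurious-fault rework referenced above: the generic hooks
involved default roughly as follows (a sketch of the defaults described in the
mm/ comment in the diffs below; exact #ifndef guards and placement vary by
tree):

	/* Default when the architecture does not override the hook:
	 * flush the faulting page, which may broadcast to other CPUs.
	 */
	#ifndef flush_tlb_fix_spurious_fault
	#define flush_tlb_fix_spurious_fault(vma, address, ptep) \
		flush_tlb_page(vma, address)
	#endif

	/* The huge-pmd variant defaults to no TLB maintenance at all. */
	#ifndef flush_tlb_fix_spurious_fault_pmd
	#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \
		do { } while (0)
	#endif

An architecture can override these to perform only local (non-broadcast)
maintenance when it knows the stale entry can only live in the faulting
CPU's TLB, which is what the arm64 side of this series relies on.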

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (94 commits)
  arm64/mm: Document why linear map split failure upon vm_reset_perms is not problematic
  arm64/pageattr: Propagate return value from __change_memory_common
  arm64/sysreg: Remove unused define ARM64_FEATURE_FIELD_BITS
  KVM: arm64: selftests: Consider all 7 possible levels of cache
  KVM: arm64: selftests: Remove ARM64_FEATURE_FIELD_BITS and its last user
  arm64: atomics: lse: Remove unused parameters from ATOMIC_FETCH_OP_AND macros
  Documentation/arm64: Fix the typo of register names
  ACPI: GTDT: Get rid of acpi_arch_timer_mem_init()
  perf: arm_spe: Add support for filtering on data source
  perf: Add perf_event_attr::config4
  perf/imx_ddr: Add support for PMU in DB (system interconnects)
  perf/imx_ddr: Get and enable optional clks
  perf/imx_ddr: Move ida_alloc() from ddr_perf_init() to ddr_perf_probe()
  dt-bindings: perf: fsl-imx-ddr: Add compatible string for i.MX8QM, i.MX8QXP and i.MX8DXL
  arm64: remove duplicate ARCH_HAS_MEM_ENCRYPT
  arm64: mm: use untagged address to calculate page index
  MAINTAINERS: new entry for MPAM Driver
  arm_mpam: Add kunit tests for props_mismatch()
  arm_mpam: Add kunit test for bitmap reset
  arm_mpam: Add helper to reset saved mbwu state
  ...
Linus Torvalds committed 2025-12-02 17:03:55 -08:00

150 changed files with 5997 additions and 711 deletions

--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c

@@ -1642,17 +1642,30 @@ vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio,
 EXPORT_SYMBOL_GPL(vmf_insert_folio_pud);
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
-void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
+/**
+ * touch_pmd - Mark page table pmd entry as accessed and dirty (for write)
+ * @vma: The VMA covering @addr
+ * @addr: The virtual address
+ * @pmd: pmd pointer into the page table mapping @addr
+ * @write: Whether it's a write access
+ *
+ * Return: whether the pmd entry is changed
+ */
+bool touch_pmd(struct vm_area_struct *vma, unsigned long addr,
 	       pmd_t *pmd, bool write)
 {
-	pmd_t _pmd;
+	pmd_t entry;
 
-	_pmd = pmd_mkyoung(*pmd);
+	entry = pmd_mkyoung(*pmd);
 	if (write)
-		_pmd = pmd_mkdirty(_pmd);
+		entry = pmd_mkdirty(entry);
 	if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
-				  pmd, _pmd, write))
+				  pmd, entry, write)) {
 		update_mmu_cache_pmd(vma, addr, pmd);
+		return true;
+	}
+	return false;
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -1842,18 +1855,14 @@ void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 }
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
-void huge_pmd_set_accessed(struct vm_fault *vmf)
+bool huge_pmd_set_accessed(struct vm_fault *vmf)
 {
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 
-	vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
 	if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd)))
-		goto unlock;
+		return false;
 
-	touch_pmd(vmf->vma, vmf->address, vmf->pmd, write);
-
-unlock:
-	spin_unlock(vmf->ptl);
+	return touch_pmd(vmf->vma, vmf->address, vmf->pmd, write);
 }
 
 static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf)

--- a/mm/internal.h
+++ b/mm/internal.h

@@ -1402,7 +1402,7 @@ int __must_check try_grab_folio(struct folio *folio, int refs,
  */
 void touch_pud(struct vm_area_struct *vma, unsigned long addr,
 	       pud_t *pud, bool write);
-void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
+bool touch_pmd(struct vm_area_struct *vma, unsigned long addr,
 	       pmd_t *pmd, bool write);
 
 /*

--- a/mm/memory.c
+++ b/mm/memory.c

@@ -6133,6 +6133,45 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 	return VM_FAULT_FALLBACK;
 }
 
+/*
+ * The page faults may be spurious because of the racy access to the
+ * page table. For example, a non-populated virtual page is accessed
+ * on 2 CPUs simultaneously, thus the page faults are triggered on
+ * both CPUs. However, it's possible that one CPU (say CPU A) cannot
+ * find the reason for the page fault if the other CPU (say CPU B) has
+ * changed the page table before the PTE is checked on CPU A. Most of
+ * the time, the spurious page faults can be ignored safely. However,
+ * if the page fault is for the write access, it's possible that a
+ * stale read-only TLB entry exists in the local CPU and needs to be
+ * flushed on some architectures. This is called the spurious page
+ * fault fixing.
+ *
+ * Note: flush_tlb_fix_spurious_fault() is defined as flush_tlb_page()
+ * by default and used as such on most architectures, while
+ * flush_tlb_fix_spurious_fault_pmd() is defined as NOP by default and
+ * used as such on most architectures.
+ */
+static void fix_spurious_fault(struct vm_fault *vmf,
+			       enum pgtable_level ptlevel)
+{
+	/* Skip spurious TLB flush for retried page fault */
+	if (vmf->flags & FAULT_FLAG_TRIED)
+		return;
+
+	/*
+	 * This is needed only for protection faults but the arch code
+	 * is not yet telling us if this is a protection fault or not.
+	 * This still avoids useless tlb flushes for .text page faults
+	 * with threads.
+	 */
+	if (vmf->flags & FAULT_FLAG_WRITE) {
+		if (ptlevel == PGTABLE_LEVEL_PTE)
+			flush_tlb_fix_spurious_fault(vmf->vma, vmf->address,
+						     vmf->pte);
+		else
+			flush_tlb_fix_spurious_fault_pmd(vmf->vma, vmf->address,
+							 vmf->pmd);
+	}
+}
+
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
@@ -6214,23 +6253,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	}
 	entry = pte_mkyoung(entry);
 	if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
-				vmf->flags & FAULT_FLAG_WRITE)) {
+				vmf->flags & FAULT_FLAG_WRITE))
 		update_mmu_cache_range(vmf, vmf->vma, vmf->address,
 				vmf->pte, 1);
-	} else {
-		/* Skip spurious TLB flush for retried page fault */
-		if (vmf->flags & FAULT_FLAG_TRIED)
-			goto unlock;
-		/*
-		 * This is needed only for protection faults but the arch code
-		 * is not yet telling us if this is a protection fault or not.
-		 * This still avoids useless tlb flushes for .text page faults
-		 * with threads.
-		 */
-		if (vmf->flags & FAULT_FLAG_WRITE)
-			flush_tlb_fix_spurious_fault(vmf->vma, vmf->address,
-						     vmf->pte);
-	}
+	else
+		fix_spurious_fault(vmf, PGTABLE_LEVEL_PTE);
 unlock:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return 0;
@@ -6327,7 +6354,10 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 			if (!(ret & VM_FAULT_FALLBACK))
 				return ret;
 		} else {
-			huge_pmd_set_accessed(&vmf);
+			vmf.ptl = pmd_lock(mm, vmf.pmd);
+			if (!huge_pmd_set_accessed(&vmf))
+				fix_spurious_fault(&vmf, PGTABLE_LEVEL_PMD);
+			spin_unlock(vmf.ptl);
 			return 0;
 		}
 	}
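
As a rough illustration of how the new hook pairs with the bool return of
huge_pmd_set_accessed() above (a hypothetical architecture override; the
real arm64 definition differs in detail):

	/* Hypothetical override in an arch's <asm/pgtable.h>: a spurious
	 * write fault means the stale read-only entry can only be in the
	 * faulting CPU's TLB, so a local invalidation is enough and the
	 * broadcast TLBI can be skipped.
	 */
	#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp)	\
		local_flush_tlb_page(vma, address)	/* hypothetical helper */

Because huge_pmd_set_accessed() now reports whether the pmd entry actually
changed, the caller only issues this maintenance for genuinely spurious
write faults instead of flushing unconditionally.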