mm: abstract THP allocation
Patch series "Do not shatter hugezeropage on wp-fault", v7.

It was observed at [1] and [2] that the current kernel behaviour of
shattering a hugezeropage is inconsistent and suboptimal. For a VMA with
a THP-allowable order, when we write-fault on it, the kernel installs a
PMD-mapped THP. On the other hand, if we first get a read fault, we get
a PMD pointing to the hugezeropage; a subsequent write then triggers a
write-protection fault, shattering the hugezeropage into one writable
page with all the other PTEs write-protected. The upshot is that,
compared to the single write-fault case, applications suffer 512 extra
page faults if they use the VMA this way, plus the overhead of
khugepaged trying to replace that area with a THP anyway. Instead,
replace the hugezeropage with a THP on wp-fault.

[1]: https://lore.kernel.org/all/3743d7e1-0b79-4eaf-82d5-d1ca29fe347d@arm.com/
[2]: https://lore.kernel.org/all/1cfae0c0-96a2-4308-9c62-f7a640520242@arm.com/

This patch (of 2):

In preparation for the second patch, abstract away the THP allocation
logic present in the create_huge_pmd() path, which corresponds to the
faulting case when no page is present.

There should be no functional change as a result of applying this
patch, except that, as David notes at [1], a PMD-aligned address should
be passed to update_mmu_cache_pmd().

[1]: https://lore.kernel.org/all/ddd3fcd2-48b3-4170-bcaa-2fe66e093f43@redhat.com/

Link: https://lkml.kernel.org/r/20241008061746.285961-1-dev.jain@arm.com
Link: https://lkml.kernel.org/r/20241008061746.285961-2-dev.jain@arm.com
Signed-off-by: Dev Jain <dev.jain@arm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <yang@os.amperecomputing.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
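To make the cost described in the cover letter concrete, here is a minimal
userspace sketch (not part of the patch; it assumes a 4 KiB base page, a
2 MiB PMD size, and THP enabled at least in madvise mode, and the file
name demo.c is hypothetical). It read-faults a PMD-sized region first, so
the region is backed by the huge zero page, then writes every base page
and reports the minor faults taken by the writes via getrusage(). On a
pre-series kernel the writes take on the order of 512 faults; with patch 2
of the series applied, a single wp-fault installs a THP instead.

/* demo.c: count minor faults taken when writing a region that was first
 * read-faulted (and is therefore backed by the huge zero page).
 * Hypothetical demo, not from the kernel patch; assumes 2 MiB PMDs.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>

#define PMD_SIZE	(2UL << 20)	/* assumed PMD-mapped THP size */

static long minor_faults(void)
{
	struct rusage ru;

	getrusage(RUSAGE_SELF, &ru);
	return ru.ru_minflt;
}

int main(void)
{
	long before;
	char *p = mmap(NULL, PMD_SIZE, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	madvise(p, PMD_SIZE, MADV_HUGEPAGE);	/* make the VMA THP-eligible */

	/* Read fault first: installs a PMD mapping of the huge zero page. */
	volatile char c = p[0];
	(void)c;

	/*
	 * Now write every base page. Pre-series, the first write shatters
	 * the zero-page PMD and each remaining page wp-faults separately.
	 */
	before = minor_faults();
	memset(p, 1, PMD_SIZE);
	printf("minor faults for the writes: %ld\n", minor_faults() - before);

	munmap(p, PMD_SIZE);
	return 0;
}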
 mm/huge_memory.c | 100
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1136,47 +1136,81 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
 
-static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
-			struct page *page, gfp_t gfp)
+static struct folio *vma_alloc_anon_folio_pmd(struct vm_area_struct *vma,
+		unsigned long addr)
 {
-	struct vm_area_struct *vma = vmf->vma;
-	struct folio *folio = page_folio(page);
-	pgtable_t pgtable;
-	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
-	vm_fault_t ret = 0;
+	gfp_t gfp = vma_thp_gfp_mask(vma);
+	const int order = HPAGE_PMD_ORDER;
+	struct folio *folio;
 
+	folio = vma_alloc_folio(gfp, order, vma, addr & HPAGE_PMD_MASK, true);
+
+	if (unlikely(!folio)) {
+		count_vm_event(THP_FAULT_FALLBACK);
+		count_mthp_stat(order, MTHP_STAT_ANON_FAULT_FALLBACK);
+		return NULL;
+	}
+
 	VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
-
 	if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) {
 		folio_put(folio);
 		count_vm_event(THP_FAULT_FALLBACK);
 		count_vm_event(THP_FAULT_FALLBACK_CHARGE);
-		count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_FALLBACK);
-		count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
-		return VM_FAULT_FALLBACK;
+		count_mthp_stat(order, MTHP_STAT_ANON_FAULT_FALLBACK);
+		count_mthp_stat(order, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
+		return NULL;
 	}
 	folio_throttle_swaprate(folio, gfp);
 
+	folio_zero_user(folio, addr);
+	/*
+	 * The memory barrier inside __folio_mark_uptodate makes sure that
+	 * folio_zero_user writes become visible before the set_pmd_at()
+	 * write.
+	 */
+	__folio_mark_uptodate(folio);
+	return folio;
+}
+
+static void map_anon_folio_pmd(struct folio *folio, pmd_t *pmd,
+		struct vm_area_struct *vma, unsigned long haddr)
+{
+	pmd_t entry;
+
+	entry = mk_huge_pmd(&folio->page, vma->vm_page_prot);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	folio_add_new_anon_rmap(folio, vma, haddr, RMAP_EXCLUSIVE);
+	folio_add_lru_vma(folio, vma);
+	set_pmd_at(vma->vm_mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, haddr, pmd);
+	add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+	count_vm_event(THP_FAULT_ALLOC);
+	count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC);
+	count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC);
+}
+
+static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)
+{
+	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+	struct vm_area_struct *vma = vmf->vma;
+	struct folio *folio;
+	pgtable_t pgtable;
+	vm_fault_t ret = 0;
+
+	folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
+	if (unlikely(!folio))
+		return VM_FAULT_FALLBACK;
+
 	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
 		ret = VM_FAULT_OOM;
 		goto release;
 	}
 
-	folio_zero_user(folio, vmf->address);
-	/*
-	 * The memory barrier inside __folio_mark_uptodate makes sure that
-	 * folio_zero_user writes become visible before the set_pmd_at()
-	 * write.
-	 */
-	__folio_mark_uptodate(folio);
-
 	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
 	if (unlikely(!pmd_none(*vmf->pmd))) {
 		goto unlock_release;
 	} else {
-		pmd_t entry;
-
 		ret = check_stable_address_space(vma->vm_mm);
 		if (ret)
 			goto unlock_release;
@@ -1190,21 +1224,11 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 			VM_BUG_ON(ret & VM_FAULT_FALLBACK);
 			return ret;
 		}
-
-		entry = mk_huge_pmd(page, vma->vm_page_prot);
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-		folio_add_new_anon_rmap(folio, vma, haddr, RMAP_EXCLUSIVE);
-		folio_add_lru_vma(folio, vma);
 		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
-		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
-		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
-		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+		map_anon_folio_pmd(folio, vmf->pmd, vma, haddr);
 		mm_inc_nr_ptes(vma->vm_mm);
 		deferred_split_folio(folio, false);
 		spin_unlock(vmf->ptl);
-		count_vm_event(THP_FAULT_ALLOC);
-		count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC);
-		count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC);
 	}
 
 	return 0;
@@ -1271,8 +1295,6 @@ static void set_huge_zero_folio(pgtable_t pgtable, struct mm_struct *mm,
 vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	gfp_t gfp;
-	struct folio *folio;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	vm_fault_t ret;
 
@@ -1323,14 +1345,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		}
 		return ret;
 	}
-	gfp = vma_thp_gfp_mask(vma);
-	folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, vma, haddr, true);
-	if (unlikely(!folio)) {
-		count_vm_event(THP_FAULT_FALLBACK);
-		count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_FALLBACK);
-		return VM_FAULT_FALLBACK;
-	}
-	return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp);
+
+	return __do_huge_pmd_anonymous_page(vmf);
 }
 
 static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,