mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-04-03 09:56:50 -04:00
mm: hugetlb: fix incorrect fallback for subpool
During our testing with hugetlb subpool enabled, we observe that
hstate->resv_huge_pages may underflow into negative values. Root cause
analysis reveals a race condition in subpool reservation fallback handling
as follows:
hugetlb_reserve_pages()
/* Attempt subpool reservation */
gbl_reserve = hugepage_subpool_get_pages(spool, chg);
/* Global reservation may fail after subpool allocation */
if (hugetlb_acct_memory(h, gbl_reserve) < 0)
goto out_put_pages;
out_put_pages:
/* This incorrectly restores reservation to subpool */
hugepage_subpool_put_pages(spool, chg);
When hugetlb_acct_memory() fails after subpool allocation, the current
implementation over-commits subpool reservations by returning the full
'chg' value instead of the actual allocated 'gbl_reserve' amount. This
discrepancy propagates to global reservations during subsequent releases,
eventually causing resv_huge_pages underflow.
This problem can be triggered easily with the following steps:
1. reserve hugepages for hugetlb allocation
2. mount hugetlbfs with min_size to enable hugetlb subpool
3. allocate hugepages with two tasks (make sure the second will fail due to
an insufficient number of hugepages)
4. wait for a few seconds and repeat step 3, which will make
hstate->resv_huge_pages go below zero.
To fix this problem, return the correct amount of pages to the subpool during
the fallback after hugepage_subpool_get_pages() is called.
Link: https://lkml.kernel.org/r/20250410062633.3102457-1-mawupeng1@huawei.com
Fixes: 1c5ecae3a9 ("hugetlbfs: add minimum size accounting to subpools")
Signed-off-by: Wupeng Ma <mawupeng1@huawei.com>
Tested-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Ma Wupeng <mawupeng1@huawei.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
28
mm/hugetlb.c
28
mm/hugetlb.c
@@ -3010,7 +3010,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
|
||||
struct hugepage_subpool *spool = subpool_vma(vma);
|
||||
struct hstate *h = hstate_vma(vma);
|
||||
struct folio *folio;
|
||||
long retval, gbl_chg;
|
||||
long retval, gbl_chg, gbl_reserve;
|
||||
map_chg_state map_chg;
|
||||
int ret, idx;
|
||||
struct hugetlb_cgroup *h_cg = NULL;
|
||||
@@ -3163,8 +3163,16 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
|
||||
hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
|
||||
h_cg);
|
||||
out_subpool_put:
|
||||
if (map_chg)
|
||||
hugepage_subpool_put_pages(spool, 1);
|
||||
/*
|
||||
* put page to subpool iff the quota of subpool's rsv_hpages is used
|
||||
* during hugepage_subpool_get_pages.
|
||||
*/
|
||||
if (map_chg && !gbl_chg) {
|
||||
gbl_reserve = hugepage_subpool_put_pages(spool, 1);
|
||||
hugetlb_acct_memory(h, -gbl_reserve);
|
||||
}
|
||||
|
||||
|
||||
out_end_reservation:
|
||||
if (map_chg != MAP_CHG_ENFORCED)
|
||||
vma_end_reservation(h, vma, addr);
|
||||
@@ -7239,7 +7247,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
|
||||
struct vm_area_struct *vma,
|
||||
vm_flags_t vm_flags)
|
||||
{
|
||||
long chg = -1, add = -1;
|
||||
long chg = -1, add = -1, spool_resv, gbl_resv;
|
||||
struct hstate *h = hstate_inode(inode);
|
||||
struct hugepage_subpool *spool = subpool_inode(inode);
|
||||
struct resv_map *resv_map;
|
||||
@@ -7374,8 +7382,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
|
||||
return true;
|
||||
|
||||
out_put_pages:
|
||||
/* put back original number of pages, chg */
|
||||
(void)hugepage_subpool_put_pages(spool, chg);
|
||||
spool_resv = chg - gbl_reserve;
|
||||
if (spool_resv) {
|
||||
/* put sub pool's reservation back, chg - gbl_reserve */
|
||||
gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
|
||||
/*
|
||||
* subpool's reserved pages can not be put back due to race,
|
||||
* return to hstate.
|
||||
*/
|
||||
hugetlb_acct_memory(h, -gbl_resv);
|
||||
}
|
||||
out_uncharge_cgroup:
|
||||
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
|
||||
chg * pages_per_huge_page(h), h_cg);
|
||||
|
||||
Reference in New Issue
Block a user