Merge tag 'mm-hotfixes-stable-2025-03-17-20-09' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc hotfixes from Andrew Morton:
 "15 hotfixes. 7 are cc:stable and the remainder address post-6.13
  issues or aren't considered necessary for -stable kernels.

  13 are for MM and the other two are for squashfs and procfs.

  All are singletons. Please see the individual changelogs for details"

* tag 'mm-hotfixes-stable-2025-03-17-20-09' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm/page_alloc: fix memory accept before watermarks gets initialized
  mm: decline to manipulate the refcount on a slab page
  memcg: drain obj stock on cpu hotplug teardown
  mm/huge_memory: drop beyond-EOF folios with the right number of refs
  selftests/mm: run_vmtests.sh: fix half_ufd_size_MB calculation
  mm: fix error handling in __filemap_get_folio() with FGP_NOWAIT
  mm: memcontrol: fix swap counter leak from offline cgroup
  mm/vma: do not register private-anon mappings with khugepaged during mmap
  squashfs: fix invalid pointer dereference in squashfs_cache_delete
  mm/migrate: fix shmem xarray update during migration
  mm/hugetlb: fix surplus pages in dissolve_free_huge_page()
  mm/damon/core: initialize damos->walk_completed in damon_new_scheme()
  mm/damon: respect core layer filters' allowance decision on ops layer
  filemap: move prefaulting out of hot write path
  proc: fix UAF in proc_get_inode()

Merged by Linus Torvalds on 2025-03-17 22:27:27 -07:00
20 files changed, 132 insertions(+), 43 deletions(-)

--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c

@@ -559,10 +559,16 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
 	return p;
 }
 
-static inline void pde_set_flags(struct proc_dir_entry *pde)
+static void pde_set_flags(struct proc_dir_entry *pde)
 {
 	if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT)
 		pde->flags |= PROC_ENTRY_PERMANENT;
+	if (pde->proc_ops->proc_read_iter)
+		pde->flags |= PROC_ENTRY_proc_read_iter;
+#ifdef CONFIG_COMPAT
+	if (pde->proc_ops->proc_compat_ioctl)
+		pde->flags |= PROC_ENTRY_proc_compat_ioctl;
+#endif
 }
 
 struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
@@ -626,6 +632,7 @@ struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
 	p->proc_ops = &proc_seq_ops;
 	p->seq_ops = ops;
 	p->state_size = state_size;
+	pde_set_flags(p);
 	return proc_register(parent, p);
 }
 EXPORT_SYMBOL(proc_create_seq_private);
@@ -656,6 +663,7 @@ struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
 		return NULL;
 	p->proc_ops = &proc_single_ops;
 	p->single_show = show;
+	pde_set_flags(p);
 	return proc_register(parent, p);
 }
 EXPORT_SYMBOL(proc_create_single_data);

--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c

@@ -656,13 +656,13 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
 
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = de->proc_iops;
-		if (de->proc_ops->proc_read_iter)
+		if (pde_has_proc_read_iter(de))
 			inode->i_fop = &proc_iter_file_ops;
 		else
 			inode->i_fop = &proc_reg_file_ops;
 #ifdef CONFIG_COMPAT
-		if (de->proc_ops->proc_compat_ioctl) {
-			if (de->proc_ops->proc_read_iter)
+		if (pde_has_proc_compat_ioctl(de)) {
+			if (pde_has_proc_read_iter(de))
 				inode->i_fop = &proc_iter_file_ops_compat;
 			else
 				inode->i_fop = &proc_reg_file_ops_compat;

--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h

@@ -85,6 +85,20 @@ static inline void pde_make_permanent(struct proc_dir_entry *pde)
 	pde->flags |= PROC_ENTRY_PERMANENT;
 }
 
+static inline bool pde_has_proc_read_iter(const struct proc_dir_entry *pde)
+{
+	return pde->flags & PROC_ENTRY_proc_read_iter;
+}
+
+static inline bool pde_has_proc_compat_ioctl(const struct proc_dir_entry *pde)
+{
+#ifdef CONFIG_COMPAT
+	return pde->flags & PROC_ENTRY_proc_compat_ioctl;
+#else
+	return false;
+#endif
+}
+
 extern struct kmem_cache *proc_dir_entry_cache;
 void pde_free(struct proc_dir_entry *pde);
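
Taken together, the three proc hunks close the use-after-free: proc_get_inode() used to dereference de->proc_ops at open time, when the module that owns proc_ops may already be gone. The fix snapshots the two bits it needs into pde->flags at registration, while proc_ops is known valid, so the open path only consults the cached flags. A minimal sketch of the pattern with hypothetical names (not kernel code):

	#include <stdbool.h>

	enum { HAS_READ_ITER = 1u << 0 };

	struct my_ops {
		int (*read_iter)(void *buf);	/* may live in unloadable code */
	};

	struct my_entry {
		const struct my_ops *ops;
		unsigned int flags;		/* bits snapshotted at registration */
	};

	static void my_entry_register(struct my_entry *e, const struct my_ops *ops)
	{
		e->ops = ops;
		if (ops->read_iter)		/* safe: ops is known valid here */
			e->flags |= HAS_READ_ITER;
	}

	static bool my_entry_has_read_iter(const struct my_entry *e)
	{
		return e->flags & HAS_READ_ITER;	/* no ops dereference */
	}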

--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c

@@ -198,7 +198,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
 {
 	int i, j;
 
-	if (cache == NULL)
+	if (IS_ERR(cache) || cache == NULL)
 		return;
 
 	for (i = 0; i < cache->entries; i++) {
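
The extra test matters because a kernel constructor commonly reports failure with an ERR_PTR()-encoded errno rather than NULL, and squashfs_cache_delete() can be handed such a value on an error path; checking only for NULL lets the error pointer through to the dereferences below. The combined test is equivalent to the IS_ERR_OR_NULL() helper from <linux/err.h>. A minimal sketch of the idiom with a hypothetical widget API (not squashfs code):

	#include <linux/err.h>
	#include <linux/slab.h>

	struct widget { int value; };

	static struct widget *widget_create(void)
	{
		struct widget *w = kzalloc(sizeof(*w), GFP_KERNEL);

		if (!w)
			return ERR_PTR(-ENOMEM);	/* error pointer, not NULL */
		return w;
	}

	static void widget_destroy(struct widget *w)
	{
		if (IS_ERR_OR_NULL(w))	/* reject both failure encodings */
			return;
		kfree(w);
	}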

--- a/include/linux/damon.h
+++ b/include/linux/damon.h

@@ -470,6 +470,11 @@ struct damos {
 	unsigned long next_apply_sis;
 	/* informs if ongoing DAMOS walk for this scheme is finished */
 	bool walk_completed;
+	/*
+	 * If the current region in the filtering stage is allowed by core
+	 * layer-handled filters.  If true, operations layer allows it, too.
+	 */
+	bool core_filters_allowed;
 /* public: */
 	struct damos_quota quota;
 	struct damos_watermarks wmarks;

--- a/include/linux/mm.h
+++ b/include/linux/mm.h

@@ -1458,7 +1458,10 @@ static inline void folio_get(struct folio *folio)
 
 static inline void get_page(struct page *page)
 {
-	folio_get(page_folio(page));
+	struct folio *folio = page_folio(page);
+	if (WARN_ON_ONCE(folio_test_slab(folio)))
+		return;
+	folio_get(folio);
 }
 
 static inline __must_check bool try_get_page(struct page *page)
@@ -1552,6 +1555,9 @@ static inline void put_page(struct page *page)
 {
 	struct folio *folio = page_folio(page);
 
+	if (folio_test_slab(folio))
+		return;
+
 	/*
 	 * For some devmap managed pages we need to catch refcount transition
 	 * from 2 to 1:

--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h

@@ -20,10 +20,13 @@ enum {
 	 * If in doubt, ignore this flag.
 	 */
 #ifdef MODULE
-	PROC_ENTRY_PERMANENT = 0U,
+	PROC_ENTRY_PERMANENT		= 0U,
 #else
-	PROC_ENTRY_PERMANENT = 1U << 0,
+	PROC_ENTRY_PERMANENT		= 1U << 0,
 #endif
+
+	PROC_ENTRY_proc_read_iter	= 1U << 1,
+	PROC_ENTRY_proc_compat_ioctl	= 1U << 2,
 };
 
 struct proc_ops {

--- a/include/linux/swap_cgroup.h
+++ b/include/linux/swap_cgroup.h

@@ -6,7 +6,7 @@
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
 
-extern void swap_cgroup_record(struct folio *folio, swp_entry_t ent);
+extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent);
 extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents);
 extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
 extern int swap_cgroup_swapon(int type, unsigned long max_pages);
@@ -15,7 +15,7 @@ extern void swap_cgroup_swapoff(int type);
 #else
 
 static inline
-void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
+void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent)
 {
 }

--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c

@@ -1190,8 +1190,12 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		if (!n)
 			return -ENOMEM;
 		p = *pages;
-		for (int k = 0; k < n; k++)
-			get_page(p[k] = page + k);
+		for (int k = 0; k < n; k++) {
+			struct folio *folio = page_folio(page);
+			p[k] = page + k;
+			if (!folio_test_slab(folio))
+				folio_get(folio);
+		}
 		maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start);
 		i->count -= maxsize;
 		i->iov_offset += maxsize;
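
Together with the include/linux/mm.h hunk above, this keeps reference counting for slab-backed memory balanced on both sides: get_page() now warns and declines to take a reference on a slab folio, put_page() silently ignores one, and __iov_iter_get_pages_alloc() skips the folio_get() for slab folios, so no unmatched get/put pair is ever created.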

--- a/mm/damon/core.c
+++ b/mm/damon/core.c

@@ -373,6 +373,7 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
 	 * or damon_attrs are updated.
 	 */
 	scheme->next_apply_sis = 0;
+	scheme->walk_completed = false;
 	INIT_LIST_HEAD(&scheme->filters);
 	scheme->stat = (struct damos_stat){};
 	INIT_LIST_HEAD(&scheme->list);
@@ -1429,9 +1430,13 @@ static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
 {
 	struct damos_filter *filter;
 
+	s->core_filters_allowed = false;
 	damos_for_each_filter(filter, s) {
-		if (damos_filter_match(ctx, t, r, filter))
+		if (damos_filter_match(ctx, t, r, filter)) {
+			if (filter->allow)
+				s->core_filters_allowed = true;
 			return !filter->allow;
+		}
 	}
 	return false;
 }

--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c

@@ -236,6 +236,9 @@ static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio)
 {
 	struct damos_filter *filter;
 
+	if (scheme->core_filters_allowed)
+		return false;
+
 	damos_for_each_filter(filter, scheme) {
 		if (damos_pa_filter_match(filter, folio))
 			return !filter->allow;
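
The two DAMON hunks implement a single contract: the core layer (damos_filter_out() above) records in s->core_filters_allowed whether one of its filters explicitly allowed the current region, and the operations layer (damos_pa_filter_out() here) returns early in that case, so an ops-layer filter can no longer override an allow decision the core layer already made.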

--- a/mm/filemap.c
+++ b/mm/filemap.c

@@ -1985,8 +1985,19 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 			if (err == -EEXIST)
 				goto repeat;
-			if (err)
+			if (err) {
+				/*
+				 * When NOWAIT I/O fails to allocate folios this could
+				 * be due to a nonblocking memory allocation and not
+				 * because the system actually is out of memory.
+				 * Return -EAGAIN so that the caller retries in a
+				 * blocking fashion instead of propagating -ENOMEM
+				 * to the application.
+				 */
+				if ((fgp_flags & FGP_NOWAIT) && err == -ENOMEM)
+					err = -EAGAIN;
 				return ERR_PTR(err);
+			}
 			/*
 			 * filemap_add_folio locks the page, and for mmap
 			 * we expect an unlocked page.
@@ -4083,17 +4094,6 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 		bytes = min(chunk - offset, bytes);
 		balance_dirty_pages_ratelimited(mapping);
 
-		/*
-		 * Bring in the user page that we will copy from _first_.
-		 * Otherwise there's a nasty deadlock on copying from the
-		 * same page as we're writing to, without it being marked
-		 * up-to-date.
-		 */
-		if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) {
-			status = -EFAULT;
-			break;
-		}
-
 		if (fatal_signal_pending(current)) {
 			status = -EINTR;
 			break;
@@ -4111,6 +4111,12 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 		if (mapping_writably_mapped(mapping))
 			flush_dcache_folio(folio);
 
+		/*
+		 * Faults here on mmap()s can recurse into arbitrary
+		 * filesystem code. Lots of locks are held that can
+		 * deadlock. Use an atomic copy to avoid deadlocking
+		 * in page fault handling.
+		 */
 		copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
 		flush_dcache_folio(folio);
 
@@ -4136,6 +4142,16 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 			bytes = copied;
 			goto retry;
 		}
+
+		/*
+		 * 'folio' is now unlocked and faults on it can be
+		 * handled. Ensure forward progress by trying to
+		 * fault it in now.
+		 */
+		if (fault_in_iov_iter_readable(i, bytes) == bytes) {
+			status = -EFAULT;
+			break;
+		}
 	} else {
 		pos += status;
 		written += status;

--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c

@@ -3304,7 +3304,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 				folio_account_cleaned(tail,
 					inode_to_wb(folio->mapping->host));
 			__filemap_remove_folio(tail, NULL);
-			folio_put(tail);
+			folio_put_refs(tail, folio_nr_pages(tail));
 		} else if (!folio_test_anon(folio)) {
 			__xa_store(&folio->mapping->i_pages, tail->index,
 					tail, 0);
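
A worked example of why the count matters once splits can produce multi-page tails: a pagecache folio is referenced once per page slot it occupies in the mapping, so a beyond-EOF order-3 tail (eight pages) dropped here carries eight such references. folio_put(tail) released only one of them, leaking the folio; folio_put_refs(tail, folio_nr_pages(tail)) releases all eight, and still releases exactly one for an order-0 tail, where folio_nr_pages() is 1.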

--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c

@@ -2135,6 +2135,8 @@ int dissolve_free_hugetlb_folio(struct folio *folio)
 
 	if (!folio_ref_count(folio)) {
 		struct hstate *h = folio_hstate(folio);
+		bool adjust_surplus = false;
+
 		if (!available_huge_pages(h))
 			goto out;
 
@@ -2157,7 +2159,9 @@ int dissolve_free_hugetlb_folio(struct folio *folio)
 			goto retry;
 		}
 
-		remove_hugetlb_folio(h, folio, false);
+		if (h->surplus_huge_pages_node[folio_nid(folio)])
+			adjust_surplus = true;
+		remove_hugetlb_folio(h, folio, adjust_surplus);
 		h->max_huge_pages--;
 		spin_unlock_irq(&hugetlb_lock);
 
@@ -2177,7 +2181,7 @@ int dissolve_free_hugetlb_folio(struct folio *folio)
 		rc = hugetlb_vmemmap_restore_folio(h, folio);
 		if (rc) {
 			spin_lock_irq(&hugetlb_lock);
-			add_hugetlb_folio(h, folio, false);
+			add_hugetlb_folio(h, folio, adjust_surplus);
 			h->max_huge_pages++;
 			goto out;
 		}

--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c

@@ -1921,9 +1921,18 @@ void drain_all_stock(struct mem_cgroup *root_memcg)
 static int memcg_hotplug_cpu_dead(unsigned int cpu)
 {
 	struct memcg_stock_pcp *stock;
+	struct obj_cgroup *old;
+	unsigned long flags;
 
 	stock = &per_cpu(memcg_stock, cpu);
+
+	/* drain_obj_stock requires stock_lock */
+	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	old = drain_obj_stock(stock);
+	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+
 	drain_stock(stock);
+	obj_cgroup_put(old);
 
 	return 0;
 }
@@ -4993,7 +5002,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
 		mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
 	mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
 
-	swap_cgroup_record(folio, entry);
+	swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry);
 
 	folio_unqueue_deferred_split(folio);
 	folio->memcg_data = 0;
@@ -5055,7 +5064,7 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
 		mem_cgroup_id_get_many(memcg, nr_pages - 1);
 	mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
 
-	swap_cgroup_record(folio, entry);
+	swap_cgroup_record(folio, mem_cgroup_id(memcg), entry);
 
 	return 0;
 }
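
The swap_cgroup_record() signature change (see also include/linux/swap_cgroup.h and mm/swap_cgroup.c) is what fixes the counter leak: the old helper derived the id from folio_memcg(folio) internally, but mem_cgroup_swapout() may charge the swap entry to an online ancestor (swap_memcg) when the folio's own cgroup is offline. Recording one cgroup while charging another left a swap charge that could never be uncharged from the cgroup that actually holds it; passing the charged cgroup's id explicitly keeps the record and the charge consistent.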

--- a/mm/migrate.c
+++ b/mm/migrate.c

@@ -518,15 +518,13 @@ static int __folio_migrate_mapping(struct address_space *mapping,
 	if (folio_test_anon(folio) && folio_test_large(folio))
 		mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);
 	folio_ref_add(newfolio, nr); /* add cache reference */
-	if (folio_test_swapbacked(folio)) {
+	if (folio_test_swapbacked(folio))
 		__folio_set_swapbacked(newfolio);
-		if (folio_test_swapcache(folio)) {
-			folio_set_swapcache(newfolio);
-			newfolio->private = folio_get_private(folio);
-		}
+	if (folio_test_swapcache(folio)) {
+		folio_set_swapcache(newfolio);
+		newfolio->private = folio_get_private(folio);
 		entries = nr;
 	} else {
 		VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio);
 		entries = 1;
 	}

--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c

@@ -7004,7 +7004,7 @@ static inline bool has_unaccepted_memory(void)
 
 static bool cond_accept_memory(struct zone *zone, unsigned int order)
 {
-	long to_accept;
+	long to_accept, wmark;
 	bool ret = false;
 
 	if (!has_unaccepted_memory())
@@ -7013,8 +7013,18 @@ static bool cond_accept_memory(struct zone *zone, unsigned int order)
 	if (list_empty(&zone->unaccepted_pages))
 		return false;
 
+	wmark = promo_wmark_pages(zone);
+
+	/*
+	 * Watermarks have not been initialized yet.
+	 *
+	 * Accepting one MAX_ORDER page to ensure progress.
+	 */
+	if (!wmark)
+		return try_to_accept_memory_one(zone);
+
 	/* How much to accept to get to promo watermark? */
-	to_accept = promo_wmark_pages(zone) -
+	to_accept = wmark -
 		(zone_page_state(zone, NR_FREE_PAGES) -
 			__zone_watermark_unusable_free(zone, order, 0) -
 			zone_page_state(zone, NR_UNACCEPTED));
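
The guard covers early boot, when watermarks are still zero: promo_wmark_pages(zone) returns 0, so the old formula computed to_accept = 0 - (free - unusable - unaccepted), which is not positive, and nothing would be accepted, which can stall boot on unaccepted-memory guests (TDX/SNP). Accepting one MAX_ORDER page per call keeps things moving until watermarks are initialized. With watermarks in place, illustrative numbers, say wmark = 1024 pages, NR_FREE_PAGES = 4096, unusable = 512 and NR_UNACCEPTED = 3072, give to_accept = 1024 - (4096 - 512 - 3072) = 512 pages still to accept.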

--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c

@@ -58,9 +58,11 @@ static unsigned short __swap_cgroup_id_xchg(struct swap_cgroup *map,
  * entries must not have been charged
  *
  * @folio: the folio that the swap entry belongs to
+ * @id: mem_cgroup ID to be recorded
  * @ent: the first swap entry to be recorded
  */
-void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
+void swap_cgroup_record(struct folio *folio, unsigned short id,
+			swp_entry_t ent)
 {
 	unsigned int nr_ents = folio_nr_pages(folio);
 	struct swap_cgroup *map;
@@ -72,8 +74,7 @@ void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
 	map = swap_cgroup_ctrl[swp_type(ent)].map;
 
 	do {
-		old = __swap_cgroup_id_xchg(map, offset,
-					    mem_cgroup_id(folio_memcg(folio)));
+		old = __swap_cgroup_id_xchg(map, offset, id);
 		VM_BUG_ON(old);
 	} while (++offset != end);
 }

--- a/mm/vma.c
+++ b/mm/vma.c

@@ -2381,7 +2381,8 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
 	 * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
 	 * call covers the non-merge case.
 	 */
-	khugepaged_enter_vma(vma, map->flags);
+	if (!vma_is_anonymous(vma))
+		khugepaged_enter_vma(vma, map->flags);
 	ksm_add_vma(vma);
 	*vmap = vma;
 	return 0;

--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh

@@ -304,7 +304,9 @@ uffd_stress_bin=./uffd-stress
 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
 # Hugetlb tests require source and destination huge pages. Pass in half
 # the size of the free pages we have, which is used for *each*.
-half_ufd_size_MB=$((freepgs / 2))
+# uffd-stress expects a region expressed in MiB, so we adjust
+# half_ufd_size_MB accordingly.
+half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2))
 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
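
The unit fix in numbers: freepgs counts free huge pages, while uffd-stress expects a size in MiB. With 64 free 2 MiB huge pages (hpgsize_KB = 2048), the old expression passed 64 / 2 = 32, silently treating a page count as MiB; the corrected one passes (64 * 2048) / 1024 / 2 = 64 MiB for each of the source and destination areas.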