mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap

The root anon_vma of all anon_vma's linked to a VMA must by definition be
the same - a VMA and all of its descendants/ancestors must exist in the
same CoW chain.

Commit bb4aa39676 ("mm: avoid repeated anon_vma lock/unlock sequences in
anon_vma_clone()"), dating from 2011, introduced paranoid checking that the
root anon_vma remains the same across all AVCs.

I think 15 years later we can safely assume that this is always the case.

Additionally, since cloning from or unlinking an unfaulted VMA is a no-op,
we can simply lock the anon_vma associated with the VMA rather than doing
any specific dance around this.

This removes unnecessary checks and makes it clear that the root anon_vma
is shared between all anon_vma's in a given VMA's anon_vma_chain.

Link: https://lkml.kernel.org/r/838030d2f0772b99fa99ff4b4fd571353f14a1a9.1768746221.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Chris Li <chriscli@google.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Jann Horn <jannh@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Lorenzo Stoakes
2026-01-18 14:50:40 +00:00
committed by Andrew Morton
parent 69e9458455
commit 535f6b8df1

View File

@@ -231,32 +231,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
return -ENOMEM;
}
/*
 * Helper for walking a vma->anon_vma_chain while keeping the anon_vma root
 * write-locked.
 *
 * @root is the root currently locked by the caller (NULL if none yet) and
 * @anon_vma is the anon_vma about to be visited. If @anon_vma's root is
 * already @root, nothing is done. Otherwise any previously locked root is
 * released (with a one-time warning, since every anon_vma on the chain is
 * expected to share one root) and the new root's rwsem is taken for write.
 *
 * Returns the root whose rwsem is write-locked on exit; the caller passes it
 * back in on the next iteration.
 */
static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
{
	struct anon_vma *wanted = anon_vma->root;

	/* Common case: the required root is the one already locked. */
	if (wanted == root)
		return root;

	/* Switching roots mid-walk is unexpected; warn and drop the old lock. */
	if (WARN_ON_ONCE(root))
		up_write(&root->rwsem);

	down_write(&wanted->rwsem);
	return wanted;
}
/*
 * Release the write lock on @root's rwsem. Does nothing when @root is NULL.
 */
static inline void unlock_anon_vma_root(struct anon_vma *root)
{
	if (!root)
		return;

	up_write(&root->rwsem);
}
static void check_anon_vma_clone(struct vm_area_struct *dst,
struct vm_area_struct *src)
{
@@ -309,26 +283,28 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma);
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
struct anon_vma_chain *avc, *pavc;
struct anon_vma *root = NULL;
check_anon_vma_clone(dst, src);
if (!src->anon_vma)
return 0;
check_anon_vma_clone(dst, src);
/* All anon_vma's share the same root. */
anon_vma_lock_write(src->anon_vma);
list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma;
avc = anon_vma_chain_alloc(GFP_NOWAIT);
if (unlikely(!avc)) {
unlock_anon_vma_root(root);
root = NULL;
anon_vma_unlock_write(src->anon_vma);
avc = anon_vma_chain_alloc(GFP_KERNEL);
if (!avc)
goto enomem_failure;
anon_vma_lock_write(src->anon_vma);
}
anon_vma = pavc->anon_vma;
root = lock_anon_vma_root(root, anon_vma);
anon_vma_chain_link(dst, avc, anon_vma);
/*
@@ -345,7 +321,8 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
}
if (dst->anon_vma)
dst->anon_vma->num_active_vmas++;
unlock_anon_vma_root(root);
anon_vma_unlock_write(src->anon_vma);
return 0;
enomem_failure:
@@ -475,17 +452,19 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma)
void unlink_anon_vmas(struct vm_area_struct *vma)
{
struct anon_vma_chain *avc, *next;
struct anon_vma *root = NULL;
struct anon_vma *active_anon_vma = vma->anon_vma;
/* Always hold mmap lock, read-lock on unmap possibly. */
mmap_assert_locked(vma->vm_mm);
/* Unfaulted is a no-op. */
if (!vma->anon_vma) {
if (!active_anon_vma) {
VM_WARN_ON_ONCE(!list_empty(&vma->anon_vma_chain));
return;
}
anon_vma_lock_write(active_anon_vma);
/*
* Unlink each anon_vma chained to the VMA. This list is ordered
* from newest to oldest, ensuring the root anon_vma gets freed last.
@@ -493,7 +472,6 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma = avc->anon_vma;
root = lock_anon_vma_root(root, anon_vma);
anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
/*
@@ -509,13 +487,14 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
anon_vma_chain_free(avc);
}
vma->anon_vma->num_active_vmas--;
active_anon_vma->num_active_vmas--;
/*
* vma would still be needed after unlink, and anon_vma will be prepared
* when handle fault.
*/
vma->anon_vma = NULL;
unlock_anon_vma_root(root);
anon_vma_unlock_write(active_anon_vma);
/*
* Iterate the list once more, it now only contains empty and unlinked