diff --git a/mm/madvise.c b/mm/madvise.c index f1109c2c63a4..764a532ff62a 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -57,17 +57,27 @@ enum madvise_lock_mode { MADVISE_VMA_READ_LOCK, }; +struct madvise_behavior_range { + unsigned long start; + unsigned long end; +}; + struct madvise_behavior { struct mm_struct *mm; int behavior; struct mmu_gather *tlb; enum madvise_lock_mode lock_mode; struct anon_vma_name *anon_name; + + /* + * The range over which the behaviour is currently being applied. If + * traversing multiple VMAs, this is updated for each. + */ + struct madvise_behavior_range range; }; #ifdef CONFIG_ANON_VMA_NAME -static int madvise_walk_vmas(unsigned long start, unsigned long end, - struct madvise_behavior *madv_behavior); +static int madvise_walk_vmas(struct madvise_behavior *madv_behavior); struct anon_vma_name *anon_vma_name_alloc(const char *name) { @@ -147,7 +157,9 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, if (end == start) return 0; - return madvise_walk_vmas(start, end, &madv_behavior); + madv_behavior.range.start = start; + madv_behavior.range.end = end; + return madvise_walk_vmas(&madv_behavior); } #else /* CONFIG_ANON_VMA_NAME */ static int replace_anon_vma_name(struct vm_area_struct *vma, @@ -993,12 +1005,13 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, return -EINVAL; } -static long madvise_populate(unsigned long start, unsigned long end, - struct madvise_behavior *madv_behavior) +static long madvise_populate(struct madvise_behavior *madv_behavior) { struct mm_struct *mm = madv_behavior->mm; const bool write = madv_behavior->behavior == MADV_POPULATE_WRITE; int locked = 1; + unsigned long start = madv_behavior->range.start; + unsigned long end = madv_behavior->range.end; long pages; while (start < end) { @@ -1289,12 +1302,13 @@ static long madvise_guard_remove(struct vm_area_struct *vma, */ static int madvise_vma_behavior(struct vm_area_struct *vma, struct vm_area_struct **prev, - unsigned long start, unsigned long end, struct madvise_behavior *madv_behavior) { int behavior = madv_behavior->behavior; struct anon_vma_name *anon_name = madv_behavior->anon_name; vm_flags_t new_flags = vma->vm_flags; + unsigned long start = madv_behavior->range.start; + unsigned long end = madv_behavior->range.end; bool set_new_anon_name = behavior == __MADV_SET_ANON_VMA_NAME; int error; @@ -1411,10 +1425,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma, /* * Error injection support for memory error handling. */ -static int madvise_inject_error(unsigned long start, unsigned long end, - struct madvise_behavior *madv_behavior) +static int madvise_inject_error(struct madvise_behavior *madv_behavior) { unsigned long size; + unsigned long start = madv_behavior->range.start; + unsigned long end = madv_behavior->range.end; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1468,8 +1483,7 @@ static bool is_memory_failure(struct madvise_behavior *madv_behavior) #else -static int madvise_inject_error(unsigned long start, unsigned long end, - struct madvise_behavior *madv_behavior) +static int madvise_inject_error(struct madvise_behavior *madv_behavior) { return 0; } @@ -1551,21 +1565,20 @@ static bool process_madvise_remote_valid(int behavior) * If a VMA read lock could not be acquired, we return NULL and expect caller to * fallback to mmap lock behaviour. */ -static struct vm_area_struct *try_vma_read_lock( - struct madvise_behavior *madv_behavior, - unsigned long start, unsigned long end) +static +struct vm_area_struct *try_vma_read_lock(struct madvise_behavior *madv_behavior) { struct mm_struct *mm = madv_behavior->mm; struct vm_area_struct *vma; - vma = lock_vma_under_rcu(mm, start); + vma = lock_vma_under_rcu(mm, madv_behavior->range.start); if (!vma) goto take_mmap_read_lock; /* * Must span only a single VMA; uffd and remote processes are * unsupported. */ - if (end > vma->vm_end || current->mm != mm || + if (madv_behavior->range.end > vma->vm_end || current->mm != mm || userfaultfd_armed(vma)) { vma_end_read(vma); goto take_mmap_read_lock; @@ -1588,13 +1601,14 @@ static struct vm_area_struct *try_vma_read_lock( * reading or writing. */ static -int madvise_walk_vmas(unsigned long start, unsigned long end, - struct madvise_behavior *madv_behavior) +int madvise_walk_vmas(struct madvise_behavior *madv_behavior) { struct mm_struct *mm = madv_behavior->mm; + struct madvise_behavior_range *range = &madv_behavior->range; + /* range is updated to span each VMA, so store end of entire range. */ + unsigned long last_end = range->end; struct vm_area_struct *vma; struct vm_area_struct *prev; - unsigned long tmp; int unmapped_error = 0; int error; @@ -1603,11 +1617,10 @@ int madvise_walk_vmas(unsigned long start, unsigned long end, * tentatively, avoiding walking VMAs. */ if (madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK) { - vma = try_vma_read_lock(madv_behavior, start, end); + vma = try_vma_read_lock(madv_behavior); if (vma) { prev = vma; - error = madvise_vma_behavior(vma, &prev, start, end, - madv_behavior); + error = madvise_vma_behavior(vma, &prev, madv_behavior); vma_end_read(vma); return error; } @@ -1618,8 +1631,8 @@ int madvise_walk_vmas(unsigned long start, unsigned long end, * ranges, just ignore them, but return -ENOMEM at the end. * - different from the way of handling in mlock etc. */ - vma = find_vma_prev(mm, start, &prev); - if (vma && start > vma->vm_start) + vma = find_vma_prev(mm, range->start, &prev); + if (vma && range->start > vma->vm_start) prev = vma; for (;;) { @@ -1627,33 +1640,30 @@ int madvise_walk_vmas(unsigned long start, unsigned long end, if (!vma) return -ENOMEM; - /* Here start < (end|vma->vm_end). */ - if (start < vma->vm_start) { + /* Here start < (last_end|vma->vm_end). */ + if (range->start < vma->vm_start) { unmapped_error = -ENOMEM; - start = vma->vm_start; - if (start >= end) + range->start = vma->vm_start; + if (range->start >= last_end) break; } - /* Here vma->vm_start <= start < (end|vma->vm_end) */ - tmp = vma->vm_end; - if (end < tmp) - tmp = end; + /* Here vma->vm_start <= range->start < (last_end|vma->vm_end) */ + range->end = min(vma->vm_end, last_end); - /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ - error = madvise_vma_behavior(vma, &prev, start, tmp, - madv_behavior); + /* Here vma->vm_start <= range->start < range->end <= (last_end|vma->vm_end). */ + error = madvise_vma_behavior(vma, &prev, madv_behavior); if (error) return error; - start = tmp; - if (prev && start < prev->vm_end) - start = prev->vm_end; - if (start >= end) + range->start = range->end; + if (prev && range->start < prev->vm_end) + range->start = prev->vm_end; + if (range->start >= last_end) break; if (prev) vma = find_vma(mm, prev->vm_end); else /* madvise_remove dropped mmap_lock */ - vma = find_vma(mm, start); + vma = find_vma(mm, range->start); } return unmapped_error; @@ -1834,22 +1844,23 @@ static int madvise_do_behavior(unsigned long start, size_t len_in, struct madvise_behavior *madv_behavior) { struct blk_plug plug; - unsigned long end; int error; + struct madvise_behavior_range *range = &madv_behavior->range; if (is_memory_failure(madv_behavior)) { - end = start + len_in; - return madvise_inject_error(start, end, madv_behavior); + range->start = start; + range->end = start + len_in; + return madvise_inject_error(madv_behavior); } - start = get_untagged_addr(madv_behavior->mm, start); - end = start + PAGE_ALIGN(len_in); + range->start = get_untagged_addr(madv_behavior->mm, start); + range->end = range->start + PAGE_ALIGN(len_in); blk_start_plug(&plug); if (is_madvise_populate(madv_behavior)) - error = madvise_populate(start, end, madv_behavior); + error = madvise_populate(madv_behavior); else - error = madvise_walk_vmas(start, end, madv_behavior); + error = madvise_walk_vmas(madv_behavior); blk_finish_plug(&plug); return error; }