From 7e97de0b033bcac4fa9a35cef72e0c06e6a22c67 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 2 Aug 2018 15:36:01 -0700 Subject: [PATCH 1/3] memcg: remove memcg_cgroup::id from IDR on mem_cgroup_css_alloc() failure In case of memcg_online_kmem() failure, memcg_cgroup::id remains hashed in mem_cgroup_idr even after memcg memory is freed. This leads to leak of ID in mem_cgroup_idr. This patch adds removal into mem_cgroup_css_alloc(), which fixes the problem. For better readability, it adds a generic helper which is used in mem_cgroup_alloc() and mem_cgroup_id_put_many() as well. Link: http://lkml.kernel.org/r/152354470916.22460.14397070748001974638.stgit@localhost.localdomain Fixes 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after many small jobs") Signed-off-by: Kirill Tkhai Acked-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8c0280b3143e..b2173f7e5164 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4037,6 +4037,14 @@ static struct cftype mem_cgroup_legacy_files[] = { static DEFINE_IDR(mem_cgroup_idr); +static void mem_cgroup_id_remove(struct mem_cgroup *memcg) +{ + if (memcg->id.id > 0) { + idr_remove(&mem_cgroup_idr, memcg->id.id); + memcg->id.id = 0; + } +} + static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) { VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0); @@ -4047,8 +4055,7 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) { VM_BUG_ON(atomic_read(&memcg->id.ref) < n); if (atomic_sub_and_test(n, &memcg->id.ref)) { - idr_remove(&mem_cgroup_idr, memcg->id.id); - memcg->id.id = 0; + mem_cgroup_id_remove(memcg); /* Memcg ID pins CSS */ css_put(&memcg->css); @@ -4185,8 +4192,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); return memcg; fail: - if (memcg->id.id > 0) - idr_remove(&mem_cgroup_idr, memcg->id.id); + mem_cgroup_id_remove(memcg); __mem_cgroup_free(memcg); return NULL; } @@ -4245,6 +4251,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) return &memcg->css; fail: + mem_cgroup_id_remove(memcg); mem_cgroup_free(memcg); return ERR_PTR(-ENOMEM); } From eec3636ad198d4ac61e574cb122cb67e9bef5492 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Thu, 2 Aug 2018 15:36:05 -0700 Subject: [PATCH 2/3] ipc/shm.c add ->pagesize function to shm_vm_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 05ea88608d4e ("mm, hugetlbfs: introduce ->pagesize() to vm_operations_struct") adds a new ->pagesize() function to hugetlb_vm_ops, intended to cover all hugetlbfs backed files. With System V shared memory model, if "huge page" is specified, the "shared memory" is backed by hugetlbfs files, but the mappings initiated via shmget/shmat have their original vm_ops overwritten with shm_vm_ops, so we need to add a ->pagesize function to shm_vm_ops. Otherwise, vma_kernel_pagesize() returns PAGE_SIZE given a hugetlbfs backed vma, result in below BUG: fs/hugetlbfs/inode.c 443 if (unlikely(page_mapped(page))) { 444 BUG_ON(truncate_op); resulting in hugetlbfs: oracle (4592): Using mlock ulimits for SHM_HUGETLB is deprecated ------------[ cut here ]------------ kernel BUG at fs/hugetlbfs/inode.c:444! Modules linked in: nfsv3 rpcsec_gss_krb5 nfsv4 ... CPU: 35 PID: 5583 Comm: oracle_5583_sbt Not tainted 4.14.35-1829.el7uek.x86_64 #2 RIP: 0010:remove_inode_hugepages+0x3db/0x3e2 .... Call Trace: hugetlbfs_evict_inode+0x1e/0x3e evict+0xdb/0x1af iput+0x1a2/0x1f7 dentry_unlink_inode+0xc6/0xf0 __dentry_kill+0xd8/0x18d dput+0x1b5/0x1ed __fput+0x18b/0x216 ____fput+0xe/0x10 task_work_run+0x90/0xa7 exit_to_usermode_loop+0xdd/0x116 do_syscall_64+0x187/0x1ae entry_SYSCALL_64_after_hwframe+0x150/0x0 [jane.chu@oracle.com: relocate comment] Link: http://lkml.kernel.org/r/20180731044831.26036-1-jane.chu@oracle.com Link: http://lkml.kernel.org/r/20180727211727.5020-1-jane.chu@oracle.com Fixes: 05ea88608d4e13 ("mm, hugetlbfs: introduce ->pagesize() to vm_operations_struct") Signed-off-by: Jane Chu Suggested-by: Mike Kravetz Reviewed-by: Mike Kravetz Acked-by: Davidlohr Bueso Acked-by: Michal Hocko Cc: Dan Williams Cc: Jan Kara Cc: Jérôme Glisse Cc: Manfred Spraul Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/shm.c | 12 ++++++++++++ mm/hugetlb.c | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/ipc/shm.c b/ipc/shm.c index 051a3e1fb8df..fefa00d310fb 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -427,6 +427,17 @@ static int shm_split(struct vm_area_struct *vma, unsigned long addr) return 0; } +static unsigned long shm_pagesize(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + + if (sfd->vm_ops->pagesize) + return sfd->vm_ops->pagesize(vma); + + return PAGE_SIZE; +} + #ifdef CONFIG_NUMA static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) { @@ -554,6 +565,7 @@ static const struct vm_operations_struct shm_vm_ops = { .close = shm_close, /* callback for when the vm-area is released */ .fault = shm_fault, .split = shm_split, + .pagesize = shm_pagesize, #if defined(CONFIG_NUMA) .set_policy = shm_set_policy, .get_policy = shm_get_policy, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 039ddbc574e9..3103099f64fd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3167,6 +3167,13 @@ static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf) return 0; } +/* + * When a new function is introduced to vm_operations_struct and added + * to hugetlb_vm_ops, please consider adding the function to shm_vm_ops. + * This is because under System V memory model, mappings created via + * shmget/shmat with "huge page" specified are backed by hugetlbfs files, + * their original vm_ops are overwritten with shm_vm_ops. + */ const struct vm_operations_struct hugetlb_vm_ops = { .fault = hugetlb_vm_op_fault, .open = hugetlb_vm_op_open, From 31e810aa1033a7db50a2746cd34a2432237f6420 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 2 Aug 2018 15:36:09 -0700 Subject: [PATCH 3/3] userfaultfd: remove uffd flags from vma->vm_flags if UFFD_EVENT_FORK fails The fix in commit 0cbb4b4f4c44 ("userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails") cleared the vma->vm_userfaultfd_ctx but kept userfaultfd flags in vma->vm_flags that were copied from the parent process VMA. As the result, there is an inconsistency between the values of vma->vm_userfaultfd_ctx.ctx and vma->vm_flags which triggers BUG_ON in userfaultfd_release(). Clearing the uffd flags from vma->vm_flags in case of UFFD_EVENT_FORK failure resolves the issue. Link: http://lkml.kernel.org/r/1532931975-25473-1-git-send-email-rppt@linux.vnet.ibm.com Fixes: 0cbb4b4f4c44 ("userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails") Signed-off-by: Mike Rapoport Reported-by: syzbot+121be635a7a35ddb7dcb@syzkaller.appspotmail.com Cc: Andrea Arcangeli Cc: Eric Biggers Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 594d192b2331..bad9cea37f12 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -633,8 +633,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ down_write(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) + if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING); + } up_write(&mm->mmap_sem); userfaultfd_ctx_put(release_new_ctx);