From 6b47d35d4d9e6a3fc7b456bb7f84aea807df5b11 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 19 Feb 2025 10:22:24 -0700 Subject: [PATCH 1/3] eventpoll: abstract out parameter sanity checking Add a helper that checks the validity of the file descriptor and other parameters passed in to epoll_wait(). Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20250219172552.1565603-2-axboe@kernel.dk Signed-off-by: Christian Brauner --- fs/eventpoll.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 7c0980db77b3..565bf451df82 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2445,6 +2445,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, return do_epoll_ctl(epfd, op, fd, &epds, false); } +static int ep_check_params(struct file *file, struct epoll_event __user *evs, + int maxevents) +{ + /* The maximum number of event must be greater than zero */ + if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) + return -EINVAL; + + /* Verify that the area passed by the user is writeable */ + if (!access_ok(evs, maxevents * sizeof(struct epoll_event))) + return -EFAULT; + + /* + * We have to check that the file structure underneath the fd + * the user passed to us _is_ an eventpoll file. + */ + if (!is_file_epoll(file)) + return -EINVAL; + + return 0; +} + /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_wait(2). @@ -2453,26 +2474,16 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *to) { struct eventpoll *ep; - - /* The maximum number of event must be greater than zero */ - if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) - return -EINVAL; - - /* Verify that the area passed by the user is writeable */ - if (!access_ok(events, maxevents * sizeof(struct epoll_event))) - return -EFAULT; + int ret; /* Get the "struct file *" for the eventpoll file */ CLASS(fd, f)(epfd); if (fd_empty(f)) return -EBADF; - /* - * We have to check that the file structure underneath the fd - * the user passed to us _is_ an eventpoll file. - */ - if (!is_file_epoll(fd_file(f))) - return -EINVAL; + ret = ep_check_params(fd_file(f), events, maxevents); + if (unlikely(ret)) + return ret; /* * At this point it is safe to assume that the "private_data" contains From 38d203560118a673018df5892a6555bb0aba7762 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 19 Feb 2025 10:22:25 -0700 Subject: [PATCH 2/3] eventpoll: abstract out ep_try_send_events() helper In preparation for reusing this helper in another epoll setup helper, abstract it out. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20250219172552.1565603-3-axboe@kernel.dk Signed-off-by: Christian Brauner --- fs/eventpoll.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 565bf451df82..14466765b85d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1980,6 +1980,22 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry, return ret; } +static int ep_try_send_events(struct eventpoll *ep, + struct epoll_event __user *events, int maxevents) +{ + int res; + + /* + * Try to transfer events to user space. In case we get 0 events and + * there's still timeout left over, we go trying again in search of + * more luck. + */ + res = ep_send_events(ep, events, maxevents); + if (res > 0) + ep_suspend_napi_irqs(ep); + return res; +} + /** * ep_poll - Retrieves ready events, and delivers them to the caller-supplied * event buffer. @@ -2031,17 +2047,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, while (1) { if (eavail) { - /* - * Try to transfer events to user space. In case we get - * 0 events and there's still timeout left over, we go - * trying again in search of more luck. - */ - res = ep_send_events(ep, events, maxevents); - if (res) { - if (res > 0) - ep_suspend_napi_irqs(ep); + res = ep_try_send_events(ep, events, maxevents); + if (res) return res; - } } if (timed_out) From ae3a4f1fdc2cfad089e79e2ee4697f84941528d3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 19 Feb 2025 10:22:26 -0700 Subject: [PATCH 3/3] eventpoll: add epoll_sendevents() helper Basic helper that copies ready events to the specified userspace address. The event checking is quick and racy, it's up to the caller to ensure it retries appropriately in case 0 events are copied. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20250219172552.1565603-4-axboe@kernel.dk Signed-off-by: Christian Brauner --- fs/eventpoll.c | 20 ++++++++++++++++++++ include/linux/eventpoll.h | 4 ++++ 2 files changed, 24 insertions(+) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 14466765b85d..94b87aaad0f6 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2474,6 +2474,26 @@ static int ep_check_params(struct file *file, struct epoll_event __user *evs, return 0; } +int epoll_sendevents(struct file *file, struct epoll_event __user *events, + int maxevents) +{ + struct eventpoll *ep; + int ret; + + ret = ep_check_params(file, events, maxevents); + if (unlikely(ret)) + return ret; + + ep = file->private_data; + /* + * Racy call, but that's ok - it should get retried based on + * poll readiness anyway. + */ + if (ep_events_available(ep)) + return ep_try_send_events(ep, events, maxevents); + return 0; +} + /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_wait(2). diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 0c0d00fcd131..ccb478eb174b 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -25,6 +25,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t /* Used to release the epoll bits inside the "struct file" */ void eventpoll_release_file(struct file *file); +/* Copy ready events to userspace */ +int epoll_sendevents(struct file *file, struct epoll_event __user *events, + int maxevents); + /* * This is called from inside fs/file_table.c:__fput() to unlink files * from the eventpoll interface. We need to have this facility to cleanup