From b5a899154aa94cc573db3ae1f61dabe7bfe8b579 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Mar 2024 21:24:06 -0800 Subject: [PATCH 1/3] netlink: handle EMSGSIZE errors in the core Eric points out that our current suggested way of handling EMSGSIZE errors ((err == -EMSGSIZE) ? skb->len : err) will break if we didn't fit even a single object into the buffer provided by the user. This should not happen for well behaved applications, but we can fix that, and free netlink families from dealing with that completely by moving error handling into the core. Let's assume from now on that all EMSGSIZE errors in dumps are because we run out of skb space. Families can now propagate the error nla_put_*() etc generated and not worry about any return value magic. If some family really wants to send EMSGSIZE to user space, assuming it generates the same error on the next dump iteration the skb->len should be 0, and user space should still see the EMSGSIZE. This should simplify families and prevent mistakes in return values which lead to DONE being forced into a separate recv() call as discovered by Ido some time ago. Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ad7b645e3ae7..da846212fb9b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2267,6 +2267,15 @@ static int netlink_dump(struct sock *sk, bool lock_taken) if (extra_mutex) mutex_unlock(extra_mutex); + /* EMSGSIZE plus something already in the skb means + * that there's more to dump but current skb has filled up. + * If the callback really wants to return EMSGSIZE to user space + * it needs to do so again, on the next cb->dump() call, + * without putting data in the skb. 
+ */ + if (nlk->dump_done_errno == -EMSGSIZE && skb->len) + nlk->dump_done_errno = skb->len; + cb->extack = NULL; } From 0b11b1c5c320555483e8a94c44549db24c289987 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Mar 2024 21:24:07 -0800 Subject: [PATCH 2/3] netdev: let netlink core handle -EMSGSIZE errors The previous change added -EMSGSIZE handling to af_netlink, so we don't have to hide these errors any longer. Theoretically the error handling changes from: if (err == -EMSGSIZE) to if (err == -EMSGSIZE && skb->len) everywhere, but in practice it doesn't matter. All messages fit into NLMSG_GOODSIZE, so overflow of an empty skb cannot happen. Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/netdev-genl.c | 15 +++------------ net/core/page_pool_user.c | 2 -- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index fd98936da3ae..918b109e0cf4 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -152,10 +152,7 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } rtnl_unlock(); - if (err != -EMSGSIZE) - return err; - - return skb->len; + return err; } static int @@ -287,10 +284,7 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } rtnl_unlock(); - if (err != -EMSGSIZE) - return err; - - return skb->len; + return err; } static int @@ -463,10 +457,7 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } rtnl_unlock(); - if (err != -EMSGSIZE) - return err; - - return skb->len; + return err; } static int netdev_genl_netdevice_event(struct notifier_block *nb, diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index ffe5244e5597..53ad96f71b63 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -102,8 +102,6 @@ netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb, 
mutex_unlock(&page_pools_lock); rtnl_unlock(); - if (skb->len && err == -EMSGSIZE) - return skb->len; return err; } From 87d381973e49404f658d6923a617932eeda9415f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Mar 2024 21:24:08 -0800 Subject: [PATCH 3/3] genetlink: fit NLMSG_DONE into same read() as families Make sure ctrl_fill_info() returns sensible error codes and propagate them out to netlink core. Let netlink core decide when to return skb->len and when to treat the exit as an error. Netlink core does a better job at it: if we always return skb->len, the core doesn't know when we're done dumping and NLMSG_DONE ends up in a separate read(). Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 50ec599a5cff..3b7666944b11 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1232,7 +1232,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) - return -1; + return -EMSGSIZE; if (nla_put_string(skb, CTRL_ATTR_FAMILY_NAME, family->name) || nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id) || @@ -1355,6 +1355,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); int fams_to_skip = cb->args[0]; unsigned int id; + int err = 0; idr_for_each_entry(&genl_fam_idr, rt, id) { if (!rt->netnsok && !net_eq(net, &init_net)) @@ -1363,16 +1364,17 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) if (n++ < fams_to_skip) continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - skb, CTRL_CMD_NEWFAMILY) < 0) { + err = ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, 
skb, CTRL_CMD_NEWFAMILY); + if (err) { n--; break; } } cb->args[0] = n; - return skb->len; + return err; } static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family,