From d6c7b03497eef8b66bf0b5572881359913e39787 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:39 -0800 Subject: [PATCH 1/8] net: make sure we retain NAPI ordering on netdev->napi_list Netlink code depends on NAPI instances being sorted by ID on the netdev list for dump continuation. We need to be able to find the position on the list where we left off if dump does not fit in a single skb, and in the meantime NAPI instances can come and go. This was trivially true when we were assigning a new ID to every new NAPI instance. Since we added the NAPI config API, we try to retain the ID previously used for the same queue, but still add the new NAPI instance at the start of the list. This is fine if we reset the entire netdev and all NAPIs get removed and added back. If driver replaces a NAPI instance during an operation like DEVMEM queue reset, or recreates a subset of NAPI instances in other ways we may end up with broken ordering, and therefore Netlink dumps with either missing or duplicated entries. At this stage the problem is theoretical. Only two drivers support queue API, bnxt and gve. gve recreates NAPIs during queue reset, but it doesn't support NAPI config. bnxt supports NAPI config but doesn't recreate instances during reset. We need to save the ID in the config as soon as it is assigned because otherwise the new NAPI will not know what ID it will get at enable time, at the time it is being added. Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- net/core/dev.c | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 983c24927316..26f0c2fbb8aa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6736,13 +6736,14 @@ static void napi_restore_config(struct napi_struct *n) n->gro_flush_timeout = n->config->gro_flush_timeout; n->irq_suspend_timeout = n->config->irq_suspend_timeout; /* a NAPI ID might be stored in the config, if so use it. if not, use - * napi_hash_add to generate one for us. It will be saved to the config - * in napi_disable. + * napi_hash_add to generate one for us. */ - if (n->config->napi_id) + if (n->config->napi_id) { napi_hash_add_with_id(n, n->config->napi_id); - else + } else { napi_hash_add(n); + n->config->napi_id = n->napi_id; + } } static void napi_save_config(struct napi_struct *n) @@ -6750,10 +6751,39 @@ static void napi_save_config(struct napi_struct *n) n->config->defer_hard_irqs = n->defer_hard_irqs; n->config->gro_flush_timeout = n->gro_flush_timeout; n->config->irq_suspend_timeout = n->irq_suspend_timeout; - n->config->napi_id = n->napi_id; napi_hash_del(n); } +/* Netlink wants the NAPI list to be sorted by ID, if adding a NAPI which will + * inherit an existing ID try to insert it at the right position. + */ +static void +netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) +{ + unsigned int new_id, pos_id; + struct list_head *higher; + struct napi_struct *pos; + + new_id = UINT_MAX; + if (napi->config && napi->config->napi_id) + new_id = napi->config->napi_id; + + higher = &dev->napi_list; + list_for_each_entry(pos, &dev->napi_list, dev_list) { + if (pos->napi_id >= MIN_NAPI_ID) + pos_id = pos->napi_id; + else if (pos->config) + pos_id = pos->config->napi_id; + else + pos_id = UINT_MAX; + + if (pos_id <= new_id) + break; + higher = &pos->dev_list; + } + list_add_rcu(&napi->dev_list, higher); /* adds after higher */ +} + void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6780,7 +6810,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, napi->list_owner = -1; set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); - list_add_rcu(&napi->dev_list, &dev->napi_list); + netif_napi_dev_list_add(dev, napi); /* default settings from sysfs are applied to all NAPIs. any per-NAPI * configuration will be loaded in napi_enable From 0b7bdc7fab5703ddff677a82c1de7b3ac500974f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:40 -0800 Subject: [PATCH 2/8] netdev: define NETDEV_INTERNAL Linus suggested during one of past maintainer summits (in context of a DMA_BUF discussion) that symbol namespaces can be used to prevent unwelcome but in-tree code from using all exported functions. Create a namespace for netdev. Export netdev_rx_queue_restart(), drivers may want to use it since it gives them a simple and safe way to restart a queue to apply config changes. But it's both too low level and too actively developed to be used outside netdev. Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- Documentation/networking/netdevices.rst | 10 ++++++++++ net/core/netdev_rx_queue.c | 1 + 2 files changed, 11 insertions(+) diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 857c9784f87e..1d37038e9fbe 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -297,3 +297,13 @@ napi->poll: Context: softirq will be called with interrupts disabled by netconsole. + +NETDEV_INTERNAL symbol namespace +================================ + +Symbols exported as NETDEV_INTERNAL can only be used in networking +core and drivers which exclusively flow via the main networking list and trees. +Note that the inverse is not true, most symbols outside of NETDEV_INTERNAL +are not expected to be used by random code outside netdev either. +Symbols may lack the designation because they predate the namespaces, +or simply due to an oversight. diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index e217a5838c87..db82786fa0c4 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -79,3 +79,4 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) return err; } +EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); From 00adf88b186fa09a0b4005bdcf440aa2f926a75b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:41 -0800 Subject: [PATCH 3/8] netdevsim: support NAPI config Link the NAPI instances to their configs. This will be needed to test that NAPI config doesn't break list ordering. Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index e068a9761c09..a4aacd372cdd 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -390,7 +390,7 @@ static int nsim_init_napi(struct netdevsim *ns) for (i = 0; i < dev->num_rx_queues; i++) { rq = &ns->rq[i]; - netif_napi_add(dev, &rq->napi, nsim_poll); + netif_napi_add_config(dev, &rq->napi, nsim_poll, i); } for (i = 0; i < dev->num_rx_queues; i++) { From 915c82f842f955429e347a53ce005604ad421343 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:42 -0800 Subject: [PATCH 4/8] netdevsim: allocate rqs individually Make nsim->rqs an array of pointers and allocate them individually so that we can swap them out one by one. Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/netdev.c | 49 +++++++++++++++++++------------ drivers/net/netdevsim/netdevsim.h | 2 +- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index a4aacd372cdd..7fa75f37ec49 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -69,7 +69,7 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) rxq = skb_get_queue_mapping(skb); if (rxq >= peer_dev->num_rx_queues) rxq = rxq % peer_dev->num_rx_queues; - rq = &peer_ns->rq[rxq]; + rq = peer_ns->rq[rxq]; skb_tx_timestamp(skb); if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP)) @@ -388,13 +388,13 @@ static int nsim_init_napi(struct netdevsim *ns) int err, i; for (i = 0; i < dev->num_rx_queues; i++) { - rq = &ns->rq[i]; + rq = ns->rq[i]; netif_napi_add_config(dev, &rq->napi, nsim_poll, i); } for (i = 0; i < dev->num_rx_queues; i++) { - rq = &ns->rq[i]; + rq = ns->rq[i]; err = nsim_create_page_pool(rq); if (err) @@ -405,12 +405,12 @@ static int nsim_init_napi(struct netdevsim *ns) err_pp_destroy: while (i--) { - page_pool_destroy(ns->rq[i].page_pool); - ns->rq[i].page_pool = NULL; + page_pool_destroy(ns->rq[i]->page_pool); + ns->rq[i]->page_pool = NULL; } for (i = 0; i < dev->num_rx_queues; i++) - __netif_napi_del(&ns->rq[i].napi); + __netif_napi_del(&ns->rq[i]->napi); return err; } @@ -421,7 +421,7 @@ static void nsim_enable_napi(struct netdevsim *ns) int i; for (i = 0; i < dev->num_rx_queues; i++) { - struct nsim_rq *rq = &ns->rq[i]; + struct nsim_rq *rq = ns->rq[i]; netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi); napi_enable(&rq->napi); @@ -448,7 +448,7 @@ static void nsim_del_napi(struct netdevsim *ns) int i; for (i = 0; i < dev->num_rx_queues; i++) { - struct nsim_rq *rq = &ns->rq[i]; + struct nsim_rq *rq = ns->rq[i]; napi_disable(&rq->napi); __netif_napi_del(&rq->napi); @@ -456,8 +456,8 @@ static void nsim_del_napi(struct netdevsim *ns) synchronize_net(); for (i = 0; i < dev->num_rx_queues; i++) { - page_pool_destroy(ns->rq[i].page_pool); - ns->rq[i].page_pool = NULL; + page_pool_destroy(ns->rq[i]->page_pool); + ns->rq[i]->page_pool = NULL; } } @@ -628,7 +628,7 @@ nsim_pp_hold_write(struct file *file, const char __user *data, if (!netif_running(ns->netdev) && val) { ret = -ENETDOWN; } else if (val) { - ns->page = page_pool_dev_alloc_pages(ns->rq[0].page_pool); + ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool); if (!ns->page) ret = -ENOMEM; } else { @@ -677,15 +677,26 @@ static int nsim_queue_init(struct netdevsim *ns) struct net_device *dev = ns->netdev; int i; - ns->rq = kvcalloc(dev->num_rx_queues, sizeof(*ns->rq), - GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); + ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq), + GFP_KERNEL_ACCOUNT); if (!ns->rq) return -ENOMEM; - for (i = 0; i < dev->num_rx_queues; i++) - skb_queue_head_init(&ns->rq[i].skb_queue); + for (i = 0; i < dev->num_rx_queues; i++) { + ns->rq[i] = kzalloc(sizeof(**ns->rq), GFP_KERNEL_ACCOUNT); + if (!ns->rq[i]) + goto err_free_prev; + + skb_queue_head_init(&ns->rq[i]->skb_queue); + } return 0; + +err_free_prev: + while (i--) + kfree(ns->rq[i]); + kfree(ns->rq); + return -ENOMEM; } static void nsim_queue_free(struct netdevsim *ns) @@ -693,11 +704,13 @@ static void nsim_queue_free(struct netdevsim *ns) struct net_device *dev = ns->netdev; int i; - for (i = 0; i < dev->num_rx_queues; i++) - skb_queue_purge_reason(&ns->rq[i].skb_queue, + for (i = 0; i < dev->num_rx_queues; i++) { + skb_queue_purge_reason(&ns->rq[i]->skb_queue, SKB_DROP_REASON_QUEUE_PURGE); + kfree(ns->rq[i]); + } - kvfree(ns->rq); + kfree(ns->rq); ns->rq = NULL; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index bf02efa10956..80fde64f4a7a 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -101,7 +101,7 @@ struct netdevsim { struct nsim_dev *nsim_dev; struct nsim_dev_port *nsim_dev_port; struct mock_phc *phc; - struct nsim_rq *rq; + struct nsim_rq **rq; u64 tx_packets; u64 tx_bytes; From a565dd04a120c03891b6fb504f342159b446f463 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:43 -0800 Subject: [PATCH 5/8] netdevsim: add queue alloc/free helpers We'll need the code to allocate and free queues in the queue management API, factor it out. Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/netdev.c | 35 +++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 7fa75f37ec49..7b80796dbe26 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -595,6 +595,24 @@ static const struct netdev_stat_ops nsim_stat_ops = { .get_base_stats = nsim_get_base_stats, }; +static struct nsim_rq *nsim_queue_alloc(void) +{ + struct nsim_rq *rq; + + rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT); + if (!rq) + return NULL; + + skb_queue_head_init(&rq->skb_queue); + return rq; +} + +static void nsim_queue_free(struct nsim_rq *rq) +{ + skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE); + kfree(rq); +} + static ssize_t nsim_pp_hold_read(struct file *file, char __user *data, size_t count, loff_t *ppos) @@ -683,11 +701,9 @@ static int nsim_queue_init(struct netdevsim *ns) return -ENOMEM; for (i = 0; i < dev->num_rx_queues; i++) { - ns->rq[i] = kzalloc(sizeof(**ns->rq), GFP_KERNEL_ACCOUNT); + ns->rq[i] = nsim_queue_alloc(); if (!ns->rq[i]) goto err_free_prev; - - skb_queue_head_init(&ns->rq[i]->skb_queue); } return 0; @@ -699,16 +715,13 @@ static int nsim_queue_init(struct netdevsim *ns) return -ENOMEM; } -static void nsim_queue_free(struct netdevsim *ns) +static void nsim_queue_uninit(struct netdevsim *ns) { struct net_device *dev = ns->netdev; int i; - for (i = 0; i < dev->num_rx_queues; i++) { - skb_queue_purge_reason(&ns->rq[i]->skb_queue, - SKB_DROP_REASON_QUEUE_PURGE); - kfree(ns->rq[i]); - } + for (i = 0; i < dev->num_rx_queues; i++) + nsim_queue_free(ns->rq[i]); kfree(ns->rq); ns->rq = NULL; @@ -754,7 +767,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns) nsim_macsec_teardown(ns); nsim_bpf_uninit(ns); err_rq_destroy: - nsim_queue_free(ns); + nsim_queue_uninit(ns); err_utn_destroy: rtnl_unlock(); nsim_udp_tunnels_info_destroy(ns->netdev); @@ -836,7 +849,7 @@ void nsim_destroy(struct netdevsim *ns) nsim_macsec_teardown(ns); nsim_ipsec_teardown(ns); nsim_bpf_uninit(ns); - nsim_queue_free(ns); + nsim_queue_uninit(ns); } rtnl_unlock(); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) From 5bc8e8dbef27b73bd7b6d1fd5108b4fd4c6d469f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:44 -0800 Subject: [PATCH 6/8] netdevsim: add queue management API support Add queue management API support. We need a way to reset queues to test NAPI reordering, the queue management API provides a handy scaffolding for that. Reviewed-by: Willem de Bruijn Acked-by: Stanislav Fomichev Signed-off-by: Jakub Kicinski Reviewed-by: Mina Almasry Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/netdev.c | 134 +++++++++++++++++++++++++++--- drivers/net/netdevsim/netdevsim.h | 2 + 2 files changed, 124 insertions(+), 12 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 7b80796dbe26..cfb079a34532 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -359,25 +359,24 @@ static int nsim_poll(struct napi_struct *napi, int budget) return done; } -static int nsim_create_page_pool(struct nsim_rq *rq) +static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi) { - struct page_pool_params p = { + struct page_pool_params params = { .order = 0, .pool_size = NSIM_RING_SIZE, .nid = NUMA_NO_NODE, - .dev = &rq->napi.dev->dev, - .napi = &rq->napi, + .dev = &napi->dev->dev, + .napi = napi, .dma_dir = DMA_BIDIRECTIONAL, - .netdev = rq->napi.dev, + .netdev = napi->dev, }; + struct page_pool *pool; - rq->page_pool = page_pool_create(&p); - if (IS_ERR(rq->page_pool)) { - int err = PTR_ERR(rq->page_pool); + pool = page_pool_create(¶ms); + if (IS_ERR(pool)) + return PTR_ERR(pool); - rq->page_pool = NULL; - return err; - } + *p = pool; return 0; } @@ -396,7 +395,7 @@ static int nsim_init_napi(struct netdevsim *ns) for (i = 0; i < dev->num_rx_queues; i++) { rq = ns->rq[i]; - err = nsim_create_page_pool(rq); + err = nsim_create_page_pool(&rq->page_pool, &rq->napi); if (err) goto err_pp_destroy; } @@ -613,6 +612,116 @@ static void nsim_queue_free(struct nsim_rq *rq) kfree(rq); } +/* Queue reset mode is controlled by ns->rq_reset_mode. + * - normal - new NAPI new pool (old NAPI enabled when new added) + * - mode 1 - allocate new pool (NAPI is only disabled / enabled) + * - mode 2 - new NAPI new pool (old NAPI removed before new added) + * - mode 3 - new NAPI new pool (old NAPI disabled when new added) + */ +struct nsim_queue_mem { + struct nsim_rq *rq; + struct page_pool *pp; +}; + +static int +nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + int err; + + if (ns->rq_reset_mode > 3) + return -EINVAL; + + if (ns->rq_reset_mode == 1) + return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi); + + qmem->rq = nsim_queue_alloc(); + if (!qmem->rq) + return -ENOMEM; + + err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi); + if (err) + goto err_free; + + if (!ns->rq_reset_mode) + netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + + return 0; + +err_free: + nsim_queue_free(qmem->rq); + return err; +} + +static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + page_pool_destroy(qmem->pp); + if (qmem->rq) { + if (!ns->rq_reset_mode) + netif_napi_del(&qmem->rq->napi); + page_pool_destroy(qmem->rq->page_pool); + nsim_queue_free(qmem->rq); + } +} + +static int +nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + if (ns->rq_reset_mode == 1) { + ns->rq[idx]->page_pool = qmem->pp; + napi_enable(&ns->rq[idx]->napi); + return 0; + } + + /* netif_napi_add()/_del() should normally be called from alloc/free, + * here we want to test various call orders. + */ + if (ns->rq_reset_mode == 2) { + netif_napi_del(&ns->rq[idx]->napi); + netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + } else if (ns->rq_reset_mode == 3) { + netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + netif_napi_del(&ns->rq[idx]->napi); + } + + ns->rq[idx] = qmem->rq; + napi_enable(&ns->rq[idx]->napi); + + return 0; +} + +static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + napi_disable(&ns->rq[idx]->napi); + + if (ns->rq_reset_mode == 1) { + qmem->pp = ns->rq[idx]->page_pool; + page_pool_disable_direct_recycling(qmem->pp); + } else { + qmem->rq = ns->rq[idx]; + } + + return 0; +} + +static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct nsim_queue_mem), + .ndo_queue_mem_alloc = nsim_queue_mem_alloc, + .ndo_queue_mem_free = nsim_queue_mem_free, + .ndo_queue_start = nsim_queue_start, + .ndo_queue_stop = nsim_queue_stop, +}; + static ssize_t nsim_pp_hold_read(struct file *file, char __user *data, size_t count, loff_t *ppos) @@ -739,6 +848,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns) ns->phc = phc; ns->netdev->netdev_ops = &nsim_netdev_ops; ns->netdev->stat_ops = &nsim_stat_ops; + ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops; err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); if (err) diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 80fde64f4a7a..8c50969b1240 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -103,6 +103,8 @@ struct netdevsim { struct mock_phc *phc; struct nsim_rq **rq; + int rq_reset_mode; + u64 tx_packets; u64 tx_bytes; u64 tx_dropped; From 6917d207b469ee81e6dc7f8ccca29c234a16916d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:45 -0800 Subject: [PATCH 7/8] netdevsim: add debugfs-triggered queue reset Support triggering queue reset via debugfs for an upcoming test. Reviewed-by: Willem de Bruijn Acked-by: Stanislav Fomichev Signed-off-by: Jakub Kicinski Reviewed-by: Mina Almasry Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/netdev.c | 55 +++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdevsim.h | 1 + 2 files changed, 56 insertions(+) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index cfb079a34532..d013b6498539 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,8 @@ #include "netdevsim.h" +MODULE_IMPORT_NS("NETDEV_INTERNAL"); + #define NSIM_RING_SIZE 256 static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb) @@ -722,6 +725,54 @@ static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = { .ndo_queue_stop = nsim_queue_stop, }; +static ssize_t +nsim_qreset_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct netdevsim *ns = file->private_data; + unsigned int queue, mode; + char buf[32]; + ssize_t ret; + + if (count >= sizeof(buf)) + return -EINVAL; + if (copy_from_user(buf, data, count)) + return -EFAULT; + buf[count] = '\0'; + + ret = sscanf(buf, "%u %u", &queue, &mode); + if (ret != 2) + return -EINVAL; + + rtnl_lock(); + if (!netif_running(ns->netdev)) { + ret = -ENETDOWN; + goto exit_unlock; + } + + if (queue >= ns->netdev->real_num_rx_queues) { + ret = -EINVAL; + goto exit_unlock; + } + + ns->rq_reset_mode = mode; + ret = netdev_rx_queue_restart(ns->netdev, queue); + ns->rq_reset_mode = 0; + if (ret) + goto exit_unlock; + + ret = count; +exit_unlock: + rtnl_unlock(); + return ret; +} + +static const struct file_operations nsim_qreset_fops = { + .open = simple_open, + .write = nsim_qreset_write, + .owner = THIS_MODULE, +}; + static ssize_t nsim_pp_hold_read(struct file *file, char __user *data, size_t count, loff_t *ppos) @@ -934,6 +985,9 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir, ns, &nsim_pp_hold_fops); + ns->qr_dfs = debugfs_create_file("queue_reset", 0200, + nsim_dev_port->ddir, ns, + &nsim_qreset_fops); return ns; @@ -947,6 +1001,7 @@ void nsim_destroy(struct netdevsim *ns) struct net_device *dev = ns->netdev; struct netdevsim *peer; + debugfs_remove(ns->qr_dfs); debugfs_remove(ns->pp_dfs); rtnl_lock(); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 8c50969b1240..a70f62af4c88 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -136,6 +136,7 @@ struct netdevsim { struct page *page; struct dentry *pp_dfs; + struct dentry *qr_dfs; struct nsim_ethtool ethtool; struct netdevsim __rcu *peer; From eb721f117e7d43b561e81dd878c4acfa2de13ee2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:46 -0800 Subject: [PATCH 8/8] selftests: net: test listing NAPI vs queue resets Test listing netdevsim NAPIs before and after a single queue has been reset (and NAPIs re-added). Start from resetting the middle queue because edge cases (first / last) may actually be less likely to trigger bugs. # ./tools/testing/selftests/net/nl_netdev.py KTAP version 1 1..4 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check # Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Willem de Bruijn Acked-by: Stanislav Fomichev Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/nl_netdev.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 93d9d914529b..93e8cb671c3d 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -18,6 +18,23 @@ def lo_check(nf) -> None: ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0) +def napi_list_check(nf) -> None: + with NetdevSimDev(queue_count=100) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100) + + for q in [50, 0, 99]: + for i in range(4): + nsim.dfs_write("queue_reset", f"{q} {i}") + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100, + comment=f"queue count after reset queue {q} mode {i}") + + def page_pool_check(nf) -> None: with NetdevSimDev() as nsimdev: nsim = nsimdev.nsims[0] @@ -89,7 +106,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check], + ksft_run([empty_check, lo_check, page_pool_check, napi_list_check], args=(nf, )) ksft_exit()