Merge branch 'netdev-genl-add-an-xsk-attribute-to-queues'

Joe Damato says:

====================
netdev-genl: Add an xsk attribute to queues

This is an attempt to followup on something Jakub asked me about [1],
adding an xsk attribute to queues and more clearly documenting which
queues are linked to NAPIs...

After the RFC [2], Jakub suggested creating an empty nest for queues
which have a pool, so I've adjusted this version to work that way.

The nest can be extended in the future to express attributes about XSK
as needed. Queues which are not used for AF_XDP do not have the xsk
attribute present.

I've run the included test on:
  - my mlx5 machine (via NETIF=)
  - without setting NETIF

And the test seems to pass in both cases.

[1]: https://lore.kernel.org/netdev/20250113143109.60afa59a@kernel.org/
[2]: https://lore.kernel.org/netdev/20250129172431.65773-1-jdamato@fastly.com/
====================

Link: https://patch.msgid.link/20250214211255.14194-1-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-02-17 16:46:05 -08:00
10 changed files with 194 additions and 4 deletions

View File

@@ -276,6 +276,9 @@ attribute-sets:
doc: The timeout, in nanoseconds, of how long to suspend irq
processing, if event polling finds events
type: uint
-
name: xsk-info
attributes: []
-
name: queue
attributes:
@@ -294,6 +297,9 @@ attribute-sets:
-
name: type
doc: Queue type as rx, tx. Each queue type defines a separate ID space.
XDP TX queues allocated in the kernel are not linked to NAPIs and
thus not listed. AF_XDP queues will have more information set in
the xsk attribute.
type: u32
enum: queue-type
-
@@ -309,7 +315,11 @@ attribute-sets:
doc: io_uring memory provider information.
type: nest
nested-attributes: io-uring-provider-info
-
name: xsk
doc: XSK information for this queue, if any.
type: nest
nested-attributes: xsk-info
-
name: qstats
doc: |
@@ -652,6 +662,7 @@ operations:
- ifindex
- dmabuf
- io-uring
- xsk
dump:
request:
attributes:

View File

@@ -118,6 +118,7 @@
* nla_nest_start(skb, type) start a nested attribute
* nla_nest_end(skb, nla) finalize a nested attribute
* nla_nest_cancel(skb, nla) cancel nested attribute construction
* nla_put_empty_nest(skb, type) create an empty nest
*
* Attribute Length Calculations:
* nla_attr_size(payload) length of attribute w/o padding
@@ -2240,6 +2241,20 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
nlmsg_trim(skb, start);
}
/**
* nla_put_empty_nest - Create an empty nest
* @skb: socket buffer the message is stored in
* @attrtype: attribute type of the container
*
* This function is a helper for creating empty nests.
*
* Returns: 0 when successful or -EMSGSIZE on failure.
*/
static inline int nla_put_empty_nest(struct sk_buff *skb, int attrtype)
{
return nla_nest_start(skb, attrtype) ? 0 : -EMSGSIZE;
}
/**
* __nla_validate_nested - Validate a stream of nested attributes
* @start: container attribute

View File

@@ -136,6 +136,11 @@ enum {
NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
};
enum {
__NETDEV_A_XSK_INFO_MAX,
NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
};
enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
@@ -143,6 +148,7 @@ enum {
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
NETDEV_A_QUEUE_XSK,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)

View File

@@ -400,11 +400,23 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
if (params->mp_ops &&
params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
if (rxq->pool)
if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
goto nla_put_failure;
#endif
break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);
if (nla_put_napi_id(rsp, txq->napi))
goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
if (txq->pool)
if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
goto nla_put_failure;
#endif
break;
}
genlmsg_end(rsp, hdr);

View File

@@ -136,6 +136,11 @@ enum {
NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
};
enum {
__NETDEV_A_XSK_INFO_MAX,
NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
};
enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
@@ -143,6 +148,7 @@ enum {
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
NETDEV_A_QUEUE_XSK,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)

View File

@@ -0,0 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
xdp_helper

View File

@@ -1,10 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += $(KHDR_INCLUDES)
TEST_INCLUDES := $(wildcard lib/py/*.py) \
$(wildcard lib/sh/*.sh) \
../../net/net_helper.sh \
../../net/lib.sh \
TEST_GEN_FILES := xdp_helper
TEST_PROGS := \
netcons_basic.sh \
netcons_fragmented_msg.sh \

View File

@@ -4,3 +4,4 @@ CONFIG_CONFIGFS_FS=y
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
CONFIG_NETCONSOLE_EXTENDED_LOG=y
CONFIG_XDP_SOCKETS=y

View File

@@ -2,13 +2,16 @@
# SPDX-License-Identifier: GPL-2.0
from lib.py import ksft_disruptive, ksft_exit, ksft_run
from lib.py import ksft_eq, ksft_raises, KsftSkipEx
from lib.py import ksft_eq, ksft_raises, KsftSkipEx, KsftFailEx
from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import NetDrvEnv
from lib.py import cmd, defer, ip
import errno
import glob
import os
import socket
import struct
import subprocess
def sys_get_queues(ifname, qtype='rx') -> int:
folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
@@ -21,6 +24,39 @@ def nl_get_queues(cfg, nl, qtype='rx'):
return len([q for q in queues if q['type'] == qtype])
return None
def check_xdp(cfg, nl, xdp_queue_id=0) -> None:
test_dir = os.path.dirname(os.path.realpath(__file__))
xdp = subprocess.Popen([f"{test_dir}/xdp_helper", f"{cfg.ifindex}", f"{xdp_queue_id}"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=1,
text=True)
defer(xdp.kill)
stdout, stderr = xdp.communicate(timeout=10)
rx = tx = False
if xdp.returncode == 255:
raise KsftSkipEx('AF_XDP unsupported')
elif xdp.returncode > 0:
raise KsftFailEx('unable to create AF_XDP socket')
queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
if not queues:
raise KsftSkipEx("Netlink reports no queues")
for q in queues:
if q['id'] == 0:
if q['type'] == 'rx':
rx = True
if q['type'] == 'tx':
tx = True
ksft_eq(q['xsk'], {})
else:
if 'xsk' in q:
_fail("Check failed: xsk attribute set.")
ksft_eq(rx, True)
ksft_eq(tx, True)
def get_queues(cfg, nl) -> None:
snl = NetdevFamily(recv_size=4096)
@@ -81,7 +117,7 @@ def check_down(cfg, nl) -> None:
def main() -> None:
with NetDrvEnv(__file__, queue_count=100) as cfg:
ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily()))
ksft_run([get_queues, addremove_queues, check_down, check_xdp], args=(cfg, NetdevFamily()))
ksft_exit()

View File

@@ -0,0 +1,98 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>
#include <linux/if_link.h>
#include <net/if.h>
#include <inttypes.h>
#define UMEM_SZ (1U << 16)
#define NUM_DESC (UMEM_SZ / 2048)
/* this is a simple helper program that creates an XDP socket and does the
* minimum necessary to get bind() to succeed.
*
* this test program is not intended to actually process packets, but could be
* extended in the future if that is actually needed.
*
* it is used by queues.py to ensure the xsk netlinux attribute is set
* correctly.
*/
int main(int argc, char **argv)
{
struct xdp_umem_reg umem_reg = { 0 };
struct sockaddr_xdp sxdp = { 0 };
int num_desc = NUM_DESC;
void *umem_area;
int ifindex;
int sock_fd;
int queue;
char byte;
if (argc != 3) {
fprintf(stderr, "Usage: %s ifindex queue_id", argv[0]);
return 1;
}
sock_fd = socket(AF_XDP, SOCK_RAW, 0);
if (sock_fd < 0) {
perror("socket creation failed");
/* if the kernel doesn't support AF_XDP, let the test program
* know with -1. All other error paths return 1.
*/
if (errno == EAFNOSUPPORT)
return -1;
return 1;
}
ifindex = atoi(argv[1]);
queue = atoi(argv[2]);
umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE |
MAP_ANONYMOUS, -1, 0);
if (umem_area == MAP_FAILED) {
perror("mmap failed");
return 1;
}
umem_reg.addr = (uintptr_t)umem_area;
umem_reg.len = UMEM_SZ;
umem_reg.chunk_size = 2048;
umem_reg.headroom = 0;
setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg,
sizeof(umem_reg));
setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc,
sizeof(num_desc));
setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc,
sizeof(num_desc));
setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc));
sxdp.sxdp_family = AF_XDP;
sxdp.sxdp_ifindex = ifindex;
sxdp.sxdp_queue_id = queue;
sxdp.sxdp_flags = 0;
if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) {
munmap(umem_area, UMEM_SZ);
perror("bind failed");
close(sock_fd);
return 1;
}
/* give the parent program some data when the socket is ready*/
fprintf(stdout, "%d\n", sock_fd);
/* parent program will write a byte to stdin when its ready for this
* helper to exit
*/
read(STDIN_FILENO, &byte, 1);
close(sock_fd);
return 0;
}