From a127c18462ea619a1ace1f00540807e009dbf225 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 14 Feb 2025 21:12:29 +0000 Subject: [PATCH 1/3] netlink: Add nla_put_empty_nest helper Creating empty nests is helpful when the exact attributes to be exposed in the future are not known. Encapsulate the logic in a helper. Signed-off-by: Joe Damato Suggested-by: Jakub Kicinski Link: https://patch.msgid.link/20250214211255.14194-2-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/net/netlink.h b/include/net/netlink.h index e015ffbed819..29e0db940382 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -118,6 +118,7 @@ * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction + * nla_put_empty_nest(skb, type) create an empty nest * * Attribute Length Calculations: * nla_attr_size(payload) length of attribute w/o padding @@ -2240,6 +2241,20 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) nlmsg_trim(skb, start); } +/** + * nla_put_empty_nest - Create an empty nest + * @skb: socket buffer the message is stored in + * @attrtype: attribute type of the container + * + * This function is a helper for creating empty nests. + * + * Returns: 0 when successful or -EMSGSIZE on failure. + */ +static inline int nla_put_empty_nest(struct sk_buff *skb, int attrtype) +{ + return nla_nest_start(skb, attrtype) ? 0 : -EMSGSIZE; +} + /** * __nla_validate_nested - Validate a stream of nested attributes * @start: container attribute From df524c8f57711a3fe54abb087794819840005fd0 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 14 Feb 2025 21:12:30 +0000 Subject: [PATCH 2/3] netdev-genl: Add an XSK attribute to queues Expose a new per-queue nest attribute, xsk, which will be present for queues that are being used for AF_XDP. 
If the queue is not being used for AF_XDP, the nest will not be present. In the future, this attribute can be extended to include more data about XSK as it is needed. Signed-off-by: Joe Damato Suggested-by: Jakub Kicinski Link: https://patch.msgid.link/20250214211255.14194-3-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 13 ++++++++++++- include/uapi/linux/netdev.h | 6 ++++++ net/core/netdev-genl.c | 12 ++++++++++++ tools/include/uapi/linux/netdev.h | 6 ++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 288923e965ae..85402a2e289c 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -276,6 +276,9 @@ attribute-sets: doc: The timeout, in nanoseconds, of how long to suspend irq processing, if event polling finds events type: uint + - + name: xsk-info + attributes: [] - name: queue attributes: @@ -294,6 +297,9 @@ attribute-sets: - name: type doc: Queue type as rx, tx. Each queue type defines a separate ID space. + XDP TX queues allocated in the kernel are not linked to NAPIs and + thus not listed. AF_XDP queues will have more information set in + the xsk attribute. type: u32 enum: queue-type - @@ -309,7 +315,11 @@ attribute-sets: doc: io_uring memory provider information. type: nest nested-attributes: io-uring-provider-info - + - + name: xsk + doc: XSK information for this queue, if any. 
+ type: nest + nested-attributes: xsk-info - name: qstats doc: | @@ -652,6 +662,7 @@ operations: - ifindex - dmabuf - io-uring + - xsk dump: request: attributes: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6c6ee183802d..4e82f3871473 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -136,6 +136,11 @@ enum { NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) }; +enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -143,6 +148,7 @@ enum { NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 22ac51356d9f..c92fba65b20d 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -400,11 +400,23 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, if (params->mp_ops && params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) goto nla_put_failure; +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + goto nla_put_failure; +#endif + break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); if (nla_put_napi_id(rsp, txq->napi)) goto nla_put_failure; +#ifdef CONFIG_XDP_SOCKETS + if (txq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + goto nla_put_failure; +#endif + break; } genlmsg_end(rsp, hdr); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 6c6ee183802d..4e82f3871473 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -136,6 +136,11 @@ enum { NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) }; +enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -143,6 +148,7 @@ enum { 
NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) From 788e52e2b66844301fe09f3372d46d8c62f6ebe4 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 14 Feb 2025 21:12:31 +0000 Subject: [PATCH 3/3] selftests: drv-net: Test queue xsk attribute Test that queues which are used for AF_XDP have the xsk nest attribute. The attribute is currently empty, but its existence means the AF_XDP is being used for the queue. Enable CONFIG_XDP_SOCKETS for selftests/drivers/net tests, as well. Signed-off-by: Joe Damato Suggested-by: Jakub Kicinski Link: https://patch.msgid.link/20250214211255.14194-4-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/.gitignore | 2 + tools/testing/selftests/drivers/net/Makefile | 3 + tools/testing/selftests/drivers/net/config | 1 + tools/testing/selftests/drivers/net/queues.py | 42 +++++++- .../selftests/drivers/net/xdp_helper.c | 98 +++++++++++++++++++ 5 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/drivers/net/.gitignore create mode 100644 tools/testing/selftests/drivers/net/xdp_helper.c diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore new file mode 100644 index 000000000000..ec746f374e85 --- /dev/null +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +xdp_helper diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 28b6d47f812d..0c95bd944d56 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -1,10 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 +CFLAGS += $(KHDR_INCLUDES) TEST_INCLUDES := $(wildcard lib/py/*.py) \ $(wildcard lib/sh/*.sh) \ ../../net/net_helper.sh \ ../../net/lib.sh \ +TEST_GEN_FILES := xdp_helper + TEST_PROGS 
:= \ netcons_basic.sh \ netcons_fragmented_msg.sh \ diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index a2d8af60876d..f27172ddee0a 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -4,3 +4,4 @@ CONFIG_CONFIGFS_FS=y CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 38303da957ee..5fdfebc6415f 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -2,13 +2,16 @@ # SPDX-License-Identifier: GPL-2.0 from lib.py import ksft_disruptive, ksft_exit, ksft_run -from lib.py import ksft_eq, ksft_raises, KsftSkipEx +from lib.py import ksft_eq, ksft_raises, KsftSkipEx, KsftFailEx from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import NetDrvEnv from lib.py import cmd, defer, ip import errno import glob - +import os +import socket +import struct +import subprocess def sys_get_queues(ifname, qtype='rx') -> int: folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') @@ -21,6 +24,39 @@ def nl_get_queues(cfg, nl, qtype='rx'): return len([q for q in queues if q['type'] == qtype]) return None +def check_xdp(cfg, nl, xdp_queue_id=0) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + xdp = subprocess.Popen([f"{test_dir}/xdp_helper", f"{cfg.ifindex}", f"{xdp_queue_id}"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=1, + text=True) + defer(xdp.kill) + + stdout, stderr = xdp.communicate(timeout=10) + rx = tx = False + + if xdp.returncode == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.returncode > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if not queues: + raise KsftSkipEx("Netlink reports no queues") + + for q in 
queues: + if q['id'] == 0: + if q['type'] == 'rx': + rx = True + if q['type'] == 'tx': + tx = True + + ksft_eq(q['xsk'], {}) + else: + if 'xsk' in q: + _fail("Check failed: xsk attribute set.") + + ksft_eq(rx, True) + ksft_eq(tx, True) def get_queues(cfg, nl) -> None: snl = NetdevFamily(recv_size=4096) @@ -81,7 +117,7 @@ def check_down(cfg, nl) -> None: def main() -> None: with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily())) + ksft_run([get_queues, addremove_queues, check_down, check_xdp], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/drivers/net/xdp_helper.c new file mode 100644 index 000000000000..cf06a88b830b --- /dev/null +++ b/tools/testing/selftests/drivers/net/xdp_helper.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define UMEM_SZ (1U << 16) +#define NUM_DESC (UMEM_SZ / 2048) + +/* this is a simple helper program that creates an XDP socket and does the + * minimum necessary to get bind() to succeed. + * + * this test program is not intended to actually process packets, but could be + * extended in the future if that is actually needed. + * + * it is used by queues.py to ensure the xsk netlink attribute is set + * correctly. + */ +int main(int argc, char **argv) +{ + struct xdp_umem_reg umem_reg = { 0 }; + struct sockaddr_xdp sxdp = { 0 }; + int num_desc = NUM_DESC; + void *umem_area; + int ifindex; + int sock_fd; + int queue; + char byte; + + if (argc != 3) { + fprintf(stderr, "Usage: %s ifindex queue_id", argv[0]); + return 1; + } + + sock_fd = socket(AF_XDP, SOCK_RAW, 0); + if (sock_fd < 0) { + perror("socket creation failed"); + /* if the kernel doesn't support AF_XDP, let the test program + * know with -1. All other error paths return 1.
+ */ + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + ifindex = atoi(argv[1]); + queue = atoi(argv[2]); + + umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (umem_area == MAP_FAILED) { + perror("mmap failed"); + return 1; + } + + umem_reg.addr = (uintptr_t)umem_area; + umem_reg.len = UMEM_SZ; + umem_reg.chunk_size = 2048; + umem_reg.headroom = 0; + + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, + sizeof(umem_reg)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc)); + + sxdp.sxdp_family = AF_XDP; + sxdp.sxdp_ifindex = ifindex; + sxdp.sxdp_queue_id = queue; + sxdp.sxdp_flags = 0; + + if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) { + munmap(umem_area, UMEM_SZ); + perror("bind failed"); + close(sock_fd); + return 1; + } + + /* give the parent program some data when the socket is ready */ + fprintf(stdout, "%d\n", sock_fd); + + /* parent program will write a byte to stdin when it's ready for this + * helper to exit + */ + read(STDIN_FILENO, &byte, 1); + + close(sock_fd); + return 0; +}