Merge branch 'selftests-drv-net-improve-the-queue-test-for-xsk'

Jakub Kicinski says:

====================
selftests: drv-net: improve the queue test for XSK

We see some flakes in the the XSK test:

   Exception| Traceback (most recent call last):
   Exception|   File "/home/virtme/testing-18/tools/testing/selftests/net/lib/py/ksft.py", line 218, in ksft_run
   Exception|     case(*args)
   Exception|   File "/home/virtme/testing-18/tools/testing/selftests/drivers/net/./queues.py", line 53, in check_xdp
   Exception|     ksft_eq(q['xsk'], {})
   Exception| KeyError: 'xsk'

I think it's because the method of running the helper in the background
is racy. Add more solid infra for waiting for a background helper to be
initialized.

v1: https://lore.kernel.org/20250218195048.74692-1-kuba@kernel.org
====================

Link: https://patch.msgid.link/20250219234956.520599-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-02-20 17:57:11 -08:00
4 changed files with 161 additions and 40 deletions

View File

@@ -2,16 +2,15 @@
# SPDX-License-Identifier: GPL-2.0
from lib.py import ksft_disruptive, ksft_exit, ksft_run
from lib.py import ksft_eq, ksft_raises, KsftSkipEx, KsftFailEx
from lib.py import ksft_eq, ksft_not_in, ksft_raises, KsftSkipEx, KsftFailEx
from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import NetDrvEnv
from lib.py import cmd, defer, ip
from lib.py import bkg, cmd, defer, ip
import errno
import glob
import os
import socket
import struct
import subprocess
def sys_get_queues(ifname, qtype='rx') -> int:
folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
@@ -24,39 +23,40 @@ def nl_get_queues(cfg, nl, qtype='rx'):
return len([q for q in queues if q['type'] == qtype])
return None
def check_xdp(cfg, nl, xdp_queue_id=0) -> None:
test_dir = os.path.dirname(os.path.realpath(__file__))
xdp = subprocess.Popen([f"{test_dir}/xdp_helper", f"{cfg.ifindex}", f"{xdp_queue_id}"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=1,
text=True)
defer(xdp.kill)
stdout, stderr = xdp.communicate(timeout=10)
rx = tx = False
if xdp.returncode == 255:
def check_xsk(cfg, nl, xdp_queue_id=0) -> None:
# Probe for support
xdp = cmd(cfg.rpath("xdp_helper") + ' - -', fail=False)
if xdp.ret == 255:
raise KsftSkipEx('AF_XDP unsupported')
elif xdp.returncode > 0:
elif xdp.ret > 0:
raise KsftFailEx('unable to create AF_XDP socket')
queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
if not queues:
raise KsftSkipEx("Netlink reports no queues")
with bkg(f'{cfg.rpath("xdp_helper")} {cfg.ifindex} {xdp_queue_id}',
ksft_wait=3):
for q in queues:
if q['id'] == 0:
if q['type'] == 'rx':
rx = True
if q['type'] == 'tx':
tx = True
rx = tx = False
ksft_eq(q['xsk'], {})
else:
if 'xsk' in q:
_fail("Check failed: xsk attribute set.")
queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
if not queues:
raise KsftSkipEx("Netlink reports no queues")
for q in queues:
if q['id'] == 0:
if q['type'] == 'rx':
rx = True
if q['type'] == 'tx':
tx = True
ksft_eq(q.get('xsk', None), {},
comment="xsk attr on queue we configured")
else:
ksft_not_in('xsk', q,
comment="xsk attr on queue we didn't configure")
ksft_eq(rx, True)
ksft_eq(tx, True)
ksft_eq(rx, True)
ksft_eq(tx, True)
def get_queues(cfg, nl) -> None:
snl = NetdevFamily(recv_size=4096)
@@ -117,7 +117,8 @@ def check_down(cfg, nl) -> None:
def main() -> None:
with NetDrvEnv(__file__, queue_count=100) as cfg:
ksft_run([get_queues, addremove_queues, check_down, check_xdp], args=(cfg, NetdevFamily()))
ksft_run([get_queues, addremove_queues, check_down, check_xsk],
args=(cfg, NetdevFamily()))
ksft_exit()

View File

@@ -14,6 +14,54 @@
#define UMEM_SZ (1U << 16)
#define NUM_DESC (UMEM_SZ / 2048)
/* Move this to a common header when reused! */
static void ksft_ready(void)
{
const char msg[7] = "ready\n";
char *env_str;
int fd;
env_str = getenv("KSFT_READY_FD");
if (env_str) {
fd = atoi(env_str);
if (!fd) {
fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
env_str);
return;
}
} else {
fd = STDOUT_FILENO;
}
write(fd, msg, sizeof(msg));
if (fd != STDOUT_FILENO)
close(fd);
}
static void ksft_wait(void)
{
char *env_str;
char byte;
int fd;
env_str = getenv("KSFT_WAIT_FD");
if (env_str) {
fd = atoi(env_str);
if (!fd) {
fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
env_str);
return;
}
} else {
/* Not running in KSFT env, wait for input from STDIN instead */
fd = STDIN_FILENO;
}
read(fd, &byte, sizeof(byte));
if (fd != STDIN_FILENO)
close(fd);
}
/* this is a simple helper program that creates an XDP socket and does the
* minimum necessary to get bind() to succeed.
*
@@ -32,10 +80,9 @@ int main(int argc, char **argv)
int ifindex;
int sock_fd;
int queue;
char byte;
if (argc != 3) {
fprintf(stderr, "Usage: %s ifindex queue_id", argv[0]);
fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]);
return 1;
}
@@ -50,6 +97,13 @@ int main(int argc, char **argv)
return 1;
}
/* "Probing mode", just checking if AF_XDP sockets are supported */
if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-")) {
printf("AF_XDP support detected\n");
close(sock_fd);
return 0;
}
ifindex = atoi(argv[1]);
queue = atoi(argv[2]);
@@ -85,13 +139,12 @@ int main(int argc, char **argv)
return 1;
}
/* give the parent program some data when the socket is ready*/
fprintf(stdout, "%d\n", sock_fd);
ksft_ready();
ksft_wait();
/* parent program will write a byte to stdin when its ready for this
* helper to exit
*/
read(STDIN_FILENO, &byte, 1);
close(sock_fd);
return 0;

View File

@@ -71,6 +71,11 @@ def ksft_in(a, b, comment=""):
_fail("Check failed", a, "not in", b, comment)
def ksft_not_in(a, b, comment=""):
if a in b:
_fail("Check failed", a, "in", b, comment)
def ksft_is(a, b, comment=""):
if a is not b:
_fail("Check failed", a, "is not", b, comment)

View File

@@ -2,8 +2,10 @@
import errno
import json as _json
import os
import random
import re
import select
import socket
import subprocess
import time
@@ -15,21 +17,56 @@ class CmdExitFailure(Exception):
self.cmd = cmd_obj
def fd_read_timeout(fd, timeout):
rlist, _, _ = select.select([fd], [], [], timeout)
if rlist:
return os.read(fd, 1024)
else:
raise TimeoutError("Timeout waiting for fd read")
class cmd:
def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5):
"""
Execute a command on local or remote host.
Use bkg() instead to run a command in the background.
"""
def __init__(self, comm, shell=True, fail=True, ns=None, background=False,
host=None, timeout=5, ksft_wait=None):
if ns:
comm = f'ip netns exec {ns} ' + comm
self.stdout = None
self.stderr = None
self.ret = None
self.ksft_term_fd = None
self.comm = comm
if host:
self.proc = host.cmd(comm)
else:
# ksft_wait lets us wait for the background process to fully start,
# we pass an FD to the child process, and wait for it to write back.
# Similarly term_fd tells child it's time to exit.
pass_fds = ()
env = os.environ.copy()
if ksft_wait is not None:
rfd, ready_fd = os.pipe()
wait_fd, self.ksft_term_fd = os.pipe()
pass_fds = (ready_fd, wait_fd, )
env["KSFT_READY_FD"] = str(ready_fd)
env["KSFT_WAIT_FD"] = str(wait_fd)
self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stderr=subprocess.PIPE, pass_fds=pass_fds,
env=env)
if ksft_wait is not None:
os.close(ready_fd)
os.close(wait_fd)
msg = fd_read_timeout(rfd, ksft_wait)
os.close(rfd)
if not msg:
raise Exception("Did not receive ready message")
if not background:
self.process(terminate=False, fail=fail, timeout=timeout)
@@ -37,6 +74,8 @@ class cmd:
if fail is None:
fail = not terminate
if self.ksft_term_fd:
os.write(self.ksft_term_fd, b"1")
if terminate:
self.proc.terminate()
stdout, stderr = self.proc.communicate(timeout)
@@ -54,13 +93,36 @@ class cmd:
class bkg(cmd):
"""
Run a command in the background.
Examples usage:
Run a command on remote host, and wait for it to finish.
This is usually paired with wait_port_listen() to make sure
the command has initialized:
with bkg("socat ...", exit_wait=True, host=cfg.remote) as nc:
...
Run a command and expect it to let us know that it's ready
by writing to a special file descriptor passed via KSFT_READY_FD.
Command will be terminated when we exit the context manager:
with bkg("my_binary", ksft_wait=5):
"""
def __init__(self, comm, shell=True, fail=None, ns=None, host=None,
exit_wait=False):
exit_wait=False, ksft_wait=None):
super().__init__(comm, background=True,
shell=shell, fail=fail, ns=ns, host=host)
self.terminate = not exit_wait
shell=shell, fail=fail, ns=ns, host=host,
ksft_wait=ksft_wait)
self.terminate = not exit_wait and not ksft_wait
self.check_fail = fail
if shell and self.terminate:
print("# Warning: combining shell and terminate is risky!")
print("# SIGTERM may not reach the child on zsh/ksh!")
def __enter__(self):
return self