From a382a34276cb94d1cdc620b622dd85c55589a166 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 23 Feb 2026 14:38:32 -0800 Subject: [PATCH 1/3] selftests/vsock: change tests to respect write-once child ns mode The child_ns_mode sysctl parameter becomes write-once in a future patch in this series, which breaks existing tests. This patch updates the tests to respect this new policy. No additional tests are added. Add "global-parent" and "local-parent" namespaces as intermediaries to spawn namespaces in the given modes. This avoids the need to change "child_ns_mode" in the init_ns. nsenter must be used because ip netns unshares the mount namespace so nested "ip netns add" breaks exec calls from the init ns. Adds nsenter to the deps check. Reviewed-by: Stefano Garzarella Signed-off-by: Bobby Eshleman Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-1-c0cde6959923@meta.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/vsock/vmtest.sh | 39 +++++++++++++------------ 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index dc8dbe74a6d0..86e338886b33 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -210,16 +210,21 @@ check_result() { } add_namespaces() { - local orig_mode - orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode) + ip netns add "global-parent" 2>/dev/null + echo "global" | ip netns exec "global-parent" \ + tee /proc/sys/net/vsock/child_ns_mode &>/dev/null + ip netns add "local-parent" 2>/dev/null + echo "local" | ip netns exec "local-parent" \ + tee /proc/sys/net/vsock/child_ns_mode &>/dev/null - for mode in "${NS_MODES[@]}"; do - echo "${mode}" > /proc/sys/net/vsock/child_ns_mode - ip netns add "${mode}0" 2>/dev/null - ip netns add "${mode}1" 2>/dev/null - done - - echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode + nsenter --net=/var/run/netns/global-parent \ + ip netns add "global0" 2>/dev/null + nsenter --net=/var/run/netns/global-parent \ + ip netns add "global1" 2>/dev/null + nsenter --net=/var/run/netns/local-parent \ + ip netns add "local0" 2>/dev/null + nsenter --net=/var/run/netns/local-parent \ + ip netns add "local1" 2>/dev/null } init_namespaces() { @@ -237,6 +242,8 @@ del_namespaces() { log_host "removed ns ${mode}0" log_host "removed ns ${mode}1" done + ip netns del "global-parent" &>/dev/null + ip netns del "local-parent" &>/dev/null } vm_ssh() { @@ -287,7 +294,7 @@ check_args() { } check_deps() { - for dep in vng ${QEMU} busybox pkill ssh ss socat; do + for dep in vng ${QEMU} busybox pkill ssh ss socat nsenter; do if [[ ! -x $(command -v "${dep}") ]]; then echo -e "skip: dependency ${dep} not found!\n" exit "${KSFT_SKIP}" @@ -1231,12 +1238,8 @@ test_ns_local_same_cid_ok() { } test_ns_host_vsock_child_ns_mode_ok() { - local orig_mode - local rc + local rc="${KSFT_PASS}" - orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode) - - rc="${KSFT_PASS}" for mode in "${NS_MODES[@]}"; do local ns="${mode}0" @@ -1246,15 +1249,13 @@ test_ns_host_vsock_child_ns_mode_ok() { continue fi - if ! echo "${mode}" > /proc/sys/net/vsock/child_ns_mode; then - log_host "child_ns_mode should be writable to ${mode}" + if ! echo "${mode}" | ip netns exec "${ns}" \ + tee /proc/sys/net/vsock/child_ns_mode &>/dev/null; then rc="${KSFT_FAIL}" continue fi done - echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode - return "${rc}" } From 102eab95f025b4d3f3a6c0a858400aca2af2fe52 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 23 Feb 2026 14:38:33 -0800 Subject: [PATCH 2/3] vsock: lock down child_ns_mode as write-once Two administrator processes may race when setting child_ns_mode as one process sets child_ns_mode to "local" and then creates a namespace, but another process changes child_ns_mode to "global" between the write and the namespace creation. The first process ends up with a namespace in "global" mode instead of "local". While this can be detected after the fact by reading ns_mode and retrying, it is fragile and error-prone. Make child_ns_mode write-once so that a namespace manager can set it once and be sure it won't change. Writing a different value after the first write returns -EBUSY. This applies to all namespaces, including init_net, where an init process can write "local" to lock all future namespaces into local mode. Fixes: eafb64f40ca4 ("vsock: add netns to vsock core") Suggested-by: Daan De Meyer Suggested-by: Stefano Garzarella Co-developed-by: Stefano Garzarella Signed-off-by: Stefano Garzarella Signed-off-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-2-c0cde6959923@meta.com Signed-off-by: Paolo Abeni --- include/net/af_vsock.h | 13 +++++++++++-- include/net/netns/vsock.h | 3 +++ net/vmw_vsock/af_vsock.c | 15 ++++++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d3ff48a2fbe0..533d8e75f7bb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -276,10 +276,19 @@ static inline bool vsock_net_mode_global(struct vsock_sock *vsk) return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL; } -static inline void vsock_net_set_child_mode(struct net *net, +static inline bool vsock_net_set_child_mode(struct net *net, enum vsock_net_mode mode) { - WRITE_ONCE(net->vsock.child_ns_mode, mode); + int new_locked = mode + 1; + int old_locked = 0; /* unlocked */ + + if (try_cmpxchg(&net->vsock.child_ns_mode_locked, + &old_locked, new_locked)) { + WRITE_ONCE(net->vsock.child_ns_mode, mode); + return true; + } + + return old_locked == new_locked; } static inline enum vsock_net_mode vsock_net_child_mode(struct net *net) diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h index b34d69a22fa8..dc8cbe45f406 100644 --- a/include/net/netns/vsock.h +++ b/include/net/netns/vsock.h @@ -17,5 +17,8 @@ struct netns_vsock { enum vsock_net_mode mode; enum vsock_net_mode child_ns_mode; + + /* 0 = unlocked, 1 = locked to global, 2 = locked to local */ + int child_ns_mode_locked; }; #endif /* __NET_NET_NAMESPACE_VSOCK_H */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index f4062c6a1944..2f7d94d682cb 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -90,16 +90,20 @@ * * - /proc/sys/net/vsock/ns_mode (read-only) reports the current namespace's * mode, which is set at namespace creation and immutable thereafter. - * - /proc/sys/net/vsock/child_ns_mode (writable) controls what mode future + * - /proc/sys/net/vsock/child_ns_mode (write-once) controls what mode future * child namespaces will inherit when created. The initial value matches * the namespace's own ns_mode. * * Changing child_ns_mode only affects newly created namespaces, not the * current namespace or existing children. A "local" namespace cannot set - * child_ns_mode to "global". At namespace creation, ns_mode is inherited - * from the parent's child_ns_mode. + * child_ns_mode to "global". child_ns_mode is write-once, so that it may be + * configured and locked down by a namespace manager. Writing a different + * value after the first write returns -EBUSY. At namespace creation, ns_mode + * is inherited from the parent's child_ns_mode. * - * The init_net mode is "global" and cannot be modified. + * The init_net mode is "global" and cannot be modified. The init_net + * child_ns_mode is also write-once, so an init process (e.g. systemd) can + * set it to "local" to ensure all new namespaces inherit local mode. * * The modes affect the allocation and accessibility of CIDs as follows: * @@ -2853,7 +2857,8 @@ static int vsock_net_child_mode_string(const struct ctl_table *table, int write, new_mode == VSOCK_NET_MODE_GLOBAL) return -EPERM; - vsock_net_set_child_mode(net, new_mode); + if (!vsock_net_set_child_mode(net, new_mode)) + return -EBUSY; } return 0; From b6302e057fdc8f199ddae736ecdf45029f892e5c Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 23 Feb 2026 14:38:34 -0800 Subject: [PATCH 3/3] vsock: document write-once behavior of the child_ns_mode sysctl Update the vsock child_ns_mode documentation to include the new write-once semantics of setting child_ns_mode. The semantics are implemented in a preceding patch in this series. Signed-off-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-3-c0cde6959923@meta.com Signed-off-by: Paolo Abeni --- Documentation/admin-guide/sysctl/net.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index c10530624f1e..3b2ad61995d4 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -594,6 +594,9 @@ Values: their sockets will only be able to connect within their own namespace. +The first write to ``child_ns_mode`` locks its value. Subsequent writes of the +same value succeed, but writing a different value returns ``-EBUSY``. + Changing ``child_ns_mode`` only affects namespaces created after the change; it does not modify the current namespace or any existing children.