From c0c5a60f0f1311bcf08bbe735122096d6326fb5b Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 7 Nov 2020 20:35:13 +0100 Subject: [PATCH 1/3] net: evaluate net.ipvX.conf.all.ignore_routes_with_linkdown Introduced in 0eeb075fad73, the "ignore_routes_with_linkdown" sysctl ignores a route whose interface is down. It is provided as a per-interface sysctl. However, while a "all" variant is exposed, it was a noop since it was never evaluated. We use the usual "or" logic for this kind of sysctls. Tested with: ip link add type veth # veth0 + veth1 ip link add type veth # veth1 + veth2 ip link set up dev veth0 ip link set up dev veth1 # link-status paired with veth0 ip link set up dev veth2 ip link set up dev veth3 # link-status paired with veth2 # First available path ip -4 addr add 203.0.113.${uts#H}/24 dev veth0 ip -6 addr add 2001:db8:1::${uts#H}/64 dev veth0 # Second available path ip -4 addr add 192.0.2.${uts#H}/24 dev veth2 ip -6 addr add 2001:db8:2::${uts#H}/64 dev veth2 # More specific route through first path ip -4 route add 198.51.100.0/25 via 203.0.113.254 # via veth0 ip -6 route add 2001:db8:3::/56 via 2001:db8:1::ff # via veth0 # Less specific route through second path ip -4 route add 198.51.100.0/24 via 192.0.2.254 # via veth2 ip -6 route add 2001:db8:3::/48 via 2001:db8:2::ff # via veth2 # H1: enable on "all" # H2: enable on "veth0" for v in ipv4 ipv6; do case $uts in H1) sysctl -qw net.${v}.conf.all.ignore_routes_with_linkdown=1 ;; H2) sysctl -qw net.${v}.conf.veth0.ignore_routes_with_linkdown=1 ;; esac done set -xe # When veth0 is up, best route is through veth0 ip -o route get 198.51.100.1 | grep -Fw veth0 ip -o route get 2001:db8:3::1 | grep -Fw veth0 # When veth0 is down, best route should be through veth2 on H1/H2, # but on veth0 on H2 ip link set down dev veth1 # down veth0 ip route show [ $uts != H3 ] || ip -o route get 198.51.100.1 | grep -Fw veth0 [ $uts != H3 ] || ip -o route get 2001:db8:3::1 | grep -Fw veth0 [ $uts = H3 ] || ip -o route get 198.51.100.1 | grep -Fw veth2 [ $uts = H3 ] || ip -o route get 2001:db8:3::1 | grep -Fw veth2 Without this patch, the two last lines would fail on H1 (the one using the "all" sysctl). With the patch, everything succeeds as expected. Also document the sysctl in `ip-sysctl.rst`. Fixes: 0eeb075fad73 ("net: ipv4 sysctl option to ignore routes when nexthop link is down") Signed-off-by: Vincent Bernat Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 3 +++ include/linux/inetdevice.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 2aaf40b2d2cd..dd2b12a32b73 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1554,6 +1554,9 @@ igmpv3_unsolicited_report_interval - INTEGER Default: 1000 (1 seconds) +ignore_routes_with_linkdown - BOOLEAN + Ignore routes whose link is down when performing a FIB lookup. + promote_secondaries - BOOLEAN When a primary IP address is removed from this interface promote a corresponding secondary IP address instead of diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 3515ca64e638..3bbcddd22df8 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -126,7 +126,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) #define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ - IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) + IN_DEV_ORCONF((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) From 1af5318c00a8acc33a90537af49b3f23f72a2c4b Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 7 Nov 2020 20:35:14 +0100 Subject: [PATCH 2/3] net: evaluate net.ipv4.conf.all.proxy_arp_pvlan Introduced in 65324144b50b, the "proxy_arp_vlan" sysctl is a per-interface sysctl to tune proxy ARP support for private VLANs. While the "all" variant is exposed, it was a noop and never evaluated. We use the usual "or" logic for this kind of sysctls. Fixes: 65324144b50b ("net: RFC3069, private VLAN proxy arp support") Signed-off-by: Vincent Bernat Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 3bbcddd22df8..53aa0343bf69 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -105,7 +105,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) #define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) -#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_CONF_GET(in_dev, PROXY_ARP_PVLAN) +#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP_PVLAN) #define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) #define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) #define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ From 62679a8d3aa4ba15ff63574a43e5686078d7b804 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 7 Nov 2020 20:35:15 +0100 Subject: [PATCH 3/3] net: evaluate net.ipvX.conf.all.disable_policy and disable_xfrm The disable_policy and disable_xfrm are a per-interface sysctl to disable IPsec policy or encryption on an interface. However, while a "all" variant is exposed, it was a noop since it was never evaluated. We use the usual "or" logic for this kind of sysctls. Signed-off-by: Vincent Bernat Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index dc2a399cd9f4..a3b60c41cbad 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1741,7 +1741,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, flags |= RTCF_LOCAL; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) return -ENOBUFS; @@ -1857,8 +1857,8 @@ static int __mkroute_input(struct sk_buff *skb, } rth = rt_dst_alloc(out_dev->dev, 0, res->type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(out_dev, NOXFRM)); + IN_DEV_ORCONF(in_dev, NOPOLICY), + IN_DEV_ORCONF(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; goto cleanup; @@ -2227,7 +2227,7 @@ out: return err; rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, flags | RTCF_LOCAL, res->type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; @@ -2450,8 +2450,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, add: rth = rt_dst_alloc(dev_out, flags, type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(in_dev, NOXFRM)); + IN_DEV_ORCONF(in_dev, NOPOLICY), + IN_DEV_ORCONF(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS);