From 2b78d30620d7f8a9f9ce312ad21200ec7a554bd9 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:24:29 +0200 Subject: [PATCH 1/7] ipv4: Convert ip_route_use_hint() to dscp_t. Pass a dscp_t variable to ip_route_use_hint(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Only ip_rcv_finish_core() actually calls ip_route_use_hint(). Use the ip4h_dscp() helper to get the DSCP from the IPv4 header. While there, modify the declaration of ip_route_use_hint() in include/net/route.h so that it matches the prototype of its implementation in net/ipv4/route.c. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/c40994fdf804db7a363d04fdee01bf48dddda676.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 4 ++-- net/ipv4/ip_input.c | 4 ++-- net/ipv4/route.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 5e4374d66927..c219c0fecdcf 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -203,8 +203,8 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev); -int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, +int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, const struct sk_buff *hint); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c0a2490eb7c1..89bb63da6852 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -325,8 +325,8 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if 
(ip_can_use_hint(skb, iph, hint)) { - err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos, - dev, hint); + err = ip_route_use_hint(skb, iph->daddr, iph->saddr, + ip4h_dscp(iph), dev, hint); if (unlikely(err)) goto drop_error; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e1cd0065b87..ac03916cfcde 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2136,7 +2136,7 @@ static int ip_mkroute_input(struct sk_buff *skb, * Uses the provided hint instead of performing a route lookup. */ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, + dscp_t dscp, struct net_device *dev, const struct sk_buff *hint) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -2160,8 +2160,8 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (rt->rt_type != RTN_LOCAL) goto skip_validate_source; - tos &= INET_DSCP_MASK; - err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag); + err = fib_validate_source(skb, saddr, daddr, inet_dscp_to_dsfield(dscp), + 0, dev, in_dev, &tag); if (err < 0) goto martian_source; From 34f28ffd62c14d8b558b3ef0de6c0ebfc5ca0b1a Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:24:35 +0200 Subject: [PATCH 2/7] ipv4: Convert ip_mkroute_input() to dscp_t. Pass a dscp_t variable to ip_mkroute_input(), instead of a plain u32, to prevent accidental setting of ECN bits in ->flowi4_tos. Only ip_route_input_slow() actually calls ip_mkroute_input(). Since it already has a dscp_t variable to pass as parameter, we only need to remove the inet_dscp_to_dsfield() conversion. While there, reorganise the function parameters to fill up horizontal space. 
Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/6aa71e28f9ff681cbd70847080e1ab6b526f94f1.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ac03916cfcde..38bb38dbe490 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2112,11 +2112,9 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ -static int ip_mkroute_input(struct sk_buff *skb, - struct fib_result *res, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos, - struct flow_keys *hkeys) +static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, + struct in_device *in_dev, __be32 daddr, + __be32 saddr, dscp_t dscp, struct flow_keys *hkeys) { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && fib_info_num_path(res->fi) > 1) { @@ -2128,7 +2126,8 @@ static int ip_mkroute_input(struct sk_buff *skb, #endif /* create a routing cache entry */ - return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); + return __mkroute_input(skb, res, in_dev, daddr, saddr, + inet_dscp_to_dsfield(dscp)); } /* Implements all the saddr-related checks as ip_route_input_slow(), @@ -2315,8 +2314,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto martian_destination; make_route: - err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, - inet_dscp_to_dsfield(dscp), flkeys); + err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp, flkeys); out: return err; brd_input: From 0936c671911f46fcc0cc0c8ad2925eade7f64e80 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:24:42 +0200 Subject: [PATCH 3/7] ipv4: Convert __mkroute_input() to dscp_t. 
Pass a dscp_t variable to __mkroute_input(), instead of a plain u32, to prevent accidental setting of ECN bits in ->flowi4_tos. Only ip_mkroute_input() actually calls __mkroute_input(). Since it already has a dscp_t variable to pass as parameter, we only need to remove the inet_dscp_to_dsfield() conversion. While there, reorganise the function parameters to fill up horizontal space. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/40853c720aee4d608e6b1b204982164c3b76697d.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 38bb38dbe490..763b8bafd1bf 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1764,10 +1764,9 @@ static void ip_handle_martian_source(struct net_device *dev, } /* called in rcu_read_lock() section */ -static int __mkroute_input(struct sk_buff *skb, - const struct fib_result *res, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) +static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, + struct in_device *in_dev, __be32 daddr, + __be32 saddr, dscp_t dscp) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); struct net_device *dev = nhc->nhc_dev; @@ -1785,8 +1784,8 @@ static int __mkroute_input(struct sk_buff *skb, return -EINVAL; } - err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), - in_dev->dev, in_dev, &itag); + err = fib_validate_source(skb, saddr, daddr, inet_dscp_to_dsfield(dscp), + FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2126,8 +2125,7 @@ static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, #endif /* create a routing cache entry */ - return __mkroute_input(skb, res, in_dev, daddr, saddr, - inet_dscp_to_dsfield(dscp)); + return 
__mkroute_input(skb, res, in_dev, daddr, saddr, dscp); } /* Implements all the saddr-related checks as ip_route_input_slow(), From 1a7c292617e4c6dcacf0590909ad9a231df6e25e Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:24:48 +0200 Subject: [PATCH 4/7] ipv4: Convert ip_route_input_mc() to dscp_t. Pass a dscp_t variable to ip_route_input_mc(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Only ip_route_input_rcu() actually calls ip_route_input_mc(). Since it already has a dscp_t variable to pass as parameter, we only need to remove the inet_dscp_to_dsfield() conversion. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/0cc653ef59bbc0a28881f706d34896c61eba9e01.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 763b8bafd1bf..527121be1ba2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1697,7 +1697,7 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* called in rcu_read_lock() section */ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, int our) + dscp_t dscp, struct net_device *dev, int our) { struct in_device *in_dev = __in_dev_get_rcu(dev); unsigned int flags = RTCF_MULTICAST; @@ -1705,7 +1705,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 itag = 0; int err; - err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); + err = ip_mc_validate_source(skb, daddr, saddr, + inet_dscp_to_dsfield(dscp), dev, in_dev, + &itag); if (err) return err; @@ -2455,9 +2457,8 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, IN_DEV_MFORWARD(in_dev)) #endif ) { - err = ip_route_input_mc(skb, daddr, saddr, - 
inet_dscp_to_dsfield(dscp), - dev, our); + err = ip_route_input_mc(skb, daddr, saddr, dscp, dev, + our); } return err; } From d32976408744a589f04b5c939f8f01f7167e5167 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:24:54 +0200 Subject: [PATCH 5/7] ipv4: Convert ip_mc_validate_source() to dscp_t. Pass a dscp_t variable to ip_mc_validate_source(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Callers of ip_mc_validate_source() to consider are: * ip_route_input_mc() which already has a dscp_t variable to pass as parameter. We just need to remove the inet_dscp_to_dsfield() conversion. * udp_v4_early_demux() which gets the DSCP directly from the IPv4 header and can simply use the ip4h_dscp() helper. Also, stop including net/inet_dscp.h in udp.c as we don't use any of its declarations anymore. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/c91b2cca04718b7ee6cf5b9c1d5b40507d65a8d4.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 3 ++- net/ipv4/route.c | 8 ++++---- net/ipv4/udp.c | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index c219c0fecdcf..586e59f7ed8a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -198,8 +198,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 fl4->fl4_gre_key = gre_key; return ip_route_output_key(net, fl4); } + int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, + dscp_t dscp, struct net_device *dev, struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 527121be1ba2..1efb65e647c1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1666,7 
+1666,7 @@ EXPORT_SYMBOL(rt_dst_clone); /* called in rcu_read_lock() section */ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, + dscp_t dscp, struct net_device *dev, struct in_device *in_dev, u32 *itag) { int err; @@ -1687,7 +1687,8 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, ip_hdr(skb)->protocol != IPPROTO_IGMP) return -EINVAL; } else { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, + err = fib_validate_source(skb, saddr, 0, + inet_dscp_to_dsfield(dscp), 0, dev, in_dev, itag); if (err < 0) return err; @@ -1705,8 +1706,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 itag = 0; int err; - err = ip_mc_validate_source(skb, daddr, saddr, - inet_dscp_to_dsfield(dscp), dev, in_dev, + err = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev, &itag); if (err) return err; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8accbf4cb295..4b74a25d0b6e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -100,6 +100,7 @@ #include #include #include +#include #include #include #include @@ -115,7 +116,6 @@ #include #include #include -#include #if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -2619,7 +2619,7 @@ int udp_v4_early_demux(struct sk_buff *skb) if (!inet_sk(sk)->inet_daddr && in_dev) return ip_mc_validate_source(skb, iph->daddr, iph->saddr, - iph->tos & INET_DSCP_MASK, + ip4h_dscp(iph), skb->dev, in_dev, &itag); } return 0; From d36236ab52754ef6bd083be945e9c2e93f466022 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:25:02 +0200 Subject: [PATCH 6/7] ipv4: Convert fib_validate_source() to dscp_t. Pass a dscp_t variable to fib_validate_source(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. All callers of fib_validate_source() already have a dscp_t variable to pass as parameter. We just need to remove the inet_dscp_to_dsfield() conversions. 
Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/08612a4519bc5a3578bb493fbaad82437ebb73dc.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 3 ++- net/ipv4/fib_frontend.c | 5 +++-- net/ipv4/route.c | 21 +++++++++------------ 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 967e4dc555fa..06130933542d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -449,8 +449,9 @@ int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, __be32 fib_compute_spec_dst(struct sk_buff *skb); bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); + #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 793e6781399a..d0fbc8c8c5e6 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -419,7 +419,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, /* Ignore rp_filter for packets protected by IPsec. */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag) { int r = secpath_exists(skb) ? 
0 : IN_DEV_RPFILTER(idev); @@ -448,7 +448,8 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, } full_check: - return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); + return __fib_validate_source(skb, src, dst, inet_dscp_to_dsfield(dscp), + oif, dev, r, idev, itag); } static inline __be32 sk_extract_addr(struct sockaddr *addr) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1efb65e647c1..a0b091a7df87 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1687,9 +1687,8 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, ip_hdr(skb)->protocol != IPPROTO_IGMP) return -EINVAL; } else { - err = fib_validate_source(skb, saddr, 0, - inet_dscp_to_dsfield(dscp), 0, dev, - in_dev, itag); + err = fib_validate_source(skb, saddr, 0, dscp, 0, dev, in_dev, + itag); if (err < 0) return err; } @@ -1786,8 +1785,8 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, return -EINVAL; } - err = fib_validate_source(skb, saddr, daddr, inet_dscp_to_dsfield(dscp), - FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); + err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), + in_dev->dev, in_dev, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2159,8 +2158,8 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (rt->rt_type != RTN_LOCAL) goto skip_validate_source; - err = fib_validate_source(skb, saddr, daddr, inet_dscp_to_dsfield(dscp), - 0, dev, in_dev, &tag); + err = fib_validate_source(skb, saddr, daddr, dscp, 0, dev, in_dev, + &tag); if (err < 0) goto martian_source; @@ -2298,8 +2297,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, } if (res->type == RTN_LOCAL) { - err = fib_validate_source(skb, saddr, daddr, - inet_dscp_to_dsfield(dscp), 0, dev, + err = fib_validate_source(skb, saddr, daddr, dscp, 0, dev, in_dev, &itag); if (err < 0) goto martian_source; @@ -2322,9 
+2320,8 @@ out: return err; goto e_inval; if (!ipv4_is_zeronet(saddr)) { - err = fib_validate_source(skb, saddr, 0, - inet_dscp_to_dsfield(dscp), 0, dev, - in_dev, &itag); + err = fib_validate_source(skb, saddr, 0, dscp, 0, dev, in_dev, + &itag); if (err < 0) goto martian_source; } From 3768b402735ea3a580e46d8e6c94779e2f42fb4c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:25:08 +0200 Subject: [PATCH 7/7] ipv4: Convert __fib_validate_source() to dscp_t. Pass a dscp_t variable to __fib_validate_source(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Only fib_validate_source() actually calls __fib_validate_source(). Since it already has a dscp_t variable to pass as parameter, we only need to remove the inet_dscp_to_dsfield() conversion. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/8206b0a64a21a208ed94774e261a251c8d7bc251.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d0fbc8c8c5e6..8353518b110a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -342,7 +342,7 @@ EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev); * called with rcu_read_lock() */ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, int rpf, struct in_device *idev, u32 *itag) { struct net *net = dev_net(dev); @@ -357,7 +357,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_iif = oif ? 
: LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; - fl4.flowi4_tos = tos; + fl4.flowi4_tos = inet_dscp_to_dsfield(dscp); fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; @@ -448,8 +448,8 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, } full_check: - return __fib_validate_source(skb, src, dst, inet_dscp_to_dsfield(dscp), - oif, dev, r, idev, itag); + return __fib_validate_source(skb, src, dst, dscp, oif, dev, r, idev, + itag); } static inline __be32 sk_extract_addr(struct sockaddr *addr)