From d55acb9732d981c7a8e07dd63089a77d2938e382 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 15 Apr 2025 13:25:53 +0200 Subject: [PATCH 1/2] net: ipv6: ioam6: use consistent dst names Be consistent and use the same terminology as other lwt users: orig_dst is the dst_entry before the transformation, while dst is either the dst_entry in the cache or the dst_entry after the transformation Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20250415112554.23823-2-justin.iurman@uliege.be Signed-off-by: Paolo Abeni --- net/ipv6/ioam6_iptunnel.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 09065187378e..57200b9991a1 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -336,7 +336,8 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; struct ioam6_lwt *ilwt; int err = -EINVAL; u32 pkt_cnt; @@ -344,7 +345,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ilwt = ioam6_lwt_state(dst->lwtstate); + ilwt = ioam6_lwt_state(orig_dst->lwtstate); /* Check for insertion frequency (i.e., "k over n" insertions) */ pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt); @@ -352,7 +353,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto out; local_bh_disable(); - cache_dst = dst_cache_get(&ilwt->cache); + dst = dst_cache_get(&ilwt->cache); local_bh_enable(); switch (ilwt->mode) { @@ -362,7 +363,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) goto out; - err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst); + err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst); if (unlikely(err)) goto drop; @@ -372,7 +373,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) /* Encapsulation (ip6ip6) */ err = ioam6_do_encap(net, skb, &ilwt->tuninfo, ilwt->has_tunsrc, &ilwt->tunsrc, - &ilwt->tundst, cache_dst); + &ilwt->tundst, dst); if (unlikely(err)) goto drop; @@ -390,7 +391,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } - if (unlikely(!cache_dst)) { + if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); struct flowi6 fl6; @@ -401,20 +402,20 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = hdr->nexthdr; - cache_dst = ip6_route_output(net, NULL, &fl6); - if (cache_dst->error) { - err = cache_dst->error; + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; goto drop; } /* cache only if we don't create a dst reference loop */ - if (dst->lwtstate != cache_dst->lwtstate) { + if (orig_dst->lwtstate != dst->lwtstate) { local_bh_disable(); - dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); + dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); local_bh_enable(); } - err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) goto drop; } @@ -422,16 +423,16 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) /* avoid lwtunnel_output() reentry loop when destination is the same * after transformation (e.g., with the inline mode) */ - if (dst->lwtstate != cache_dst->lwtstate) { + if (orig_dst->lwtstate != dst->lwtstate) { skb_dst_drop(skb); - skb_dst_set(skb, cache_dst); + skb_dst_set(skb, dst); return dst_output(net, sk, skb); } out: - dst_release(cache_dst); - return dst->lwtstate->orig_output(net, sk, skb); + dst_release(dst); + return orig_dst->lwtstate->orig_output(net, sk, skb); drop: - dst_release(cache_dst); + dst_release(dst); kfree_skb(skb); return err; } From 47ce7c854563fe8450e9cb8dcd62c6470e28076b Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 15 Apr 2025 13:25:54 +0200 Subject: [PATCH 2/2] net: ipv6: ioam6: fix double reallocation If the dst_entry is the same post transformation (which is a valid use case for IOAM), we don't add it to the cache to avoid a reference loop. Instead, we use a "fake" dst_entry and add it to the cache as a signal. When we read the cache, we compare it with our "fake" dst_entry and therefore detect if we're in the special case. Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20250415112554.23823-3-justin.iurman@uliege.be Signed-off-by: Paolo Abeni --- net/ipv6/ioam6_iptunnel.c | 41 ++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 57200b9991a1..40df8bdfaacd 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -38,6 +38,7 @@ struct ioam6_lwt_freq { }; struct ioam6_lwt { + struct dst_entry null_dst; struct dst_cache cache; struct ioam6_lwt_freq freq; atomic_t pkt_cnt; @@ -177,6 +178,14 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, if (err) goto free_lwt; + /* This "fake" dst_entry will be stored in a dst_cache, which will call + * dst_hold() and dst_release() on it. We must ensure that dst_destroy() + * will never be called. For that, its initial refcount is 1 and +1 when + * it is stored in the cache. Then, +1/-1 each time we read the cache + * and release it. Long story short, we're fine. + */ + dst_init(&ilwt->null_dst, NULL, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); + atomic_set(&ilwt->pkt_cnt, 0); ilwt->freq.k = freq_k; ilwt->freq.n = freq_n; @@ -356,6 +365,17 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) dst = dst_cache_get(&ilwt->cache); local_bh_enable(); + /* This is how we notify that the destination does not change after + * transformation and that we need to use orig_dst instead of the cache + */ + if (dst == &ilwt->null_dst) { + dst_release(dst); + + dst = orig_dst; + /* keep refcount balance: dst_release() is called at the end */ + dst_hold(dst); + } + switch (ilwt->mode) { case IOAM6_IPTUNNEL_MODE_INLINE: do_inline: @@ -408,12 +428,19 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } - /* cache only if we don't create a dst reference loop */ - if (orig_dst->lwtstate != dst->lwtstate) { - local_bh_disable(); + /* If the destination is the same after transformation (which is + * a valid use case for IOAM), then we don't want to add it to + * the cache in order to avoid a reference loop. Instead, we add + * our fake dst_entry to the cache as a way to detect this case. + * Otherwise, we add the resolved destination to the cache. + */ + local_bh_disable(); + if (orig_dst->lwtstate == dst->lwtstate) + dst_cache_set_ip6(&ilwt->cache, + &ilwt->null_dst, &fl6.saddr); + else dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); - local_bh_enable(); - } + local_bh_enable(); err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -439,6 +466,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) static void ioam6_destroy_state(struct lwtunnel_state *lwt) { + /* Since the refcount of per-cpu dst_entry caches will never be 0 (see + * why above) when our "fake" dst_entry is used, it is not necessary to + * remove them before calling dst_cache_destroy() + */ dst_cache_destroy(&ioam6_lwt_state(lwt)->cache); }