mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-04 02:34:10 -04:00
Merge branch 'mitigate-double-allocations-in-ioam6_iptunnel'
Justin Iurman says: ==================== Mitigate double allocations in ioam6_iptunnel Commit dce525185b ("net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue") fixed the double allocation issue in ioam6_iptunnel. However, since commit 92191dd107 ("net: ipv6: fix dst ref loops in rpl, seg6 and ioam6 lwtunnels"), the fix was left incomplete. Because the cache is now empty when the dst_entry is the same post transformation in order to avoid a reference loop, the double reallocation is back for such cases (e.g., inline mode) which are valid for IOAM. This patch provides a way to detect such cases without having a reference loop in the cache, and so to avoid the double reallocation issue for all cases again. v1: https://lore.kernel.org/netdev/20250410152432.30246-1-justin.iurman@uliege.be/T/#t ==================== Link: https://patch.msgid.link/20250415112554.23823-1-justin.iurman@uliege.be Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
@@ -38,6 +38,7 @@ struct ioam6_lwt_freq {
|
||||
};
|
||||
|
||||
struct ioam6_lwt {
|
||||
struct dst_entry null_dst;
|
||||
struct dst_cache cache;
|
||||
struct ioam6_lwt_freq freq;
|
||||
atomic_t pkt_cnt;
|
||||
@@ -177,6 +178,14 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
|
||||
if (err)
|
||||
goto free_lwt;
|
||||
|
||||
/* This "fake" dst_entry will be stored in a dst_cache, which will call
|
||||
* dst_hold() and dst_release() on it. We must ensure that dst_destroy()
|
||||
* will never be called. For that, its initial refcount is 1 and +1 when
|
||||
* it is stored in the cache. Then, +1/-1 each time we read the cache
|
||||
* and release it. Long story short, we're fine.
|
||||
*/
|
||||
dst_init(&ilwt->null_dst, NULL, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT);
|
||||
|
||||
atomic_set(&ilwt->pkt_cnt, 0);
|
||||
ilwt->freq.k = freq_k;
|
||||
ilwt->freq.n = freq_n;
|
||||
@@ -336,7 +345,8 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
|
||||
|
||||
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL;
|
||||
struct dst_entry *orig_dst = skb_dst(skb);
|
||||
struct dst_entry *dst = NULL;
|
||||
struct ioam6_lwt *ilwt;
|
||||
int err = -EINVAL;
|
||||
u32 pkt_cnt;
|
||||
@@ -344,7 +354,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
if (skb->protocol != htons(ETH_P_IPV6))
|
||||
goto drop;
|
||||
|
||||
ilwt = ioam6_lwt_state(dst->lwtstate);
|
||||
ilwt = ioam6_lwt_state(orig_dst->lwtstate);
|
||||
|
||||
/* Check for insertion frequency (i.e., "k over n" insertions) */
|
||||
pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
|
||||
@@ -352,9 +362,20 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
goto out;
|
||||
|
||||
local_bh_disable();
|
||||
cache_dst = dst_cache_get(&ilwt->cache);
|
||||
dst = dst_cache_get(&ilwt->cache);
|
||||
local_bh_enable();
|
||||
|
||||
/* This is how we notify that the destination does not change after
|
||||
* transformation and that we need to use orig_dst instead of the cache
|
||||
*/
|
||||
if (dst == &ilwt->null_dst) {
|
||||
dst_release(dst);
|
||||
|
||||
dst = orig_dst;
|
||||
/* keep refcount balance: dst_release() is called at the end */
|
||||
dst_hold(dst);
|
||||
}
|
||||
|
||||
switch (ilwt->mode) {
|
||||
case IOAM6_IPTUNNEL_MODE_INLINE:
|
||||
do_inline:
|
||||
@@ -362,7 +383,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
|
||||
goto out;
|
||||
|
||||
err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst);
|
||||
err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
|
||||
if (unlikely(err))
|
||||
goto drop;
|
||||
|
||||
@@ -372,7 +393,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
/* Encapsulation (ip6ip6) */
|
||||
err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
|
||||
ilwt->has_tunsrc, &ilwt->tunsrc,
|
||||
&ilwt->tundst, cache_dst);
|
||||
&ilwt->tundst, dst);
|
||||
if (unlikely(err))
|
||||
goto drop;
|
||||
|
||||
@@ -390,7 +411,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
goto drop;
|
||||
}
|
||||
|
||||
if (unlikely(!cache_dst)) {
|
||||
if (unlikely(!dst)) {
|
||||
struct ipv6hdr *hdr = ipv6_hdr(skb);
|
||||
struct flowi6 fl6;
|
||||
|
||||
@@ -401,20 +422,27 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
fl6.flowi6_mark = skb->mark;
|
||||
fl6.flowi6_proto = hdr->nexthdr;
|
||||
|
||||
cache_dst = ip6_route_output(net, NULL, &fl6);
|
||||
if (cache_dst->error) {
|
||||
err = cache_dst->error;
|
||||
dst = ip6_route_output(net, NULL, &fl6);
|
||||
if (dst->error) {
|
||||
err = dst->error;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/* cache only if we don't create a dst reference loop */
|
||||
if (dst->lwtstate != cache_dst->lwtstate) {
|
||||
local_bh_disable();
|
||||
dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr);
|
||||
local_bh_enable();
|
||||
}
|
||||
/* If the destination is the same after transformation (which is
|
||||
* a valid use case for IOAM), then we don't want to add it to
|
||||
* the cache in order to avoid a reference loop. Instead, we add
|
||||
* our fake dst_entry to the cache as a way to detect this case.
|
||||
* Otherwise, we add the resolved destination to the cache.
|
||||
*/
|
||||
local_bh_disable();
|
||||
if (orig_dst->lwtstate == dst->lwtstate)
|
||||
dst_cache_set_ip6(&ilwt->cache,
|
||||
&ilwt->null_dst, &fl6.saddr);
|
||||
else
|
||||
dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
|
||||
local_bh_enable();
|
||||
|
||||
err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev));
|
||||
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
|
||||
if (unlikely(err))
|
||||
goto drop;
|
||||
}
|
||||
@@ -422,22 +450,26 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
/* avoid lwtunnel_output() reentry loop when destination is the same
|
||||
* after transformation (e.g., with the inline mode)
|
||||
*/
|
||||
if (dst->lwtstate != cache_dst->lwtstate) {
|
||||
if (orig_dst->lwtstate != dst->lwtstate) {
|
||||
skb_dst_drop(skb);
|
||||
skb_dst_set(skb, cache_dst);
|
||||
skb_dst_set(skb, dst);
|
||||
return dst_output(net, sk, skb);
|
||||
}
|
||||
out:
|
||||
dst_release(cache_dst);
|
||||
return dst->lwtstate->orig_output(net, sk, skb);
|
||||
dst_release(dst);
|
||||
return orig_dst->lwtstate->orig_output(net, sk, skb);
|
||||
drop:
|
||||
dst_release(cache_dst);
|
||||
dst_release(dst);
|
||||
kfree_skb(skb);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ioam6_destroy_state(struct lwtunnel_state *lwt)
|
||||
{
|
||||
/* Since the refcount of per-cpu dst_entry caches will never be 0 (see
|
||||
* why above) when our "fake" dst_entry is used, it is not necessary to
|
||||
* remove them before calling dst_cache_destroy()
|
||||
*/
|
||||
dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user