From 6b78debe1c07e6aa3c91ca0b1384bf3cb8217c50 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 11:18:55 -0300 Subject: [PATCH 1/2] net/sched: cls_u32: replace int refcounts with proper refcounts Proper refcounts will always warn splat when something goes wrong, be it underflow, saturation or object resurrection. As these are always a source of bugs, use it in cls_u32 as a safeguard to prevent/catch issues. Another benefit is that the refcount API self documents the code, making clear when transitions to dead are expected. For such an update we had to make minor adaptations on u32 to fit the refcount API. First we set explicitly to '1' when objects are created, then the objects are alive until a 1 -> 0 happens, which is then released appropriately. The above made clear some redundant operations in the u32 code around the root_ht handling that were removed. The root_ht is created with a refcnt set to 1. Then when it's associated with tcf_proto it increments the refcnt to 2. Throughout the entire code the root_ht is an exceptional case and can never be referenced, therefore the refcnt never incremented/decremented. Its lifetime is always bound to tcf_proto, meaning if you delete tcf_proto the root_ht is deleted as well. The code made up for the fact that root_ht refcnt is 2 and did a double decrement to free it, which is not a fit for the refcount API. Even though refcount_t is implemented using atomics, we should observe a negligible control plane impact. Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231114141856.974326-2-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/cls_u32.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d5bdfd4a7655..289e1755c26b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -71,7 +71,7 @@ struct tc_u_hnode { struct tc_u_hnode __rcu *next; u32 handle; u32 prio; - int refcnt; + refcount_t refcnt; unsigned int divisor; struct idr handle_idr; bool is_root; @@ -86,7 +86,7 @@ struct tc_u_hnode { struct tc_u_common { struct tc_u_hnode __rcu *hlist; void *ptr; - int refcnt; + refcount_t refcnt; struct idr handle_idr; struct hlist_node hnode; long knodes; @@ -359,7 +359,7 @@ static int u32_init(struct tcf_proto *tp) if (root_ht == NULL) return -ENOBUFS; - root_ht->refcnt++; + refcount_set(&root_ht->refcnt, 1); root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; root_ht->is_root = true; @@ -371,18 +371,20 @@ static int u32_init(struct tcf_proto *tp) kfree(root_ht); return -ENOBUFS; } + refcount_set(&tp_c->refcnt, 1); tp_c->ptr = key; INIT_HLIST_NODE(&tp_c->hnode); idr_init(&tp_c->handle_idr); hlist_add_head(&tp_c->hnode, tc_u_hash(key)); + } else { + refcount_inc(&tp_c->refcnt); } - tp_c->refcnt++; RCU_INIT_POINTER(root_ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, root_ht); - root_ht->refcnt++; + /* root_ht must be destroyed when tcf_proto is destroyed */ rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; @@ -393,7 +395,7 @@ static void __u32_destroy_key(struct tc_u_knode *n) struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); tcf_exts_destroy(&n->exts); - if (ht && --ht->refcnt == 0) + if (ht && refcount_dec_and_test(&ht->refcnt)) kfree(ht); kfree(n); } @@ -601,8 +603,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; - WARN_ON(--ht->refcnt); - u32_clear_hnode(tp, ht, extack); hn = &tp_c->hlist; @@ -630,10 +630,10 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, WARN_ON(root_ht == NULL); - if (root_ht && --root_ht->refcnt == 1) + if (root_ht && refcount_dec_and_test(&root_ht->refcnt)) u32_destroy_hnode(tp, root_ht, extack); - if (--tp_c->refcnt == 0) { + if (refcount_dec_and_test(&tp_c->refcnt)) { struct tc_u_hnode *ht; hlist_del(&tp_c->hnode); @@ -645,7 +645,7 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, /* u32_destroy_key() will later free ht for us, if it's * still referenced by some knode */ - if (--ht->refcnt == 0) + if (refcount_dec_and_test(&ht->refcnt)) kfree_rcu(ht, rcu); } @@ -674,7 +674,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, return -EINVAL; } - if (ht->refcnt == 1) { + if (refcount_dec_if_one(&ht->refcnt)) { u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); @@ -682,7 +682,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, } out: - *last = tp_c->refcnt == 1 && tp_c->knodes == 0; + *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0; return ret; } @@ -766,14 +766,14 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, NL_SET_ERR_MSG_MOD(extack, "Not linking to root node"); return -EINVAL; } - ht_down->refcnt++; + refcount_inc(&ht_down->refcnt); } ht_old = rtnl_dereference(n->ht_down); rcu_assign_pointer(n->ht_down, ht_down); if (ht_old) - ht_old->refcnt--; + refcount_dec(&ht_old->refcnt); } if (ifindex >= 0) @@ -852,7 +852,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, /* bump reference count as long as we hold pointer to structure */ if (ht) - ht->refcnt++; + refcount_inc(&ht->refcnt); return new; } @@ -932,7 +932,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht_old = rtnl_dereference(n->ht_down); if (ht_old) - ht_old->refcnt++; + refcount_inc(&ht_old->refcnt); } __u32_destroy_key(new); return err; @@ -980,7 +980,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; } } - ht->refcnt = 1; + refcount_set(&ht->refcnt, 1); ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; From 54293e4d6a629befb0b0d93aa416a658678f7f01 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 11:18:56 -0300 Subject: [PATCH 2/2] selftests/tc-testing: add hashtable tests for u32 Add tests to specifically check for the refcount interactions of hashtables created by u32. These tables should not be deleted when referenced and the flush order should respect a tree like composition. Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231114141856.974326-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/filters/u32.json | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index ddc7c355be0a..24bd0c2a3014 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -272,5 +272,62 @@ "teardown": [ "$TC qdisc del dev $DEV1 parent root drr" ] + }, + { + "id": "bd32", + "name": "Try to delete hashtable referenced by another u32 filter", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:" + ], + "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 2 u32 chain 0 fh 1:", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] + }, + { + "id": "4585", + "name": "Delete small tree of u32 hashtables and filters", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:" + ], + "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 2 u32", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ]