From 24ea591d2201c3257d666466e8fac50a6cf3c52f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 05:18:03 -0700 Subject: [PATCH 1/7] net: sched: extend percpu stats helpers qdisc_bstats_update_cpu() and other helpers were added to support percpu stats for qdisc. We want to add percpu stats for tc action, so this patch add common helpers. qdisc_bstats_update_cpu() is renamed to qdisc_bstats_cpu_update() qdisc_qstats_drop_cpu() is renamed to qdisc_qstats_cpu_drop() Signed-off-by: Eric Dumazet Cc: Alexei Starovoitov Acked-by: Jamal Hadi Salim Acked-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/sch_generic.h | 31 +++++++++++++++++++++---------- net/core/dev.c | 4 ++-- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2738f6f87908..2eab08c38e32 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -513,17 +513,20 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats, bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; } -static inline void qdisc_bstats_update_cpu(struct Qdisc *sch, - const struct sk_buff *skb) +static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, + const struct sk_buff *skb) { - struct gnet_stats_basic_cpu *bstats = - this_cpu_ptr(sch->cpu_bstats); - u64_stats_update_begin(&bstats->syncp); bstats_update(&bstats->bstats, skb); u64_stats_update_end(&bstats->syncp); } +static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, + const struct sk_buff *skb) +{ + bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb); +} + static inline void qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff *skb) { @@ -547,16 +550,24 @@ static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count) sch->qstats.drops += count; } -static inline void qdisc_qstats_drop(struct Qdisc *sch) +static inline void qstats_drop_inc(struct gnet_stats_queue *qstats) { - sch->qstats.drops++; + qstats->drops++; } -static inline void qdisc_qstats_drop_cpu(struct Qdisc *sch) +static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats) { - struct gnet_stats_queue *qstats = this_cpu_ptr(sch->cpu_qstats); + qstats->overlimits++; +} - qstats->drops++; +static inline void qdisc_qstats_drop(struct Qdisc *sch) +{ + qstats_drop_inc(&sch->qstats); +} + +static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch) +{ + qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats)); } static inline void qdisc_qstats_overlimit(struct Qdisc *sch) diff --git a/net/core/dev.c b/net/core/dev.c index 6778a9999d52..e0d270143fc7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3646,7 +3646,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, qdisc_skb_cb(skb)->pkt_len = skb->len; skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - qdisc_bstats_update_cpu(cl->q, skb); + qdisc_bstats_cpu_update(cl->q, skb); switch (tc_classify(skb, cl, &cl_res)) { case TC_ACT_OK: @@ -3654,7 +3654,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_drop_cpu(cl->q); + qdisc_qstats_cpu_drop(cl->q); case TC_ACT_STOLEN: case TC_ACT_QUEUED: kfree_skb(skb); From 519c818e8fb646eef1e8bfedd18519bec47bc9a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 05:18:04 -0700 Subject: [PATCH 2/7] net: sched: add percpu stats to actions Reuse existing percpu infrastructure John Fastabend added for qdisc. This patch adds a new cpustats parameter to tcf_hash_create() and all actions pass false, meaning this patch should have no effect yet. Signed-off-by: Eric Dumazet Cc: Alexei Starovoitov Acked-by: Jamal Hadi Salim Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/net/act_api.h | 4 +++- net/sched/act_api.c | 44 +++++++++++++++++++++++++++++++--------- net/sched/act_bpf.c | 2 +- net/sched/act_connmark.c | 3 ++- net/sched/act_csum.c | 3 ++- net/sched/act_gact.c | 3 ++- net/sched/act_ipt.c | 2 +- net/sched/act_mirred.c | 3 ++- net/sched/act_nat.c | 3 ++- net/sched/act_pedit.c | 3 ++- net/sched/act_simple.c | 3 ++- net/sched/act_skbedit.c | 3 ++- net/sched/act_vlan.c | 3 ++- 13 files changed, 57 insertions(+), 22 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 3ee4c92afd1b..db2063ffd181 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -21,6 +21,8 @@ struct tcf_common { struct gnet_stats_rate_est64 tcfc_rate_est; spinlock_t tcfc_lock; struct rcu_head tcfc_rcu; + struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_queue __percpu *cpu_qstats; }; #define tcf_head common.tcfc_head #define tcf_index common.tcfc_index @@ -103,7 +105,7 @@ int tcf_hash_release(struct tc_action *a, int bind); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); int tcf_hash_check(u32 index, struct tc_action *a, int bind); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, - int size, int bind); + int size, int bind, bool cpustats); void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action *a); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index af427a3dbcba..074a32f466f8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -27,6 +27,15 @@ #include #include +static void free_tcf(struct rcu_head *head) +{ + struct tcf_common *p = container_of(head, struct tcf_common, tcfc_rcu); + + free_percpu(p->cpu_bstats); + free_percpu(p->cpu_qstats); + kfree(p); +} + void tcf_hash_destroy(struct tc_action *a) { struct tcf_common *p = a->priv; @@ -41,7 +50,7 @@ void tcf_hash_destroy(struct tc_action *a) * gen_estimator est_timer() might access p->tcfc_lock * or bstats, wait a RCU grace period before freeing p */ - kfree_rcu(p, tcfc_rcu); + call_rcu(&p->tcfc_rcu, free_tcf); } EXPORT_SYMBOL(tcf_hash_destroy); @@ -230,15 +239,16 @@ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est) if (est) gen_kill_estimator(&pc->tcfc_bstats, &pc->tcfc_rate_est); - kfree_rcu(pc, tcfc_rcu); + call_rcu(&pc->tcfc_rcu, free_tcf); } EXPORT_SYMBOL(tcf_hash_cleanup); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, - int size, int bind) + int size, int bind, bool cpustats) { struct tcf_hashinfo *hinfo = a->ops->hinfo; struct tcf_common *p = kzalloc(size, GFP_KERNEL); + int err = -ENOMEM; if (unlikely(!p)) return -ENOMEM; @@ -246,18 +256,32 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, if (bind) p->tcfc_bindcnt = 1; + if (cpustats) { + p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + if (!p->cpu_bstats) { +err1: + kfree(p); + return err; + } + p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); + if (!p->cpu_qstats) { +err2: + free_percpu(p->cpu_bstats); + goto err1; + } + } spin_lock_init(&p->tcfc_lock); INIT_HLIST_NODE(&p->tcfc_head); p->tcfc_index = index ? index : tcf_hash_new_index(hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; if (est) { - int err = gen_new_estimator(&p->tcfc_bstats, NULL, - &p->tcfc_rate_est, - &p->tcfc_lock, est); + err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats, + &p->tcfc_rate_est, + &p->tcfc_lock, est); if (err) { - kfree(p); - return err; + free_percpu(p->cpu_qstats); + goto err2; } } @@ -615,10 +639,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (err < 0) goto errout; - if (gnet_stats_copy_basic(&d, NULL, &p->tcfc_bstats) < 0 || + if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfc_bstats, &p->tcfc_rate_est) < 0 || - gnet_stats_copy_queue(&d, NULL, + gnet_stats_copy_queue(&d, p->cpu_qstats, &p->tcfc_qstats, p->tcfc_qstats.qlen) < 0) goto errout; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1d56903fd4c7..99aa271633e9 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -281,7 +281,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (!tcf_hash_check(parm->index, act, bind)) { ret = tcf_hash_create(parm->index, est, act, - sizeof(*prog), bind); + sizeof(*prog), bind, false); if (ret < 0) goto destroy_fp; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 295d14bd6c67..f2b540220ad0 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -108,7 +108,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_CONNMARK_PARMS]); if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), + bind, false); if (ret) return ret; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 4cd5cf1aedf8..b07c535ba8e7 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -62,7 +62,8 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, parm = nla_data(tb[TCA_CSUM_PARMS]); if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*p), + bind, false); if (ret) return ret; ret = ACT_P_CREATED; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 7fffc2272701..a4f8af29ee30 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -85,7 +85,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, #endif if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), + bind, false); if (ret) return ret; ret = ACT_P_CREATED; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index cbc8dd7dd48a..99c9cc1c7af9 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -114,7 +114,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, index = nla_get_u32(tb[TCA_IPT_INDEX]); if (!tcf_hash_check(index, a, bind) ) { - ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind); + ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind, false); if (ret) return ret; ret = ACT_P_CREATED; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index a42a3b257226..002cd6c83dc6 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -93,7 +93,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (!tcf_hash_check(parm->index, a, bind)) { if (dev == NULL) return -EINVAL; - ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*m), + bind, false); if (ret) return ret; ret = ACT_P_CREATED; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 270a030d5fd0..5be0b3c1c5b0 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -55,7 +55,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, parm = nla_data(tb[TCA_NAT_PARMS]); if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*p), + bind, false); if (ret) return ret; ret = ACT_P_CREATED; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 17e6d6669c7f..ce8676ad892f 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -57,7 +57,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (!tcf_hash_check(parm->index, a, bind)) { if (!parm->nkeys) return -EINVAL; - ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*p), + bind, false); if (ret) return ret; p = to_pedit(a); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 6a8d9488613a..d6b708d6afdf 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -103,7 +103,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, defdata = nla_data(tb[TCA_DEF_DATA]); if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*d), + bind, false); if (ret) return ret; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index fcfeeaf838be..6751b5f8c046 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -99,7 +99,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SKBEDIT_PARMS]); if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*d), + bind, false); if (ret) return ret; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index d735ecf0b1a7..796785e0bf96 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -116,7 +116,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, action = parm->v_action; if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*v), bind); + ret = tcf_hash_create(parm->index, est, a, sizeof(*v), + bind, false); if (ret) return ret; From cef5ecf96b28dc91c4e9f398a336c578fb9e1a0c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 05:18:05 -0700 Subject: [PATCH 3/7] net_sched: act_gact: make tcfg_pval non zero First step for gact RCU operation : Instead of testing if tcfg_pval is zero or not, just make it 1. No change in behavior, but slightly faster code. The smp_rmb()/smp_wmb() barriers, while not strictly needed at this stage are added for upcoming spinlock removal. Signed-off-by: Eric Dumazet Acked-by: Alexei Starovoitov Acked-by: Jamal Hadi Salim Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/act_gact.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index a4f8af29ee30..22a3a61aa090 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -28,14 +28,16 @@ #ifdef CONFIG_GACT_PROB static int gact_net_rand(struct tcf_gact *gact) { - if (!gact->tcfg_pval || prandom_u32() % gact->tcfg_pval) + smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */ + if (prandom_u32() % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } static int gact_determ(struct tcf_gact *gact) { - if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval) + smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */ + if (gact->tcf_bstats.packets % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } @@ -105,7 +107,11 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, #ifdef CONFIG_GACT_PROB if (p_parm) { gact->tcfg_paction = p_parm->paction; - gact->tcfg_pval = p_parm->pval; + gact->tcfg_pval = max_t(u16, 1, p_parm->pval); + /* Make sure tcfg_pval is written before tcfg_ptype + * coupled with smp_rmb() in gact_net_rand() & gact_determ() + */ + smp_wmb(); gact->tcfg_ptype = p_parm->ptype; } #endif From cc6510a9504fd3c03d76bd68d99653148342eecc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 05:18:06 -0700 Subject: [PATCH 4/7] net_sched: act_gact: use a separate packet counters for gact_determ() Second step for gact RCU operation : We want to get rid of the spinlock protecting gact operations. Stats (packets/bytes) will soon be per cpu. gact_determ() would not work without a central packet counter, so lets add it for this mode. Signed-off-by: Eric Dumazet Cc: Alexei Starovoitov Acked-by: Jamal Hadi Salim Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/net/tc_act/tc_gact.h | 7 ++++--- net/sched/act_gact.c | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 9fc9b578908a..592a6bc02b0b 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -6,9 +6,10 @@ struct tcf_gact { struct tcf_common common; #ifdef CONFIG_GACT_PROB - u16 tcfg_ptype; - u16 tcfg_pval; - int tcfg_paction; + u16 tcfg_ptype; + u16 tcfg_pval; + int tcfg_paction; + atomic_t packets; #endif }; #define to_gact(a) \ diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 22a3a61aa090..2f9bec584b3f 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -36,8 +36,10 @@ static int gact_net_rand(struct tcf_gact *gact) static int gact_determ(struct tcf_gact *gact) { + u32 pack = atomic_inc_return(&gact->packets); + smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */ - if (gact->tcf_bstats.packets % gact->tcfg_pval) + if (pack % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } From 8f2ae965b7ef4f4ddab6110f06388e270723d694 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 05:18:07 -0700 Subject: [PATCH 5/7] net_sched: act_gact: read tcfg_ptype once Third step for gact RCU operation : Following patch will get rid of spinlock protection, so we need to read tcfg_ptype once. Signed-off-by: Eric Dumazet Cc: Alexei Starovoitov Acked-by: Jamal Hadi Salim Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/act_gact.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 2f9bec584b3f..e4eb88d3d8dc 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -127,16 +127,16 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_gact *gact = a->priv; - int action = TC_ACT_SHOT; + int action = gact->tcf_action; spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB - if (gact->tcfg_ptype) - action = gact_rand[gact->tcfg_ptype](gact); - else - action = gact->tcf_action; -#else - action = gact->tcf_action; + { + u32 ptype = READ_ONCE(gact->tcfg_ptype); + + if (ptype) + action = gact_rand[ptype](gact); + } #endif gact->tcf_bstats.bytes += qdisc_pkt_len(skb); gact->tcf_bstats.packets++; From 56e5d1ca183d8616fab377d7d466c244b4dbb3b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 05:18:08 -0700 Subject: [PATCH 6/7] net_sched: act_gact: remove spinlock in fast path Final step for gact RCU operation : 1) Use percpu stats 2) update lastuse only every clock tick to avoid false sharing 3) Remove spinlock acquisition, as it is no longer needed. Since this is the last contended lock in packet RX when tc gact is used, this gives impressive gain. My host with 8 RX queues was handling 5 Mpps before the patch, and more than 11 Mpps after patch. Tested: On receiver : dev=eth0 tc qdisc del dev $dev ingress 2>/dev/null tc qdisc add dev $dev ingress tc filter del dev $dev root pref 10 2>/dev/null tc filter del dev $dev pref 10 2>/dev/null tc filter add dev $dev est 1sec 4sec parent ffff: protocol ip prio 1 \ u32 match ip src 7.0.0.0/8 flowid 1:15 action drop Sender sends packets flood from 7/8 network Signed-off-by: Eric Dumazet Acked-by: Alexei Starovoitov Acked-by: Jamal Hadi Salim Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/net/act_api.h | 11 +++++++++++ net/sched/act_gact.c | 17 +++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index db2063ffd181..8d2a707a9e87 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -70,6 +70,17 @@ static inline void tcf_hashinfo_destroy(struct tcf_hashinfo *hf) kfree(hf->htab); } +/* Update lastuse only if needed, to avoid dirtying a cache line. + * We use a temp variable to avoid fetching jiffies twice. + */ +static inline void tcf_lastuse_update(struct tcf_t *tm) +{ + unsigned long now = jiffies; + + if (tm->lastuse != now) + tm->lastuse = now; +} + #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e4eb88d3d8dc..5c1b05170736 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -90,7 +90,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, if (!tcf_hash_check(parm->index, a, bind)) { ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), - bind, false); + bind, true); if (ret) return ret; ret = ACT_P_CREATED; @@ -104,7 +104,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, gact = to_gact(a); - spin_lock_bh(&gact->tcf_lock); + ASSERT_RTNL(); gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB if (p_parm) { @@ -117,7 +117,6 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, gact->tcfg_ptype = p_parm->ptype; } #endif - spin_unlock_bh(&gact->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(a); return ret; @@ -127,9 +126,8 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_gact *gact = a->priv; - int action = gact->tcf_action; + int action = READ_ONCE(gact->tcf_action); - spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB { u32 ptype = READ_ONCE(gact->tcfg_ptype); @@ -138,12 +136,11 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, action = gact_rand[ptype](gact); } #endif - gact->tcf_bstats.bytes += qdisc_pkt_len(skb); - gact->tcf_bstats.packets++; + bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb); if (action == TC_ACT_SHOT) - gact->tcf_qstats.drops++; - gact->tcf_tm.lastuse = jiffies; - spin_unlock(&gact->tcf_lock); + qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats)); + + tcf_lastuse_update(&gact->tcf_tm); return action; } From 2ee22a90c7afac265bb6f7abea610b938195e2b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 05:18:09 -0700 Subject: [PATCH 7/7] net_sched: act_mirred: remove spinlock in fast path Like act_gact, act_mirred can be lockless in packet processing 1) Use percpu stats 2) update lastuse only every clock tick to avoid false sharing 3) use rcu to protect tcfm_dev 4) Remove spinlock usage, as it is no longer needed. Next step : add multi queue capability to ifb device Signed-off-by: Eric Dumazet Cc: Alexei Starovoitov Cc: Jamal Hadi Salim Cc: John Fastabend Acked-by: Jamal Hadi Salim Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/tc_act/tc_mirred.h | 2 +- net/sched/act_mirred.c | 57 ++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 4dd77a1c106b..dae96bae1c19 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,7 +8,7 @@ struct tcf_mirred { int tcfm_eaction; int tcfm_ifindex; int tcfm_ok_push; - struct net_device *tcfm_dev; + struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; }; #define to_mirred(a) \ diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 002cd6c83dc6..19cd8904efa0 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -35,9 +35,11 @@ static LIST_HEAD(mirred_list); static void tcf_mirred_release(struct tc_action *a, int bind) { struct tcf_mirred *m = to_mirred(a); + struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1); + list_del(&m->tcfm_list); - if (m->tcfm_dev) - dev_put(m->tcfm_dev); + if (dev) + dev_put(dev); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -94,7 +96,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (dev == NULL) return -EINVAL; ret = tcf_hash_create(parm->index, est, a, sizeof(*m), - bind, false); + bind, true); if (ret) return ret; ret = ACT_P_CREATED; @@ -106,18 +108,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } m = to_mirred(a); - spin_lock_bh(&m->tcf_lock); + ASSERT_RTNL(); m->tcf_action = parm->action; m->tcfm_eaction = parm->eaction; if (dev != NULL) { m->tcfm_ifindex = parm->ifindex; if (ret != ACT_P_CREATED) - dev_put(m->tcfm_dev); + dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); dev_hold(dev); - m->tcfm_dev = dev; + rcu_assign_pointer(m->tcfm_dev, dev); m->tcfm_ok_push = ok_push; } - spin_unlock_bh(&m->tcf_lock); + if (ret == ACT_P_CREATED) { list_add(&m->tcfm_list, &mirred_list); tcf_hash_insert(a); @@ -132,20 +134,22 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, struct tcf_mirred *m = a->priv; struct net_device *dev; struct sk_buff *skb2; + int retval, err; u32 at; - int retval, err = 1; - spin_lock(&m->tcf_lock); - m->tcf_tm.lastuse = jiffies; - bstats_update(&m->tcf_bstats, skb); + tcf_lastuse_update(&m->tcf_tm); - dev = m->tcfm_dev; - if (!dev) { - printk_once(KERN_NOTICE "tc mirred: target device is gone\n"); + bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); + + rcu_read_lock(); + retval = READ_ONCE(m->tcf_action); + dev = rcu_dereference(m->tcfm_dev); + if (unlikely(!dev)) { + pr_notice_once("tc mirred: target device is gone\n"); goto out; } - if (!(dev->flags & IFF_UP)) { + if (unlikely(!(dev->flags & IFF_UP))) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); goto out; @@ -153,7 +157,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, at = G_TC_AT(skb->tc_verd); skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) + if (!skb2) goto out; if (!(at & AT_EGRESS)) { @@ -169,16 +173,13 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->dev = dev; err = dev_queue_xmit(skb2); -out: if (err) { - m->tcf_qstats.overlimits++; +out: + qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats)); if (m->tcfm_eaction != TCA_EGRESS_MIRROR) retval = TC_ACT_SHOT; - else - retval = m->tcf_action; - } else - retval = m->tcf_action; - spin_unlock(&m->tcf_lock); + } + rcu_read_unlock(); return retval; } @@ -217,14 +218,16 @@ static int mirred_device_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tcf_mirred *m; + ASSERT_RTNL(); if (event == NETDEV_UNREGISTER) list_for_each_entry(m, &mirred_list, tcfm_list) { - spin_lock_bh(&m->tcf_lock); - if (m->tcfm_dev == dev) { + if (rcu_access_pointer(m->tcfm_dev) == dev) { dev_put(dev); - m->tcfm_dev = NULL; + /* Note : no rcu grace period necessary, as + * net_device are already rcu protected. + */ + RCU_INIT_POINTER(m->tcfm_dev, NULL); } - spin_unlock_bh(&m->tcf_lock); } return NOTIFY_DONE;