From c4c6bc314618f60ba69b0cbf93e506e4c38a11d2 Mon Sep 17 00:00:00 2001
From: Raghavendra K T
Date: Sun, 30 Aug 2015 11:29:41 +0530
Subject: [PATCH 1/2] net: Introduce helper functions to get the per cpu data

Signed-off-by: Raghavendra K T
Signed-off-by: David S. Miller
---
Review note (not part of the commit log): on the BITS_PER_LONG==32 path,
snmp_get_cpu_field64() now names its offset parameter `offt`, matching the
identifier its body dereferences, and snmp_fold_field64() now calls
snmp_get_cpu_field64() with its own `offt` argument. Previously the body
used an undeclared `offt`, and the fold helper called the three-argument
snmp_get_cpu_field() with four arguments and an undeclared `offct`, so
32-bit builds failed. Only existing added lines were edited; hunk line
counts and the diffstat are unchanged.

 include/net/ip.h   | 10 ++++++++++
 net/ipv4/af_inet.c | 41 +++++++++++++++++++++++++++--------------
 2 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 7b9e1c782aa3..9b9ca2839399 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -202,10 +202,20 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 #define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
 #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
+u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);
 unsigned long snmp_fold_field(void __percpu *mib, int offt);
 #if BITS_PER_LONG==32
+u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct,
+			 size_t syncp_offset);
 u64 snmp_fold_field64(void __percpu *mib, int offt, size_t sync_off);
 #else
+static inline u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct,
+					size_t syncp_offset)
+{
+	return snmp_get_cpu_field(mib, cpu, offct);
+
+}
+
 static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_off)
 {
 	return snmp_fold_field(mib, offt);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 675e88cac2b4..0c69c0bbe1a1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1452,38 +1452,51 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 }
 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 
+u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offt)
+{
+	return *(((unsigned long *)per_cpu_ptr(mib, cpu)) + offt);
+}
+EXPORT_SYMBOL_GPL(snmp_get_cpu_field);
+
 unsigned long snmp_fold_field(void __percpu *mib, int offt)
 {
 	unsigned long res = 0;
 	int i;
 
 	for_each_possible_cpu(i)
-		res += *(((unsigned long *) per_cpu_ptr(mib, i)) + offt);
+		res += snmp_get_cpu_field(mib, i, offt);
 	return res;
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
 #if BITS_PER_LONG==32
 
+u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offt,
+			 size_t syncp_offset)
+{
+	void *bhptr;
+	struct u64_stats_sync *syncp;
+	u64 v;
+	unsigned int start;
+
+	bhptr = per_cpu_ptr(mib, cpu);
+	syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
+	do {
+		start = u64_stats_fetch_begin_irq(syncp);
+		v = *(((u64 *)bhptr) + offt);
+	} while (u64_stats_fetch_retry_irq(syncp, start));
+
+	return v;
+}
+EXPORT_SYMBOL_GPL(snmp_get_cpu_field64);
+
 u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
 {
 	u64 res = 0;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		void *bhptr;
-		struct u64_stats_sync *syncp;
-		u64 v;
-		unsigned int start;
-
-		bhptr = per_cpu_ptr(mib, cpu);
-		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
-		do {
-			start = u64_stats_fetch_begin_irq(syncp);
-			v = *(((u64 *) bhptr) + offt);
-		} while (u64_stats_fetch_retry_irq(syncp, start));
-
-		res += v;
+		res += snmp_get_cpu_field64(mib, cpu, offt, syncp_offset);
 	}
 	return res;
 }

From a3a773726c9f9ba2e87fd8ad8e36feff5f6ffd8e Mon Sep 17 00:00:00 2001
From: Raghavendra K T
Date: Sun, 30 Aug 2015 11:29:42 +0530
Subject: [PATCH 2/2] net: Optimize snmp stat aggregation by walking all the
 percpu data at once

Docker container creation linearly increased from around 1.6 sec to 7.5 sec
(at 1000 containers) and perf data showed 50% overhead in snmp_fold_field.

reason: currently __snmp6_fill_stats64 calls snmp_fold_field that walks
through per cpu data of an item (iteratively for around 36 items).

idea: This patch tries to aggregate the statistics by going through
all the items of each cpu sequentially which is reducing cache
misses.

Docker creation got faster by more than 2x after the patch.

Result:
                       Before           After
Docker creation time   6.836s           3.25s
cache miss             2.7%             1.41%

perf before:
    50.73%  docker           [kernel.kallsyms]       [k] snmp_fold_field
     9.07%  swapper          [kernel.kallsyms]       [k] snooze_loop
     3.49%  docker           [kernel.kallsyms]       [k] veth_stats_one
     2.85%  swapper          [kernel.kallsyms]       [k] _raw_spin_lock

perf after:
    10.57%  docker           docker                  [.] scanblock
     8.37%  swapper          [kernel.kallsyms]       [k] snooze_loop
     6.91%  docker           [kernel.kallsyms]       [k] snmp_get_cpu_field
     6.67%  docker           [kernel.kallsyms]       [k] veth_stats_one

changes/ideas suggested:
Using buffer in stack (Eric), Usage of memset (David), Using memcpy in
place of unaligned_put (Joe).

Signed-off-by: Raghavendra K T
Acked-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/ipv6/addrconf.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0f08d3b9e238..99c0f2b843f0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4726,18 +4726,24 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
 }
 
 static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
-				      int items, int bytes, size_t syncpoff)
+					int bytes, size_t syncpoff)
 {
-	int i;
-	int pad = bytes - sizeof(u64) * items;
+	int i, c;
+	u64 buff[IPSTATS_MIB_MAX];
+	int pad = bytes - sizeof(u64) * IPSTATS_MIB_MAX;
+
 	BUG_ON(pad < 0);
 
-	/* Use put_unaligned() because stats may not be aligned for u64. */
-	put_unaligned(items, &stats[0]);
-	for (i = 1; i < items; i++)
-		put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]);
+	memset(buff, 0, sizeof(buff));
+	buff[0] = IPSTATS_MIB_MAX;
 
-	memset(&stats[items], 0, pad);
+	for_each_possible_cpu(c) {
+		for (i = 1; i < IPSTATS_MIB_MAX; i++)
+			buff[i] += snmp_get_cpu_field64(mib, c, i, syncpoff);
+	}
+
+	memcpy(stats, buff, IPSTATS_MIB_MAX * sizeof(u64));
+	memset(&stats[IPSTATS_MIB_MAX], 0, pad);
 }
 
 static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
@@ -4745,8 +4751,8 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 {
 	switch (attrtype) {
 	case IFLA_INET6_STATS:
-		__snmp6_fill_stats64(stats, idev->stats.ipv6,
-				     IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
+		__snmp6_fill_stats64(stats, idev->stats.ipv6, bytes,
+				     offsetof(struct ipstats_mib, syncp));
 		break;
 	case IFLA_INET6_ICMP6STATS:
 		__snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);