From 04b7d136d015f220b1003e6c573834658d507a31 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 1 Nov 2019 22:23:45 +0800 Subject: [PATCH 01/10] net: openvswitch: add flow-mask cache for performance The idea of this optimization comes from a patch which is committed in 2014, openvswitch community. The author is Pravin B Shelar. In order to get high performance, I implement it again. Later patches will use it. Pravin B Shelar, says: | On every packet OVS needs to lookup flow-table with every | mask until it finds a match. The packet flow-key is first | masked with mask in the list and then the masked key is | looked up in flow-table. Therefore number of masks can | affect packet processing performance. Link: https://github.com/openvswitch/ovs/commit/5604935e4e1cbc16611d2d97f50b717aa31e8ec5 Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 3 +- net/openvswitch/flow_table.c | 111 ++++++++++++++++++++++++++++++----- net/openvswitch/flow_table.h | 11 +++- 3 files changed, 108 insertions(+), 17 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d8c364d637b1..24cb73e62f55 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) stats = this_cpu_ptr(dp->stats_percpu); /* Look up flow. */ - flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); + flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb), + &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index cf3582c5ed70..3d515c072eb8 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -36,6 +36,10 @@ #define TBL_MIN_BUCKETS 1024 #define REHASH_INTERVAL (10 * 60 * HZ) +#define MC_HASH_SHIFT 8 +#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT) +#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT) + static struct kmem_cache *flow_cache; struct kmem_cache *flow_stats_cache __read_mostly; @@ -168,11 +172,16 @@ int ovs_flow_tbl_init(struct flow_table *table) { struct table_instance *ti, *ufid_ti; - ti = table_instance_alloc(TBL_MIN_BUCKETS); - - if (!ti) + table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) * + MC_HASH_ENTRIES, + __alignof__(struct mask_cache_entry)); + if (!table->mask_cache) return -ENOMEM; + ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!ti) + goto free_mask_cache; + ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ufid_ti) goto free_ti; @@ -187,6 +196,8 @@ int ovs_flow_tbl_init(struct flow_table *table) free_ti: __table_instance_destroy(ti); +free_mask_cache: + free_percpu(table->mask_cache); return -ENOMEM; } @@ -243,6 +254,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) struct table_instance *ti = rcu_dereference_raw(table->ti); struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); + free_percpu(table->mask_cache); table_instance_destroy(ti, ufid_ti, false); } @@ -425,7 +437,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, static struct sw_flow *masked_flow_lookup(struct table_instance *ti, const struct sw_flow_key *unmasked, - const struct sw_flow_mask *mask) + const struct sw_flow_mask *mask, + u32 *n_mask_hit) { struct sw_flow *flow; struct hlist_head *head; @@ -435,6 +448,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, ovs_flow_mask_key(&masked_key, unmasked, false, mask); hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); + (*n_mask_hit)++; + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { if (flow->mask == mask && flow->flow_table.hash == hash && flow_cmp_masked_key(flow, &masked_key, &mask->range)) @@ -443,30 +458,97 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, return NULL; } -struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, - const struct sw_flow_key *key, - u32 *n_mask_hit) +static struct sw_flow *flow_lookup(struct flow_table *tbl, + struct table_instance *ti, + const struct sw_flow_key *key, + u32 *n_mask_hit) { - struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct sw_flow_mask *mask; struct sw_flow *flow; - *n_mask_hit = 0; list_for_each_entry_rcu(mask, &tbl->mask_list, list) { - (*n_mask_hit)++; - flow = masked_flow_lookup(ti, key, mask); + flow = masked_flow_lookup(ti, key, mask, n_mask_hit); if (flow) /* Found */ return flow; } return NULL; } +/* + * mask_cache maps flow to probable mask. This cache is not tightly + * coupled cache, It means updates to mask list can result in inconsistent + * cache entry in mask cache. + * This is per cpu cache and is divided in MC_HASH_SEGS segments. + * In case of a hash collision the entry is hashed in next segment. + * */ +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, + const struct sw_flow_key *key, + u32 skb_hash, + u32 *n_mask_hit) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); + struct mask_cache_entry *entries, *ce, *del; + struct sw_flow *flow; + u32 hash = skb_hash; + int seg; + + *n_mask_hit = 0; + if (unlikely(!skb_hash)) + return flow_lookup(tbl, ti, key, n_mask_hit); + + del = NULL; + entries = this_cpu_ptr(tbl->mask_cache); + + for (seg = 0; seg < MC_HASH_SEGS; seg++) { + int index; + + index = hash & (MC_HASH_ENTRIES - 1); + ce = &entries[index]; + + if (ce->skb_hash == skb_hash) { + struct sw_flow_mask *mask; + int i; + + i = 0; + list_for_each_entry_rcu(mask, &tbl->mask_list, list) { + if (ce->mask_index == i++) { + flow = masked_flow_lookup(ti, key, mask, + n_mask_hit); + if (flow) /* Found */ + return flow; + + break; + } + } + + del = ce; + break; + } + + if (!del || (del->skb_hash && !ce->skb_hash)) { + del = ce; + } + + hash >>= MC_HASH_SHIFT; + } + + flow = flow_lookup(tbl, ti, key, n_mask_hit); + + if (flow) { + del->skb_hash = skb_hash; + del->mask_index = (*n_mask_hit - 1); + } + + return flow; +} + struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, const struct sw_flow_key *key) { + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); u32 __always_unused n_mask_hit; - return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); + return flow_lookup(tbl, ti, key, &n_mask_hit); } struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, @@ -475,10 +557,11 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct sw_flow_mask *mask; struct sw_flow *flow; + u32 __always_unused n_mask_hit; /* Always called under ovs-mutex. */ list_for_each_entry(mask, &tbl->mask_list, list) { - flow = masked_flow_lookup(ti, match->key, mask); + flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit); if (flow && ovs_identifier_is_key(&flow->id) && ovs_flow_cmp_unmasked_key(flow, match)) return flow; @@ -631,7 +714,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, return -ENOMEM; mask->key = new->key; mask->range = new->range; - list_add_rcu(&mask->list, &tbl->mask_list); + list_add_tail_rcu(&mask->list, &tbl->mask_list); } else { BUG_ON(!mask->ref_count); mask->ref_count++; diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index bc52045b63ff..04b6b1c5069c 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -22,6 +22,11 @@ #include "flow.h" +struct mask_cache_entry { + u32 skb_hash; + u32 mask_index; +}; + struct table_instance { struct hlist_head *buckets; unsigned int n_buckets; @@ -34,6 +39,7 @@ struct table_instance { struct flow_table { struct table_instance __rcu *ti; struct table_instance __rcu *ufid_ti; + struct mask_cache_entry __percpu *mask_cache; struct list_head mask_list; unsigned long last_rehash; unsigned int count; @@ -60,8 +66,9 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table); struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, u32 *bucket, u32 *idx); struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, - const struct sw_flow_key *, - u32 *n_mask_hit); + const struct sw_flow_key *, + u32 skb_hash, + u32 *n_mask_hit); struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, From 4bc63b1b531df518576a97d17bf5939fdbc33ccb Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 1 Nov 2019 22:23:46 +0800 Subject: [PATCH 02/10] net: openvswitch: convert mask list in mask array Port the codes to linux upstream and with little changes. Pravin B Shelar, says: | mask caches index of mask in mask_list. On packet recv OVS | need to traverse mask-list to get cached mask. Therefore array | is better for retrieving cached mask. This also allows better | cache replacement algorithm by directly checking mask's existence. Link: https://github.com/openvswitch/ovs/commit/d49fc3ff53c65e4eca9cabd52ac63396746a7ef5 Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow.h | 1 - net/openvswitch/flow_table.c | 209 +++++++++++++++++++++++++++-------- net/openvswitch/flow_table.h | 8 +- 3 files changed, 167 insertions(+), 51 deletions(-) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index b830d5ff7af4..8080518ca5f2 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -166,7 +166,6 @@ struct sw_flow_key_range { struct sw_flow_mask { int ref_count; struct rcu_head rcu; - struct list_head list; struct sw_flow_key_range range; struct sw_flow_key key; }; diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 3d515c072eb8..92efa232d764 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -34,6 +34,7 @@ #include #define TBL_MIN_BUCKETS 1024 +#define MASK_ARRAY_SIZE_MIN 16 #define REHASH_INTERVAL (10 * 60 * HZ) #define MC_HASH_SHIFT 8 @@ -168,9 +169,51 @@ static struct table_instance *table_instance_alloc(int new_size) return ti; } +static struct mask_array *tbl_mask_array_alloc(int size) +{ + struct mask_array *new; + + size = max(MASK_ARRAY_SIZE_MIN, size); + new = kzalloc(sizeof(struct mask_array) + + sizeof(struct sw_flow_mask *) * size, GFP_KERNEL); + if (!new) + return NULL; + + new->count = 0; + new->max = size; + + return new; +} + +static int tbl_mask_array_realloc(struct flow_table *tbl, int size) +{ + struct mask_array *old; + struct mask_array *new; + + new = tbl_mask_array_alloc(size); + if (!new) + return -ENOMEM; + + old = ovsl_dereference(tbl->mask_array); + if (old) { + int i; + + for (i = 0; i < old->max; i++) { + if (ovsl_dereference(old->masks[i])) + new->masks[new->count++] = old->masks[i]; + } + } + + rcu_assign_pointer(tbl->mask_array, new); + kfree_rcu(old, rcu); + + return 0; +} + int ovs_flow_tbl_init(struct flow_table *table) { struct table_instance *ti, *ufid_ti; + struct mask_array *ma; table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) * MC_HASH_ENTRIES, @@ -178,9 +221,13 @@ int ovs_flow_tbl_init(struct flow_table *table) if (!table->mask_cache) return -ENOMEM; + ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN); + if (!ma) + goto free_mask_cache; + ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ti) - goto free_mask_cache; + goto free_mask_array; ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ufid_ti) @@ -188,7 +235,7 @@ int ovs_flow_tbl_init(struct flow_table *table) rcu_assign_pointer(table->ti, ti); rcu_assign_pointer(table->ufid_ti, ufid_ti); - INIT_LIST_HEAD(&table->mask_list); + rcu_assign_pointer(table->mask_array, ma); table->last_rehash = jiffies; table->count = 0; table->ufid_count = 0; @@ -196,6 +243,8 @@ int ovs_flow_tbl_init(struct flow_table *table) free_ti: __table_instance_destroy(ti); +free_mask_array: + kfree(ma); free_mask_cache: free_percpu(table->mask_cache); return -ENOMEM; @@ -255,6 +304,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); free_percpu(table->mask_cache); + kfree_rcu(rcu_dereference_raw(table->mask_array), rcu); table_instance_destroy(ti, ufid_ti, false); } @@ -460,17 +510,27 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, static struct sw_flow *flow_lookup(struct flow_table *tbl, struct table_instance *ti, + struct mask_array *ma, const struct sw_flow_key *key, - u32 *n_mask_hit) + u32 *n_mask_hit, + u32 *index) { - struct sw_flow_mask *mask; struct sw_flow *flow; + int i; - list_for_each_entry_rcu(mask, &tbl->mask_list, list) { - flow = masked_flow_lookup(ti, key, mask, n_mask_hit); - if (flow) /* Found */ - return flow; + for (i = 0; i < ma->max; i++) { + struct sw_flow_mask *mask; + + mask = rcu_dereference_ovsl(ma->masks[i]); + if (mask) { + flow = masked_flow_lookup(ti, key, mask, n_mask_hit); + if (flow) { /* Found */ + *index = i; + return flow; + } + } } + return NULL; } @@ -486,6 +546,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, u32 skb_hash, u32 *n_mask_hit) { + struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct mask_cache_entry *entries, *ce, *del; struct sw_flow *flow; @@ -493,8 +554,11 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, int seg; *n_mask_hit = 0; - if (unlikely(!skb_hash)) - return flow_lookup(tbl, ti, key, n_mask_hit); + if (unlikely(!skb_hash)) { + u32 __always_unused mask_index; + + return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index); + } del = NULL; entries = this_cpu_ptr(tbl->mask_cache); @@ -507,37 +571,33 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, if (ce->skb_hash == skb_hash) { struct sw_flow_mask *mask; - int i; + struct sw_flow *flow; - i = 0; - list_for_each_entry_rcu(mask, &tbl->mask_list, list) { - if (ce->mask_index == i++) { - flow = masked_flow_lookup(ti, key, mask, - n_mask_hit); - if (flow) /* Found */ - return flow; - - break; - } + mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]); + if (mask) { + flow = masked_flow_lookup(ti, key, mask, + n_mask_hit); + if (flow) /* Found */ + return flow; } del = ce; break; } - if (!del || (del->skb_hash && !ce->skb_hash)) { + if (!del || (del->skb_hash && !ce->skb_hash) || + (rcu_dereference_ovsl(ma->masks[del->mask_index]) && + !rcu_dereference_ovsl(ma->masks[ce->mask_index]))) { del = ce; } hash >>= MC_HASH_SHIFT; } - flow = flow_lookup(tbl, ti, key, n_mask_hit); + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &del->mask_index); - if (flow) { + if (flow) del->skb_hash = skb_hash; - del->mask_index = (*n_mask_hit - 1); - } return flow; } @@ -546,26 +606,38 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, const struct sw_flow_key *key) { struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); - u32 __always_unused n_mask_hit; + struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); - return flow_lookup(tbl, ti, key, &n_mask_hit); + u32 __always_unused n_mask_hit; + u32 __always_unused index; + + return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index); } struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, const struct sw_flow_match *match) { - struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); - struct sw_flow_mask *mask; - struct sw_flow *flow; - u32 __always_unused n_mask_hit; + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int i; /* Always called under ovs-mutex. */ - list_for_each_entry(mask, &tbl->mask_list, list) { + for (i = 0; i < ma->max; i++) { + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); + u32 __always_unused n_mask_hit; + struct sw_flow_mask *mask; + struct sw_flow *flow; + + mask = ovsl_dereference(ma->masks[i]); + if (!mask) + continue; + flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit); if (flow && ovs_identifier_is_key(&flow->id) && - ovs_flow_cmp_unmasked_key(flow, match)) + ovs_flow_cmp_unmasked_key(flow, match)) { return flow; + } } + return NULL; } @@ -611,13 +683,9 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, int ovs_flow_tbl_num_masks(const struct flow_table *table) { - struct sw_flow_mask *mask; - int num = 0; + struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); - list_for_each_entry(mask, &table->mask_list, list) - num++; - - return num; + return ma->count; } static struct table_instance *table_instance_expand(struct table_instance *ti, @@ -638,8 +706,19 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) mask->ref_count--; if (!mask->ref_count) { - list_del_rcu(&mask->list); - kfree_rcu(mask, rcu); + struct mask_array *ma; + int i; + + ma = ovsl_dereference(tbl->mask_array); + for (i = 0; i < ma->max; i++) { + if (mask == ovsl_dereference(ma->masks[i])) { + RCU_INIT_POINTER(ma->masks[i], NULL); + ma->count--; + kfree_rcu(mask, rcu); + return; + } + } + BUG(); } } } @@ -689,13 +768,16 @@ static bool mask_equal(const struct sw_flow_mask *a, static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, const struct sw_flow_mask *mask) { - struct list_head *ml; + struct mask_array *ma; + int i; - list_for_each(ml, &tbl->mask_list) { - struct sw_flow_mask *m; - m = container_of(ml, struct sw_flow_mask, list); - if (mask_equal(mask, m)) - return m; + ma = ovsl_dereference(tbl->mask_array); + for (i = 0; i < ma->max; i++) { + struct sw_flow_mask *t; + t = ovsl_dereference(ma->masks[i]); + + if (t && mask_equal(mask, t)) + return t; } return NULL; @@ -706,15 +788,44 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, const struct sw_flow_mask *new) { struct sw_flow_mask *mask; + mask = flow_mask_find(tbl, new); if (!mask) { + struct mask_array *ma; + int i; + /* Allocate a new mask if none exsits. */ mask = mask_alloc(); if (!mask) return -ENOMEM; mask->key = new->key; mask->range = new->range; - list_add_tail_rcu(&mask->list, &tbl->mask_list); + + /* Add mask to mask-list. */ + ma = ovsl_dereference(tbl->mask_array); + if (ma->count >= ma->max) { + int err; + + err = tbl_mask_array_realloc(tbl, ma->max + + MASK_ARRAY_SIZE_MIN); + if (err) { + kfree(mask); + return err; + } + + ma = ovsl_dereference(tbl->mask_array); + } + + for (i = 0; i < ma->max; i++) { + const struct sw_flow_mask *t; + + t = ovsl_dereference(ma->masks[i]); + if (!t) { + rcu_assign_pointer(ma->masks[i], mask); + ma->count++; + break; + } + } } else { BUG_ON(!mask->ref_count); mask->ref_count++; diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 04b6b1c5069c..8a5cea6ae111 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -27,6 +27,12 @@ struct mask_cache_entry { u32 mask_index; }; +struct mask_array { + struct rcu_head rcu; + int count, max; + struct sw_flow_mask __rcu *masks[]; +}; + struct table_instance { struct hlist_head *buckets; unsigned int n_buckets; @@ -40,7 +46,7 @@ struct flow_table { struct table_instance __rcu *ti; struct table_instance __rcu *ufid_ti; struct mask_cache_entry __percpu *mask_cache; - struct list_head mask_list; + struct mask_array __rcu *mask_array; unsigned long last_rehash; unsigned int count; unsigned int ufid_count; From 1689754de624a19e37a2f96289f4421466771687 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 1 Nov 2019 22:23:47 +0800 Subject: [PATCH 03/10] net: openvswitch: shrink the mask array if necessary When creating and inserting flow-mask, if there is no available flow-mask, we realloc the mask array. When removing flow-mask, if necessary, we shrink mask array. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 92efa232d764..0c0fcd644122 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -694,6 +694,23 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } +static void tbl_mask_array_delete_mask(struct mask_array *ma, + struct sw_flow_mask *mask) +{ + int i; + + /* Remove the deleted mask pointers from the array */ + for (i = 0; i < ma->max; i++) { + if (mask == ovsl_dereference(ma->masks[i])) { + RCU_INIT_POINTER(ma->masks[i], NULL); + ma->count--; + kfree_rcu(mask, rcu); + return; + } + } + BUG(); +} + /* Remove 'mask' from the mask list, if it is not needed any more. */ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) { @@ -707,18 +724,14 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) if (!mask->ref_count) { struct mask_array *ma; - int i; ma = ovsl_dereference(tbl->mask_array); - for (i = 0; i < ma->max; i++) { - if (mask == ovsl_dereference(ma->masks[i])) { - RCU_INIT_POINTER(ma->masks[i], NULL); - ma->count--; - kfree_rcu(mask, rcu); - return; - } - } - BUG(); + tbl_mask_array_delete_mask(ma, mask); + + /* Shrink the mask array if necessary. */ + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && + ma->count <= (ma->max / 3)) + tbl_mask_array_realloc(tbl, ma->max / 2); } } } From a7f35e78e701744368d4ac38bdb61a86bfac2162 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 1 Nov 2019 22:23:48 +0800 Subject: [PATCH 04/10] net: openvswitch: optimize flow mask cache hash collision Port the codes to linux upstream and with little changes. Pravin B Shelar, says: | In case hash collision on mask cache, OVS does extra flow | lookup. Following patch avoid it. Link: https://github.com/openvswitch/ovs/commit/0e6efbe2712da03522532dc5e84806a96f6a0dd1 Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 93 ++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 41 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 0c0fcd644122..c7ba43524c2a 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -508,6 +508,9 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, return NULL; } +/* Flow lookup does full lookup on flow table. It starts with + * mask from index passed in *index. + */ static struct sw_flow *flow_lookup(struct flow_table *tbl, struct table_instance *ti, struct mask_array *ma, @@ -515,19 +518,32 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, u32 *n_mask_hit, u32 *index) { + struct sw_flow_mask *mask; struct sw_flow *flow; int i; - for (i = 0; i < ma->max; i++) { - struct sw_flow_mask *mask; - - mask = rcu_dereference_ovsl(ma->masks[i]); + if (*index < ma->max) { + mask = rcu_dereference_ovsl(ma->masks[*index]); if (mask) { flow = masked_flow_lookup(ti, key, mask, n_mask_hit); - if (flow) { /* Found */ - *index = i; + if (flow) return flow; - } + } + } + + for (i = 0; i < ma->max; i++) { + + if (i == *index) + continue; + + mask = rcu_dereference_ovsl(ma->masks[i]); + if (!mask) + continue; + + flow = masked_flow_lookup(ti, key, mask, n_mask_hit); + if (flow) { /* Found */ + *index = i; + return flow; } } @@ -546,58 +562,54 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, u32 skb_hash, u32 *n_mask_hit) { - struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); - struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); - struct mask_cache_entry *entries, *ce, *del; + struct mask_array *ma = rcu_dereference(tbl->mask_array); + struct table_instance *ti = rcu_dereference(tbl->ti); + struct mask_cache_entry *entries, *ce; struct sw_flow *flow; - u32 hash = skb_hash; + u32 hash; int seg; *n_mask_hit = 0; if (unlikely(!skb_hash)) { - u32 __always_unused mask_index; + u32 mask_index = 0; return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index); } - del = NULL; + /* Pre and post recirulation flows usually have the same skb_hash + * value. To avoid hash collisions, rehash the 'skb_hash' with + * 'recirc_id'. */ + if (key->recirc_id) + skb_hash = jhash_1word(skb_hash, key->recirc_id); + + ce = NULL; + hash = skb_hash; entries = this_cpu_ptr(tbl->mask_cache); + /* Find the cache entry 'ce' to operate on. */ for (seg = 0; seg < MC_HASH_SEGS; seg++) { - int index; + int index = hash & (MC_HASH_ENTRIES - 1); + struct mask_cache_entry *e; - index = hash & (MC_HASH_ENTRIES - 1); - ce = &entries[index]; - - if (ce->skb_hash == skb_hash) { - struct sw_flow_mask *mask; - struct sw_flow *flow; - - mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]); - if (mask) { - flow = masked_flow_lookup(ti, key, mask, - n_mask_hit); - if (flow) /* Found */ - return flow; - } - - del = ce; - break; + e = &entries[index]; + if (e->skb_hash == skb_hash) { + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, + &e->mask_index); + if (!flow) + e->skb_hash = 0; + return flow; } - if (!del || (del->skb_hash && !ce->skb_hash) || - (rcu_dereference_ovsl(ma->masks[del->mask_index]) && - !rcu_dereference_ovsl(ma->masks[ce->mask_index]))) { - del = ce; - } + if (!ce || e->skb_hash < ce->skb_hash) + ce = e; /* A better replacement cache candidate. */ hash >>= MC_HASH_SHIFT; } - flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &del->mask_index); - + /* Cache miss, do full lookup. */ + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index); if (flow) - del->skb_hash = skb_hash; + ce->skb_hash = skb_hash; return flow; } @@ -607,9 +619,8 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, { struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); - u32 __always_unused n_mask_hit; - u32 __always_unused index; + u32 index = 0; return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index); } From 57f7d7b9164426c496300d254fd5167fbbf205ea Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 1 Nov 2019 22:23:49 +0800 Subject: [PATCH 05/10] net: openvswitch: optimize flow-mask looking up The full looking up on flow table traverses all mask array. If mask-array is too large, the number of invalid flow-mask increase, performance will be drop. One bad case, for example: M means flow-mask is valid and NULL of flow-mask means deleted. +-------------------------------------------+ | M | NULL | ... | NULL | M| +-------------------------------------------+ In that case, without this patch, openvswitch will traverses all mask array, because there will be one flow-mask in the tail. This patch changes the way of flow-mask inserting and deleting, and the mask array will be keep as below: there is not a NULL hole. In the fast path, we can "break" "for" (not "continue") in flow_lookup when we get a NULL flow-mask. "break" v +-------------------------------------------+ | M | M | NULL |... | NULL | NULL| +-------------------------------------------+ This patch don't optimize slow or control path, still using ma->max to traverse. Slow path: * tbl_mask_array_realloc * ovs_flow_tbl_lookup_exact * flow_mask_find Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 104 ++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 51 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index c7ba43524c2a..a10d421773f8 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -518,8 +518,8 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, u32 *n_mask_hit, u32 *index) { - struct sw_flow_mask *mask; struct sw_flow *flow; + struct sw_flow_mask *mask; int i; if (*index < ma->max) { @@ -538,7 +538,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, mask = rcu_dereference_ovsl(ma->masks[i]); if (!mask) - continue; + break; flow = masked_flow_lookup(ti, key, mask, n_mask_hit); if (flow) { /* Found */ @@ -695,8 +695,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, int ovs_flow_tbl_num_masks(const struct flow_table *table) { struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); - - return ma->count; + return READ_ONCE(ma->count); } static struct table_instance *table_instance_expand(struct table_instance *ti, @@ -705,21 +704,33 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } -static void tbl_mask_array_delete_mask(struct mask_array *ma, - struct sw_flow_mask *mask) +static void tbl_mask_array_del_mask(struct flow_table *tbl, + struct sw_flow_mask *mask) { - int i; + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int i, ma_count = READ_ONCE(ma->count); /* Remove the deleted mask pointers from the array */ - for (i = 0; i < ma->max; i++) { - if (mask == ovsl_dereference(ma->masks[i])) { - RCU_INIT_POINTER(ma->masks[i], NULL); - ma->count--; - kfree_rcu(mask, rcu); - return; - } + for (i = 0; i < ma_count; i++) { + if (mask == ovsl_dereference(ma->masks[i])) + goto found; } + BUG(); + return; + +found: + WRITE_ONCE(ma->count, ma_count -1); + + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); + RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); + + kfree_rcu(mask, rcu); + + /* Shrink the mask array if necessary. */ + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && + ma_count <= (ma->max / 3)) + tbl_mask_array_realloc(tbl, ma->max / 2); } /* Remove 'mask' from the mask list, if it is not needed any more. */ @@ -733,17 +744,8 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) BUG_ON(!mask->ref_count); mask->ref_count--; - if (!mask->ref_count) { - struct mask_array *ma; - - ma = ovsl_dereference(tbl->mask_array); - tbl_mask_array_delete_mask(ma, mask); - - /* Shrink the mask array if necessary. */ - if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && - ma->count <= (ma->max / 3)) - tbl_mask_array_realloc(tbl, ma->max / 2); - } + if (!mask->ref_count) + tbl_mask_array_del_mask(tbl, mask); } } @@ -807,6 +809,29 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, return NULL; } +static int tbl_mask_array_add_mask(struct flow_table *tbl, + struct sw_flow_mask *new) +{ + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int err, ma_count = READ_ONCE(ma->count); + + if (ma_count >= ma->max) { + err = tbl_mask_array_realloc(tbl, ma->max + + MASK_ARRAY_SIZE_MIN); + if (err) + return err; + + ma = ovsl_dereference(tbl->mask_array); + } + + BUG_ON(ovsl_dereference(ma->masks[ma_count])); + + rcu_assign_pointer(ma->masks[ma_count], new); + WRITE_ONCE(ma->count, ma_count +1); + + return 0; +} + /* Add 'mask' into the mask list, if it is not already there. */ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, const struct sw_flow_mask *new) @@ -815,9 +840,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, mask = flow_mask_find(tbl, new); if (!mask) { - struct mask_array *ma; - int i; - /* Allocate a new mask if none exsits. */ mask = mask_alloc(); if (!mask) @@ -826,29 +848,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, mask->range = new->range; /* Add mask to mask-list. */ - ma = ovsl_dereference(tbl->mask_array); - if (ma->count >= ma->max) { - int err; - - err = tbl_mask_array_realloc(tbl, ma->max + - MASK_ARRAY_SIZE_MIN); - if (err) { - kfree(mask); - return err; - } - - ma = ovsl_dereference(tbl->mask_array); - } - - for (i = 0; i < ma->max; i++) { - const struct sw_flow_mask *t; - - t = ovsl_dereference(ma->masks[i]); - if (!t) { - rcu_assign_pointer(ma->masks[i], mask); - ma->count++; - break; - } + if (tbl_mask_array_add_mask(tbl, mask)) { + kfree(mask); + return -ENOMEM; } } else { BUG_ON(!mask->ref_count); From 515b65a4b99197ae062a795ab4de919e6d04be04 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 1 Nov 2019 22:23:50 +0800 Subject: [PATCH 06/10] net: openvswitch: simplify the flow_hash Simplify the code and remove the unnecessary BUILD_BUG_ON. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index a10d421773f8..96757e2ed256 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -432,13 +432,10 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) static u32 flow_hash(const struct sw_flow_key *key, const struct sw_flow_key_range *range) { - int key_start = range->start; - int key_end = range->end; - const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); - int hash_u32s = (key_end - key_start) >> 2; + const u32 *hash_key = (const u32 *)((const u8 *)key + range->start); /* Make sure number of hash bytes are multiple of u32. */ - BUILD_BUG_ON(sizeof(long) % sizeof(u32)); + int hash_u32s = range_n_bytes(range) >> 2; return jhash2(hash_key, hash_u32s, 0); } From 0a3e01371db17d753dd92ec4d0fc6247412d3b01 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 1 Nov 2019 22:23:51 +0800 Subject: [PATCH 07/10] net: openvswitch: add likely in flow_lookup The most case *index < ma->max, and flow-mask is not NULL. We add un/likely for performance. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 96757e2ed256..9f5a06e5d974 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -519,7 +519,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, struct sw_flow_mask *mask; int i; - if (*index < ma->max) { + if (likely(*index < ma->max)) { mask = rcu_dereference_ovsl(ma->masks[*index]); if (mask) { flow = masked_flow_lookup(ti, key, mask, n_mask_hit); @@ -534,7 +534,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, continue; mask = rcu_dereference_ovsl(ma->masks[i]); - if (!mask) + if (unlikely(!mask)) break; flow = masked_flow_lookup(ti, key, mask, n_mask_hit); From 50b0e61b32ee890a75b4377d5fbe770a86d6a4c1 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 1 Nov 2019 22:23:52 +0800 Subject: [PATCH 08/10] net: openvswitch: fix possible memleak on destroy flow-table When we destroy the flow tables which may contain the flow_mask, so release the flow mask struct. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 186 ++++++++++++++++++----------------- 1 file changed, 98 insertions(+), 88 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 9f5a06e5d974..5904e93e5765 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -210,6 +210,74 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size) return 0; } +static int tbl_mask_array_add_mask(struct flow_table *tbl, + struct sw_flow_mask *new) +{ + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int err, ma_count = READ_ONCE(ma->count); + + if (ma_count >= ma->max) { + err = tbl_mask_array_realloc(tbl, ma->max + + MASK_ARRAY_SIZE_MIN); + if (err) + return err; + + ma = ovsl_dereference(tbl->mask_array); + } + + BUG_ON(ovsl_dereference(ma->masks[ma_count])); + + rcu_assign_pointer(ma->masks[ma_count], new); + WRITE_ONCE(ma->count, ma_count +1); + + return 0; +} + +static void tbl_mask_array_del_mask(struct flow_table *tbl, + struct sw_flow_mask *mask) +{ + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int i, ma_count = READ_ONCE(ma->count); + + /* Remove the deleted mask pointers from the array */ + for (i = 0; i < ma_count; i++) { + if (mask == ovsl_dereference(ma->masks[i])) + goto found; + } + + BUG(); + return; + +found: + WRITE_ONCE(ma->count, ma_count -1); + + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); + RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); + + kfree_rcu(mask, rcu); + + /* Shrink the mask array if necessary. */ + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && + ma_count <= (ma->max / 3)) + tbl_mask_array_realloc(tbl, ma->max / 2); +} + +/* Remove 'mask' from the mask list, if it is not needed any more. */ +static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) +{ + if (mask) { + /* ovs-lock is required to protect mask-refcount and + * mask list. + */ + ASSERT_OVSL(); + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) + tbl_mask_array_del_mask(tbl, mask); + } +} + int ovs_flow_tbl_init(struct flow_table *table) { struct table_instance *ti, *ufid_ti; @@ -257,7 +325,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) __table_instance_destroy(ti); } -static void table_instance_destroy(struct table_instance *ti, +static void table_instance_flow_free(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti, + struct sw_flow *flow, + bool count) +{ + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); + if (count) + table->count--; + + if (ovs_identifier_is_ufid(&flow->id)) { + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); + + if (count) + table->ufid_count--; + } + + flow_mask_remove(table, flow->mask); +} + +static void table_instance_destroy(struct flow_table *table, + struct table_instance *ti, struct table_instance *ufid_ti, bool deferred) { @@ -274,13 +363,12 @@ static void table_instance_destroy(struct table_instance *ti, struct sw_flow *flow; struct hlist_head *head = &ti->buckets[i]; struct hlist_node *n; - int ver = ti->node_ver; - int ufid_ver = ufid_ti->node_ver; - hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { - hlist_del_rcu(&flow->flow_table.node[ver]); - if (ovs_identifier_is_ufid(&flow->id)) - hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); + hlist_for_each_entry_safe(flow, n, head, + flow_table.node[ti->node_ver]) { + + table_instance_flow_free(table, ti, ufid_ti, + flow, false); ovs_flow_free(flow, deferred); } } @@ -305,7 +393,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) free_percpu(table->mask_cache); kfree_rcu(rcu_dereference_raw(table->mask_array), rcu); - table_instance_destroy(ti, ufid_ti, false); + table_instance_destroy(table, ti, ufid_ti, false); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -421,7 +509,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) flow_table->count = 0; flow_table->ufid_count = 0; - table_instance_destroy(old_ti, old_ufid_ti, true); + table_instance_destroy(flow_table, old_ti, old_ufid_ti, true); return 0; err_free_ti: @@ -701,51 +789,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } -static void tbl_mask_array_del_mask(struct flow_table *tbl, - struct sw_flow_mask *mask) -{ - struct mask_array *ma = ovsl_dereference(tbl->mask_array); - int i, ma_count = READ_ONCE(ma->count); - - /* Remove the deleted mask pointers from the array */ - for (i = 0; i < ma_count; i++) { - if (mask == ovsl_dereference(ma->masks[i])) - goto found; - } - - BUG(); - return; - -found: - WRITE_ONCE(ma->count, ma_count -1); - - rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); - RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); - - kfree_rcu(mask, rcu); - - /* Shrink the mask array if necessary. */ - if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && - ma_count <= (ma->max / 3)) - tbl_mask_array_realloc(tbl, ma->max / 2); -} - -/* Remove 'mask' from the mask list, if it is not needed any more. */ -static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) -{ - if (mask) { - /* ovs-lock is required to protect mask-refcount and - * mask list. - */ - ASSERT_OVSL(); - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) - tbl_mask_array_del_mask(tbl, mask); - } -} - /* Must be called with OVS mutex held. */ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { @@ -753,17 +796,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); BUG_ON(table->count == 0); - hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); - table->count--; - if (ovs_identifier_is_ufid(&flow->id)) { - hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); - table->ufid_count--; - } - - /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be - * accessible as long as the RCU read lock is held. - */ - flow_mask_remove(table, flow->mask); + table_instance_flow_free(table, ti, ufid_ti, flow, true); } static struct sw_flow_mask *mask_alloc(void) @@ -806,29 +839,6 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, return NULL; } -static int tbl_mask_array_add_mask(struct flow_table *tbl, - struct sw_flow_mask *new) -{ - struct mask_array *ma = ovsl_dereference(tbl->mask_array); - int err, ma_count = READ_ONCE(ma->count); - - if (ma_count >= ma->max) { - err = tbl_mask_array_realloc(tbl, ma->max + - MASK_ARRAY_SIZE_MIN); - if (err) - return err; - - ma = ovsl_dereference(tbl->mask_array); - } - - BUG_ON(ovsl_dereference(ma->masks[ma_count])); - - rcu_assign_pointer(ma->masks[ma_count], new); - WRITE_ONCE(ma->count, ma_count +1); - - return 0; -} - /* Add 'mask' into the mask list, if it is not already there. */ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, const struct sw_flow_mask *new) From 4c76bf696a608ea5cc555fe97ec59a9033236604 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 1 Nov 2019 22:23:53 +0800 Subject: [PATCH 09/10] net: openvswitch: don't unlock mutex when changing the user_features fails Unlocking of a not locked mutex is not allowed. Other kernel thread may be in critical section while we unlock it because of setting user_feature fail. Fixes: 95a7233c4 ("net: openvswitch: Set OvS recirc_id from tc chain index") Cc: Paul Blakey Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 24cb73e62f55..745033e261aa 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1657,6 +1657,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_reset_user_features(skb, info); } + ovs_unlock(); goto err_destroy_meters; } @@ -1673,7 +1674,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) return 0; err_destroy_meters: - ovs_unlock(); ovs_meters_exit(dp); err_destroy_ports_array: kfree(dp->ports); From eec62eadd1d757b0743ccbde55973814f3ad396e Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 1 Nov 2019 22:23:54 +0800 Subject: [PATCH 10/10] net: openvswitch: simplify the ovs_dp_cmd_new use the specified functions to init resource. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 60 ++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 745033e261aa..2088619c03f0 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1576,6 +1576,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) return 0; } +static int ovs_dp_stats_init(struct datapath *dp) +{ + dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); + if (!dp->stats_percpu) + return -ENOMEM; + + return 0; +} + +static int ovs_dp_vport_init(struct datapath *dp) +{ + int i; + + dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!dp->ports) + return -ENOMEM; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->ports[i]); + + return 0; +} + static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -1584,7 +1609,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct vport *vport; struct ovs_net *ovs_net; - int err, i; + int err; err = -EINVAL; if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) @@ -1597,35 +1622,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_free_reply; + goto err_destroy_reply; ovs_dp_set_net(dp, sock_net(skb->sk)); /* Allocate table. */ err = ovs_flow_tbl_init(&dp->table); if (err) - goto err_free_dp; + goto err_destroy_dp; - dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); - if (!dp->stats_percpu) { - err = -ENOMEM; + err = ovs_dp_stats_init(dp); + if (err) goto err_destroy_table; - } - dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, - sizeof(struct hlist_head), - GFP_KERNEL); - if (!dp->ports) { - err = -ENOMEM; - goto err_destroy_percpu; - } - - for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&dp->ports[i]); + err = ovs_dp_vport_init(dp); + if (err) + goto err_destroy_stats; err = ovs_meters_init(dp); if (err) - goto err_destroy_ports_array; + goto err_destroy_ports; /* Set up our datapath device. */ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); @@ -1675,15 +1691,15 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) err_destroy_meters: ovs_meters_exit(dp); -err_destroy_ports_array: +err_destroy_ports: kfree(dp->ports); -err_destroy_percpu: +err_destroy_stats: free_percpu(dp->stats_percpu); err_destroy_table: ovs_flow_tbl_destroy(&dp->table); -err_free_dp: +err_destroy_dp: kfree(dp); -err_free_reply: +err_destroy_reply: kfree_skb(reply); err: return err;