Merge branch 'optimize-openvswitch-flow-looking-up'
Tonghao Zhang says:
====================
optimize openvswitch flow looking up
This patch series optimizes Open vSwitch flow lookup and
simplifies the code.
Patch 1, 2, 4: port Pravin B Shelar's patches to
upstream Linux with small changes.
Patch 5, 6, 7: optimize flow lookup with a per-CPU
mask cache and simplify the flow hash (a sketch
follows this list).
Patch 8, 9: bug fixes.
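As a rough illustration of the patch 5-7 idea, here is a minimal
userspace model of the segmented mask cache. The MC_HASH_* constants
and the cache-entry layout mirror the series; everything else (the
global array, main, the probe helper's name) is illustrative
scaffolding, not kernel code:

#include <stdint.h>
#include <stdio.h>

#define MC_HASH_SHIFT	8
#define MC_HASH_ENTRIES	(1u << MC_HASH_SHIFT)
#define MC_HASH_SEGS	((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)

struct mask_cache_entry {
	uint32_t skb_hash;	/* 0 means the slot is empty */
	uint32_t mask_index;	/* mask that matched this flow last time */
};

/* One such array exists per CPU in the kernel; one global one here. */
static struct mask_cache_entry cache[MC_HASH_ENTRIES];

/* Probe up to MC_HASH_SEGS slots, consuming MC_HASH_SHIFT bits of the
 * hash per probe, so a collision in one segment retries in a different
 * slot of the next segment. Returns the cached mask index, or -1 on a
 * miss (the kernel then falls back to scanning the whole mask array).
 */
static int mask_cache_probe(uint32_t skb_hash)
{
	uint32_t hash = skb_hash;
	unsigned int seg;

	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
		struct mask_cache_entry *e =
			&cache[hash & (MC_HASH_ENTRIES - 1)];

		if (e->skb_hash == skb_hash)
			return (int)e->mask_index;
		hash >>= MC_HASH_SHIFT;
	}
	return -1;
}

int main(void)
{
	cache[0x12345678u & (MC_HASH_ENTRIES - 1)].skb_hash = 0x12345678u;
	cache[0x12345678u & (MC_HASH_ENTRIES - 1)].mask_index = 3;

	printf("cached flow -> mask %d\n", mask_cache_probe(0x12345678u));
	printf("unknown flow -> %d\n", mask_cache_probe(0xdeadbeefu));
	return 0;
}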
The performance test was run on an Intel Xeon E5-2630 v4.
The test topology is shown below:
+-----------------------------------+
| +---------------------------+ |
| | eth0 ovs-switch eth1 | | Host0
| +---------------------------+ |
+-----------------------------------+
^ |
| |
| |
| |
| v
+-----+----+ +----+-----+
| netperf | Host1 | netserver| Host2
+----------+ +----------+
We use netperf to send 64-byte packets, after inserting 255+ flow masks:
$ ovs-dpctl add-flow ovs-switch "in_port(1),eth(dst=00:01:00:00:00:00/ff:ff:ff:ff:ff:01),eth_type(0x0800),ipv4(frag=no)" 2
...
$ ovs-dpctl add-flow ovs-switch "in_port(1),eth(dst=00:ff:00:00:00:00/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)" 2
$
$ netperf -t UDP_STREAM -H 2.2.2.200 -l 40 -- -m 18
* Without the series: throughput 8.28 Mbps
* With the series: throughput 46.05 Mbps
v6:
some coding style fixes
v5:
rewrite patch 8, releasing the flow mask when freeing the flow
v4:
access ma->count with the READ_ONCE/WRITE_ONCE API; for more
information, see the patch 5 comments (and the sketch after this list).
v3:
update the ma pointer when reallocating the mask_array in patch 5
v2:
simplify the code, e.g. use kfree_rcu instead of call_rcu
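For the v4 note above, a short sketch of the READ_ONCE/WRITE_ONCE
pattern on ma->count as patch 5 uses it. This is a simplified, assumed
sketch of the two sides of the race (the real code is
tbl_mask_array_add_mask()/ovs_flow_tbl_num_masks() in
net/openvswitch/flow_table.c); the stub names are hypothetical:

#include <linux/compiler.h>

struct mask_array_stub {
	int count;
	int max;
};

/* Lockless reader side (cf. ovs_flow_tbl_num_masks): annotate the
 * load so the compiler cannot tear or refetch it.
 */
static inline int stub_num_masks(const struct mask_array_stub *ma)
{
	return READ_ONCE(ma->count);
}

/* Writer side, called with the ovs mutex held (cf.
 * tbl_mask_array_add_mask): the new mask pointer is published with
 * rcu_assign_pointer() first, then the count update is made visible
 * to lockless readers with WRITE_ONCE.
 */
static inline void stub_count_inc(struct mask_array_stub *ma)
{
	int ma_count = READ_ONCE(ma->count);

	WRITE_ONCE(ma->count, ma_count + 1);
}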
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 	stats = this_cpu_ptr(dp->stats_percpu);
 
 	/* Look up flow. */
-	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+					 &n_mask_hit);
 	if (unlikely(!flow)) {
 		struct dp_upcall_info upcall;
 
@@ -1575,6 +1576,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 	return 0;
 }
 
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+	if (!dp->stats_percpu)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
+{
+	int i;
+
+	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+				  sizeof(struct hlist_head),
+				  GFP_KERNEL);
+	if (!dp->ports)
+		return -ENOMEM;
+
+	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+		INIT_HLIST_HEAD(&dp->ports[i]);
+
+	return 0;
+}
+
 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
@@ -1583,7 +1609,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct vport *vport;
 	struct ovs_net *ovs_net;
-	int err, i;
+	int err;
 
 	err = -EINVAL;
 	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@ -1596,35 +1622,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	err = -ENOMEM;
 	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
 	if (dp == NULL)
-		goto err_free_reply;
+		goto err_destroy_reply;
 
 	ovs_dp_set_net(dp, sock_net(skb->sk));
 
 	/* Allocate table. */
 	err = ovs_flow_tbl_init(&dp->table);
 	if (err)
-		goto err_free_dp;
+		goto err_destroy_dp;
 
-	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
-	if (!dp->stats_percpu) {
-		err = -ENOMEM;
+	err = ovs_dp_stats_init(dp);
+	if (err)
 		goto err_destroy_table;
-	}
 
-	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
-				  sizeof(struct hlist_head),
-				  GFP_KERNEL);
-	if (!dp->ports) {
-		err = -ENOMEM;
-		goto err_destroy_percpu;
-	}
-
-	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&dp->ports[i]);
+	err = ovs_dp_vport_init(dp);
+	if (err)
+		goto err_destroy_stats;
 
 	err = ovs_meters_init(dp);
 	if (err)
-		goto err_destroy_ports_array;
+		goto err_destroy_ports;
 
 	/* Set up our datapath device. */
 	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1656,6 +1673,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 			ovs_dp_reset_user_features(skb, info);
 		}
 
+		ovs_unlock();
 		goto err_destroy_meters;
 	}
 
@@ -1672,17 +1690,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 
 err_destroy_meters:
-	ovs_unlock();
 	ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
 	kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
 	free_percpu(dp->stats_percpu);
 err_destroy_table:
 	ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
 	kfree(dp);
-err_free_reply:
+err_destroy_reply:
 	kfree_skb(reply);
 err:
 	return err;
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -166,7 +166,6 @@ struct sw_flow_key_range {
 struct sw_flow_mask {
 	int ref_count;
 	struct rcu_head rcu;
-	struct list_head list;
 	struct sw_flow_key_range range;
 	struct sw_flow_key key;
 };
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -34,8 +34,13 @@
 #include <net/ndisc.h>
 
 #define TBL_MIN_BUCKETS		1024
+#define MASK_ARRAY_SIZE_MIN	16
 #define REHASH_INTERVAL		(10 * 60 * HZ)
 
+#define MC_HASH_SHIFT		8
+#define MC_HASH_ENTRIES		(1u << MC_HASH_SHIFT)
+#define MC_HASH_SEGS		((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
+
 static struct kmem_cache *flow_cache;
 struct kmem_cache *flow_stats_cache __read_mostly;
 
@@ -164,14 +169,133 @@ static struct table_instance *table_instance_alloc(int new_size)
 	return ti;
 }
 
+static struct mask_array *tbl_mask_array_alloc(int size)
+{
+	struct mask_array *new;
+
+	size = max(MASK_ARRAY_SIZE_MIN, size);
+	new = kzalloc(sizeof(struct mask_array) +
+		      sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	new->count = 0;
+	new->max = size;
+
+	return new;
+}
+
+static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
+{
+	struct mask_array *old;
+	struct mask_array *new;
+
+	new = tbl_mask_array_alloc(size);
+	if (!new)
+		return -ENOMEM;
+
+	old = ovsl_dereference(tbl->mask_array);
+	if (old) {
+		int i;
+
+		for (i = 0; i < old->max; i++) {
+			if (ovsl_dereference(old->masks[i]))
+				new->masks[new->count++] = old->masks[i];
+		}
+	}
+
+	rcu_assign_pointer(tbl->mask_array, new);
+	kfree_rcu(old, rcu);
+
+	return 0;
+}
+
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+				   struct sw_flow_mask *new)
+{
+	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+	int err, ma_count = READ_ONCE(ma->count);
+
+	if (ma_count >= ma->max) {
+		err = tbl_mask_array_realloc(tbl, ma->max +
+					      MASK_ARRAY_SIZE_MIN);
+		if (err)
+			return err;
+
+		ma = ovsl_dereference(tbl->mask_array);
+	}
+
+	BUG_ON(ovsl_dereference(ma->masks[ma_count]));
+
+	rcu_assign_pointer(ma->masks[ma_count], new);
+	WRITE_ONCE(ma->count, ma_count +1);
+
+	return 0;
+}
+
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+				    struct sw_flow_mask *mask)
+{
+	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+	int i, ma_count = READ_ONCE(ma->count);
+
+	/* Remove the deleted mask pointers from the array */
+	for (i = 0; i < ma_count; i++) {
+		if (mask == ovsl_dereference(ma->masks[i]))
+			goto found;
+	}
+
+	BUG();
+	return;
+
+found:
+	WRITE_ONCE(ma->count, ma_count -1);
+
+	rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
+	RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
+
+	kfree_rcu(mask, rcu);
+
+	/* Shrink the mask array if necessary. */
+	if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+	    ma_count <= (ma->max / 3))
+		tbl_mask_array_realloc(tbl, ma->max / 2);
+}
+
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+	if (mask) {
+		/* ovs-lock is required to protect mask-refcount and
+		 * mask list.
+		 */
+		ASSERT_OVSL();
+		BUG_ON(!mask->ref_count);
+		mask->ref_count--;
+
+		if (!mask->ref_count)
+			tbl_mask_array_del_mask(tbl, mask);
+	}
+}
+
 int ovs_flow_tbl_init(struct flow_table *table)
 {
 	struct table_instance *ti, *ufid_ti;
+	struct mask_array *ma;
 
+	table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
+					   MC_HASH_ENTRIES,
+					   __alignof__(struct mask_cache_entry));
+	if (!table->mask_cache)
+		return -ENOMEM;
+
+	ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
+	if (!ma)
+		goto free_mask_cache;
+
 	ti = table_instance_alloc(TBL_MIN_BUCKETS);
-
 	if (!ti)
-		return -ENOMEM;
+		goto free_mask_array;
 
 	ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
 	if (!ufid_ti)
@@ -179,7 +303,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
 
 	rcu_assign_pointer(table->ti, ti);
 	rcu_assign_pointer(table->ufid_ti, ufid_ti);
-	INIT_LIST_HEAD(&table->mask_list);
+	rcu_assign_pointer(table->mask_array, ma);
 	table->last_rehash = jiffies;
 	table->count = 0;
 	table->ufid_count = 0;
@@ -187,6 +311,10 @@ int ovs_flow_tbl_init(struct flow_table *table)
 
 free_ti:
 	__table_instance_destroy(ti);
+free_mask_array:
+	kfree(ma);
+free_mask_cache:
+	free_percpu(table->mask_cache);
 	return -ENOMEM;
 }
 
@@ -197,7 +325,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
 	__table_instance_destroy(ti);
 }
 
-static void table_instance_destroy(struct table_instance *ti,
+static void table_instance_flow_free(struct flow_table *table,
+				     struct table_instance *ti,
+				     struct table_instance *ufid_ti,
+				     struct sw_flow *flow,
+				     bool count)
+{
+	hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
+	if (count)
+		table->count--;
+
+	if (ovs_identifier_is_ufid(&flow->id)) {
+		hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+
+		if (count)
+			table->ufid_count--;
+	}
+
+	flow_mask_remove(table, flow->mask);
+}
+
+static void table_instance_destroy(struct flow_table *table,
+				   struct table_instance *ti,
 				   struct table_instance *ufid_ti,
 				   bool deferred)
 {
@@ -214,13 +363,12 @@ static void table_instance_destroy(struct table_instance *ti,
 		struct sw_flow *flow;
 		struct hlist_head *head = &ti->buckets[i];
 		struct hlist_node *n;
-		int ver = ti->node_ver;
-		int ufid_ver = ufid_ti->node_ver;
 
-		hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
-			hlist_del_rcu(&flow->flow_table.node[ver]);
-			if (ovs_identifier_is_ufid(&flow->id))
-				hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
+		hlist_for_each_entry_safe(flow, n, head,
+					  flow_table.node[ti->node_ver]) {
+
+			table_instance_flow_free(table, ti, ufid_ti,
+						 flow, false);
 			ovs_flow_free(flow, deferred);
 		}
 	}
@@ -243,7 +391,9 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
 	struct table_instance *ti = rcu_dereference_raw(table->ti);
 	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
 
-	table_instance_destroy(ti, ufid_ti, false);
+	free_percpu(table->mask_cache);
+	kfree_rcu(rcu_dereference_raw(table->mask_array), rcu);
+	table_instance_destroy(table, ti, ufid_ti, false);
 }
 
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -359,7 +509,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
 	flow_table->count = 0;
 	flow_table->ufid_count = 0;
 
-	table_instance_destroy(old_ti, old_ufid_ti, true);
+	table_instance_destroy(flow_table, old_ti, old_ufid_ti, true);
 	return 0;
 
 err_free_ti:
@@ -370,13 +520,10 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
 static u32 flow_hash(const struct sw_flow_key *key,
 		     const struct sw_flow_key_range *range)
 {
-	int key_start = range->start;
-	int key_end = range->end;
-	const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
-	int hash_u32s = (key_end - key_start) >> 2;
+	const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
 
 	/* Make sure number of hash bytes are multiple of u32. */
-	BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+	int hash_u32s = range_n_bytes(range) >> 2;
 
 	return jhash2(hash_key, hash_u32s, 0);
 }
@@ -425,7 +572,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
 
 static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
 					  const struct sw_flow_key *unmasked,
-					  const struct sw_flow_mask *mask)
+					  const struct sw_flow_mask *mask,
+					  u32 *n_mask_hit)
 {
 	struct sw_flow *flow;
 	struct hlist_head *head;
@@ -435,6 +583,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
 	ovs_flow_mask_key(&masked_key, unmasked, false, mask);
 	hash = flow_hash(&masked_key, &mask->range);
 	head = find_bucket(ti, hash);
+	(*n_mask_hit)++;
+
 	hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
 		if (flow->mask == mask && flow->flow_table.hash == hash &&
 		    flow_cmp_masked_key(flow, &masked_key, &mask->range))
@@ -443,46 +593,147 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
 	return NULL;
 }
 
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
-					  const struct sw_flow_key *key,
-					  u32 *n_mask_hit)
+/* Flow lookup does full lookup on flow table. It starts with
+ * mask from index passed in *index.
+ */
+static struct sw_flow *flow_lookup(struct flow_table *tbl,
+				   struct table_instance *ti,
+				   struct mask_array *ma,
+				   const struct sw_flow_key *key,
+				   u32 *n_mask_hit,
+				   u32 *index)
 {
-	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
-	struct sw_flow_mask *mask;
 	struct sw_flow *flow;
+	struct sw_flow_mask *mask;
+	int i;
 
+	if (likely(*index < ma->max)) {
+		mask = rcu_dereference_ovsl(ma->masks[*index]);
+		if (mask) {
+			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+			if (flow)
+				return flow;
+		}
+	}
+
+	for (i = 0; i < ma->max; i++) {
+
+		if (i == *index)
+			continue;
+
+		mask = rcu_dereference_ovsl(ma->masks[i]);
+		if (unlikely(!mask))
+			break;
+
+		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+		if (flow) { /* Found */
+			*index = i;
+			return flow;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * mask_cache maps flow to probable mask. This cache is not tightly
+ * coupled cache, It means updates to mask list can result in inconsistent
+ * cache entry in mask cache.
+ * This is per cpu cache and is divided in MC_HASH_SEGS segments.
+ * In case of a hash collision the entry is hashed in next segment.
+ * */
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
+					  const struct sw_flow_key *key,
+					  u32 skb_hash,
+					  u32 *n_mask_hit)
+{
+	struct mask_array *ma = rcu_dereference(tbl->mask_array);
+	struct table_instance *ti = rcu_dereference(tbl->ti);
+	struct mask_cache_entry *entries, *ce;
+	struct sw_flow *flow;
+	u32 hash;
+	int seg;
 
 	*n_mask_hit = 0;
-	list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
-		(*n_mask_hit)++;
-		flow = masked_flow_lookup(ti, key, mask);
-		if (flow)  /* Found */
-			return flow;
+	if (unlikely(!skb_hash)) {
+		u32 mask_index = 0;
+
+		return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
 	}
-	return NULL;
+
+	/* Pre and post recirulation flows usually have the same skb_hash
+	 * value. To avoid hash collisions, rehash the 'skb_hash' with
+	 * 'recirc_id'. */
+	if (key->recirc_id)
+		skb_hash = jhash_1word(skb_hash, key->recirc_id);
+
+	ce = NULL;
+	hash = skb_hash;
+	entries = this_cpu_ptr(tbl->mask_cache);
+
+	/* Find the cache entry 'ce' to operate on. */
+	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
+		int index = hash & (MC_HASH_ENTRIES - 1);
+		struct mask_cache_entry *e;
+
+		e = &entries[index];
+		if (e->skb_hash == skb_hash) {
+			flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
+					   &e->mask_index);
+			if (!flow)
+				e->skb_hash = 0;
+			return flow;
+		}
+
+		if (!ce || e->skb_hash < ce->skb_hash)
+			ce = e; /* A better replacement cache candidate. */
+
+		hash >>= MC_HASH_SHIFT;
+	}
+
+	/* Cache miss, do full lookup. */
+	flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
+	if (flow)
+		ce->skb_hash = skb_hash;
+
+	return flow;
 }
 
 struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
 				    const struct sw_flow_key *key)
 {
 	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
 	u32 __always_unused n_mask_hit;
+	u32 index = 0;
 
-	return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
+	return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
 }
 
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
 					  const struct sw_flow_match *match)
 {
-	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
-	struct sw_flow_mask *mask;
-	struct sw_flow *flow;
+	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+	int i;
 
 	/* Always called under ovs-mutex. */
-	list_for_each_entry(mask, &tbl->mask_list, list) {
-		flow = masked_flow_lookup(ti, match->key, mask);
+	for (i = 0; i < ma->max; i++) {
+		struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+		u32 __always_unused n_mask_hit;
+		struct sw_flow_mask *mask;
+		struct sw_flow *flow;
+
+		mask = ovsl_dereference(ma->masks[i]);
+		if (!mask)
+			continue;
+
+		flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
 		if (flow && ovs_identifier_is_key(&flow->id) &&
-		    ovs_flow_cmp_unmasked_key(flow, match))
+		    ovs_flow_cmp_unmasked_key(flow, match)) {
 			return flow;
+		}
 	}
+
 	return NULL;
 }
 
@@ -528,13 +779,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
 
 int ovs_flow_tbl_num_masks(const struct flow_table *table)
 {
-	struct sw_flow_mask *mask;
-	int num = 0;
-
-	list_for_each_entry(mask, &table->mask_list, list)
-		num++;
-
-	return num;
+	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
+	return READ_ONCE(ma->count);
 }
 
 static struct table_instance *table_instance_expand(struct table_instance *ti,
@@ -543,24 +789,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
 	return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
 }
 
-/* Remove 'mask' from the mask list, if it is not needed any more. */
-static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
-{
-	if (mask) {
-		/* ovs-lock is required to protect mask-refcount and
-		 * mask list.
-		 */
-		ASSERT_OVSL();
-		BUG_ON(!mask->ref_count);
-		mask->ref_count--;
-
-		if (!mask->ref_count) {
-			list_del_rcu(&mask->list);
-			kfree_rcu(mask, rcu);
-		}
-	}
-}
-
 /* Must be called with OVS mutex held. */
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 {
@@ -568,17 +796,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 	struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
 
 	BUG_ON(table->count == 0);
-	hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
-	table->count--;
-	if (ovs_identifier_is_ufid(&flow->id)) {
-		hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
-		table->ufid_count--;
-	}
-
-	/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
-	 * accessible as long as the RCU read lock is held.
-	 */
-	flow_mask_remove(table, flow->mask);
+	table_instance_flow_free(table, ti, ufid_ti, flow, true);
 }
 
 static struct sw_flow_mask *mask_alloc(void)
@@ -606,13 +824,16 @@ static bool mask_equal(const struct sw_flow_mask *a,
 static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
 					   const struct sw_flow_mask *mask)
 {
-	struct list_head *ml;
+	struct mask_array *ma;
+	int i;
 
-	list_for_each(ml, &tbl->mask_list) {
-		struct sw_flow_mask *m;
-		m = container_of(ml, struct sw_flow_mask, list);
-		if (mask_equal(mask, m))
-			return m;
+	ma = ovsl_dereference(tbl->mask_array);
+	for (i = 0; i < ma->max; i++) {
+		struct sw_flow_mask *t;
+		t = ovsl_dereference(ma->masks[i]);
+
+		if (t && mask_equal(mask, t))
+			return t;
 	}
 
 	return NULL;
@@ -623,6 +844,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 			    const struct sw_flow_mask *new)
 {
 	struct sw_flow_mask *mask;
+
 	mask = flow_mask_find(tbl, new);
 	if (!mask) {
 		/* Allocate a new mask if none exsits. */
@@ -631,7 +853,12 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 			return -ENOMEM;
 		mask->key = new->key;
 		mask->range = new->range;
-		list_add_rcu(&mask->list, &tbl->mask_list);
+
+		/* Add mask to mask-list. */
+		if (tbl_mask_array_add_mask(tbl, mask)) {
+			kfree(mask);
+			return -ENOMEM;
+		}
 	} else {
 		BUG_ON(!mask->ref_count);
 		mask->ref_count++;
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -22,6 +22,17 @@
 
 #include "flow.h"
 
+struct mask_cache_entry {
+	u32 skb_hash;
+	u32 mask_index;
+};
+
+struct mask_array {
+	struct rcu_head rcu;
+	int count, max;
+	struct sw_flow_mask __rcu *masks[];
+};
+
 struct table_instance {
 	struct hlist_head *buckets;
 	unsigned int n_buckets;
@@ -34,7 +45,8 @@ struct table_instance {
 struct flow_table {
 	struct table_instance __rcu *ti;
 	struct table_instance __rcu *ufid_ti;
-	struct list_head mask_list;
+	struct mask_cache_entry __percpu *mask_cache;
+	struct mask_array __rcu *mask_array;
 	unsigned long last_rehash;
 	unsigned int count;
 	unsigned int ufid_count;
@@ -60,8 +72,9 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table);
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
 				       u32 *bucket, u32 *idx);
 struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
-					  const struct sw_flow_key *,
-					  u32 *n_mask_hit);
+					  const struct sw_flow_key *,
+					  u32 skb_hash,
+					  u32 *n_mask_hit);
 struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
 				    const struct sw_flow_key *);
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,