From 2a43747147699c6187d8508b40a28a50f42b0ee5 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Tue, 7 Aug 2018 17:35:58 +0200 Subject: [PATCH 1/6] nfp: flower: set ip tunnel ttl from encap action The TTL for encapsulating headers in IPv4 UDP tunnels is taken from a route lookup. Modify this to first check if a user has specified a TTL to be used in the TC action. Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/flower/action.c | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index e56b815a8dc6..a79d078ab3e8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -238,18 +238,12 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); u32 tmp_set_ip_tun_type_index = 0; - struct flowi4 flow = {}; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; - struct rtable *rt; - struct net *net; - int err; if (ip_tun->options_len) return -EOPNOTSUPP; - net = dev_net(netdev); - set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; @@ -261,19 +255,28 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; - /* Do a route lookup to determine ttl - if fails then use default. - * Note that CONFIG_INET is a requirement of CONFIG_NET_SWITCHDEV so - * must be defined here. 
- */ - flow.daddr = ip_tun->key.u.ipv4.dst; - flow.flowi4_proto = IPPROTO_UDP; - rt = ip_route_output_key(net, &flow); - err = PTR_ERR_OR_ZERO(rt); - if (!err) { - set_tun->ttl = ip4_dst_hoplimit(&rt->dst); - ip_rt_put(rt); + if (ip_tun->key.ttl) { + set_tun->ttl = ip_tun->key.ttl; } else { - set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + struct net *net = dev_net(netdev); + struct flowi4 flow = {}; + struct rtable *rt; + int err; + + /* Do a route lookup to determine ttl - if fails then use + * default. Note that CONFIG_INET is a requirement of + * CONFIG_NET_SWITCHDEV so must be defined here. + */ + flow.daddr = ip_tun->key.u.ipv4.dst; + flow.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(net, &flow); + err = PTR_ERR_OR_ZERO(rt); + if (!err) { + set_tun->ttl = ip4_dst_hoplimit(&rt->dst); + ip_rt_put(rt); + } else { + set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + } } set_tun->tos = ip_tun->key.tos; From d7ff7ec573860dc654fa4c8641684ba3db03004e Mon Sep 17 00:00:00 2001 From: John Hurley Date: Tue, 7 Aug 2018 17:35:59 +0200 Subject: [PATCH 2/6] nfp: flower: allow matching on ipv4 UDP tunnel tos and ttl The addition of FLOW_DISSECTOR_KEY_ENC_IP to TC flower means that the ToS and TTL of the tunnel header can now be matched on. Extend the NFP tunnel match function to include these new fields. Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 7 +++++-- drivers/net/ethernet/netronome/nfp/flower/match.c | 9 +++++++++ drivers/net/ethernet/netronome/nfp/flower/offload.c | 4 +++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 15f1eacd76b6..174acecfba01 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -346,7 +346,7 @@ struct nfp_flower_ipv6 { * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ipv4_addr_dst | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | Reserved | + * | Reserved | tos | ttl | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -356,7 +356,10 @@ struct nfp_flower_ipv6 { struct nfp_flower_ipv4_udp_tun { __be32 ip_src; __be32 ip_dst; - __be32 reserved[2]; + __be16 reserved1; + u8 tos; + u8 ttl; + __be32 reserved2; __be32 tun_id; }; diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 84f7a5dbea9d..b1cbe6927cba 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -270,6 +270,7 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, struct fl_flow_key *target = mask_version ? 
flow->mask : flow->key; struct flow_dissector_key_ipv4_addrs *tun_ips; struct flow_dissector_key_keyid *vni; + struct flow_dissector_key_ip *ip; memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); @@ -293,6 +294,14 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, frame->ip_src = tun_ips->src; frame->ip_dst = tun_ips->dst; } + + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { + ip = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IP, + target); + frame->tos = ip->tos; + frame->ttl = ip->ttl; + } } int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 6bc8a97f7e03..d2230a0e49b9 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -66,6 +66,7 @@ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \ BIT(FLOW_DISSECTOR_KEY_MPLS) | \ BIT(FLOW_DISSECTOR_KEY_IP)) @@ -74,7 +75,8 @@ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ - BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IP)) #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ From 92e2c4053623f21d61a683f7ef7bd61c8300ac7d Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 7 Aug 2018 17:36:00 +0200 Subject: [PATCH 3/6] flow_dissector: allow dissection of tunnel options from metadata Allow the existing 'dissection' of tunnel metadata to 'dissect' options already present in tunnel metadata. This dissection is controlled by a new dissector key, FLOW_DISSECTOR_KEY_ENC_OPTS. 
This dissection only occurs when skb_flow_dissect_tunnel_info() is called, currently only the Flower classifier makes that call. So there should be no impact on other users of the flow dissector. This is in preparation for allowing the flower classifier to match on Geneve options. Signed-off-by: Simon Horman Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 17 +++++++++++++++++ net/core/flow_dissector.c | 19 ++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 2a17f041f7a1..6a4586dcdede 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -57,6 +57,21 @@ struct flow_dissector_key_mpls { mpls_label:20; }; +#define FLOW_DIS_TUN_OPTS_MAX 255 +/** + * struct flow_dissector_key_enc_opts: + * @data: tunnel option data + * @len: length of tunnel option data + * @dst_opt_type: tunnel option type + */ +struct flow_dissector_key_enc_opts { + u8 data[FLOW_DIS_TUN_OPTS_MAX]; /* Using IP_TUNNEL_OPTS_MAX is desired + * here but seems difficult to #include + */ + u8 len; + __be16 dst_opt_type; +}; + struct flow_dissector_key_keyid { __be32 keyid; }; @@ -208,6 +223,8 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ + FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ + FLOW_DISSECTOR_KEY_MAX, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 08a5184f4b34..ce9eeeb7c024 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -154,7 +154,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) && !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_IP)) + 
FLOW_DISSECTOR_KEY_ENC_IP) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS)) return; info = skb_tunnel_info(skb); @@ -224,6 +226,21 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, ip->tos = key->tos; ip->ttl = key->ttl; } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + struct flow_dissector_key_enc_opts *enc_opt; + + enc_opt = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + target_container); + + if (info->options_len) { + enc_opt->len = info->options_len; + ip_tunnel_info_opts_get(enc_opt->data, info); + enc_opt->dst_opt_type = info->key.tun_flags & + TUNNEL_OPTIONS_PRESENT; + } + } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); From 0a6e77784f490912d81b92cfd48424541c04691e Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Tue, 7 Aug 2018 17:36:01 +0200 Subject: [PATCH 4/6] net/sched: allow flower to match tunnel options Allow matching on options in Geneve tunnel headers. This makes use of existing tunnel metadata support. The options can be described in the form CLASS:TYPE:DATA/CLASS_MASK:TYPE_MASK:DATA_MASK, where CLASS is represented as a 16bit hexadecimal value, TYPE as an 8bit hexadecimal value and DATA as a variable length hexadecimal value. e.g. # ip link add name geneve0 type geneve dstport 0 external # tc qdisc add dev geneve0 ingress # tc filter add dev geneve0 protocol ip parent ffff: \ flower \ enc_src_ip 10.0.99.192 \ enc_dst_ip 10.0.99.193 \ enc_key_id 11 \ geneve_opts 0102:80:1122334421314151/ffff:ff:ffffffffffffffff \ ip_proto udp \ action mirred egress redirect dev eth1 This patch adds support for matching Geneve options in the order supplied by the user. This leads to an efficient implementation in the software datapath (and in our opinion hardware datapaths that offload this feature). 
It is also compatible with Geneve options matching provided by the Open vSwitch kernel datapath which is relevant here as the Flower classifier may be used as a mechanism to program flows into hardware as a form of Open vSwitch datapath offload (sometimes referred to as OVS-TC). The netlink Kernel/Userspace API may be extended, for example by adding a flag, if other matching options are desired, for example matching given options in any order. This would require an implementation in the TC software datapath. And be done in a way that drivers that facilitate offload of the Flower classifier can reject or accept such flows based on hardware datapath capabilities. This approach was discussed and agreed on at Netconf 2017 in Seoul. Signed-off-by: Simon Horman Signed-off-by: Pieter Jansen van Vuuren Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 26 ++++ net/sched/cls_flower.c | 244 ++++++++++++++++++++++++++++++++++- 2 files changed, 269 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 48e5b5d49a34..be382fb0592d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -480,11 +480,37 @@ enum { TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */ TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */ + TCA_FLOWER_KEY_ENC_OPTS, + TCA_FLOWER_KEY_ENC_OPTS_MASK, + __TCA_FLOWER_MAX, }; #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) +enum { + TCA_FLOWER_KEY_ENC_OPTS_UNSPEC, + TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_GENEVE_ + * attributes + */ + __TCA_FLOWER_KEY_ENC_OPTS_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */ + + __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \ + 
(__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1) + enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a3b69bb6f4b0..9da244235170 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -53,6 +54,7 @@ struct fl_flow_key { struct flow_dissector_key_tcp tcp; struct flow_dissector_key_ip ip; struct flow_dissector_key_ip enc_ip; + struct flow_dissector_key_enc_opts enc_opts; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -482,6 +484,21 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_IP_TTL] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY, + .len = 128 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -603,6 +620,145 @@ static void fl_set_key_ip(struct nlattr **tb, bool encap, fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl)); } +static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1]; + struct nlattr *class = NULL, *type = NULL, *data = NULL; + struct geneve_opt *opt; + 
int err, data_len = 0; + + if (option_len > sizeof(struct geneve_opt)) + data_len = option_len - sizeof(struct geneve_opt); + + opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len]; + memset(opt, 0xff, option_len); + opt->length = data_len / 4; + opt->r1 = 0; + opt->r2 = 0; + opt->r3 = 0; + + /* If no mask has been provided we assume an exact match. */ + if (!depth) + return sizeof(struct geneve_opt) + data_len; + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) { + NL_SET_ERR_MSG(extack, "Non-geneve option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); + if (err < 0) + return err; + + /* We are not allowed to omit any of CLASS, TYPE or DATA + * fields from the key. + */ + if (!option_len && + (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] || + !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] || + !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) { + NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data"); + return -EINVAL; + } + + /* Omitting any of CLASS, TYPE or DATA fields is allowed + * for the mask.
+ */ + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) { + int new_len = key->enc_opts.len; + + data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]; + data_len = nla_len(data); + if (data_len < 4) { + NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long"); + return -ERANGE; + } + if (data_len % 4) { + NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long"); + return -ERANGE; + } + + new_len += sizeof(struct geneve_opt) + data_len; + BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX); + if (new_len > FLOW_DIS_TUN_OPTS_MAX) { + NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size"); + return -ERANGE; + } + opt->length = data_len / 4; + memcpy(opt->opt_data, nla_data(data), data_len); + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) { + class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]; + opt->opt_class = nla_get_be16(class); + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) { + type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]; + opt->type = nla_get_u8(type); + } + + return sizeof(struct geneve_opt) + data_len; +} + +static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL; + int option_len, key_depth, msk_depth = 0; + + nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]); + + if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) { + nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); + msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); + } + + nla_for_each_attr(nla_opt_key, nla_enc_key, + nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) { + switch (nla_type(nla_opt_key)) { + case TCA_FLOWER_KEY_ENC_OPTS_GENEVE: + option_len = 0; + key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + option_len = fl_set_geneve_opt(nla_opt_key, key, + key_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + key->enc_opts.len += option_len; + /* At the same time we 
need to parse through the mask + * in order to verify exact and mask attribute lengths. + */ + mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + option_len = fl_set_geneve_opt(nla_opt_msk, mask, + msk_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + mask->enc_opts.len += option_len; + if (key->enc_opts.len != mask->enc_opts.len) { + NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); + return -EINVAL; + } + + if (msk_depth) + nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); + break; + default: + NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); + return -EINVAL; + } + } + + return 0; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -799,6 +955,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip); + if (tb[TCA_FLOWER_KEY_ENC_OPTS]) { + ret = fl_set_enc_opt(tb, key, mask, extack); + if (ret) + return ret; + } + if (tb[TCA_FLOWER_KEY_FLAGS]) ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags); @@ -894,6 +1056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ENC_IP, enc_ip); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts); skb_flow_dissector_init(dissector, keys, cnt); } @@ -1414,6 +1578,83 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask); } +static int fl_dump_key_geneve_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct geneve_opt *opt; + struct nlattr *nest; + int opt_off = 0; + + nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); + if (!nest) + goto nla_put_failure; + + while (enc_opts->len > opt_off) { + opt = (struct geneve_opt *)&enc_opts->data[opt_off]; + + if 
(nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, + opt->opt_class)) + goto nla_put_failure; + if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, + opt->type)) + goto nla_put_failure; + if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, + opt->length * 4, opt->opt_data)) + goto nla_put_failure; + + opt_off += sizeof(struct geneve_opt) + opt->length * 4; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct nlattr *nest; + int err; + + if (!enc_opts->len) + return 0; + + nest = nla_nest_start(skb, enc_opt_type); + if (!nest) + goto nla_put_failure; + + switch (enc_opts->dst_opt_type) { + case TUNNEL_GENEVE_OPT: + err = fl_dump_key_geneve_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; + default: + goto nla_put_failure; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int fl_dump_key_enc_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *key_opts, + struct flow_dissector_key_enc_opts *msk_opts) +{ + int err; + + err = fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts); + if (err) + return err; + + return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts); +} + static int fl_dump_key(struct sk_buff *skb, struct net *net, struct fl_flow_key *key, struct fl_flow_key *mask) { @@ -1594,7 +1835,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, sizeof(key->enc_tp.dst)) || - fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip)) + fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) || + fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts)) goto nla_put_failure; if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) From 
9e7c32fe44248b5101173b1184707bc5506e00f3 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Tue, 7 Aug 2018 17:36:02 +0200 Subject: [PATCH 5/6] nfp: flower: add geneve option push action offload Introduce new push geneve option action. This allows offloading filters configured to entunnel geneve with options. Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/flower/action.c | 100 ++++++++++++++++-- .../net/ethernet/netronome/nfp/flower/cmsg.h | 20 +++- .../net/ethernet/netronome/nfp/flower/main.h | 1 + 3 files changed, 114 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index a79d078ab3e8..0ba0356ec4e6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -32,6 +32,7 @@ */ #include +#include #include #include #include @@ -45,7 +46,15 @@ #include "main.h" #include "../nfp_net_repr.h" -#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (TUNNEL_CSUM | TUNNEL_KEY) +/* The kernel versions of TUNNEL_* are not ABI and therefore vulnerable + * to change. Such changes will break our FW ABI. 
+ */ +#define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) +#define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) +#define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) +#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ + NFP_FL_TUNNEL_KEY | \ + NFP_FL_TUNNEL_GENEVE_OPT) static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) { @@ -229,7 +238,71 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) } static int -nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, +nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, + const struct tc_action *action) +{ + struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + int opt_len, opt_cnt, act_start, tot_push_len; + u8 *src = ip_tunnel_info_opts(ip_tun); + + /* We need to populate the options in reverse order for HW. + * Therefore we go through the options, calculating the + * number of options and the total size, then we populate + * them in reverse order in the action list. + */ + opt_cnt = 0; + tot_push_len = 0; + opt_len = ip_tun->options_len; + while (opt_len > 0) { + struct geneve_opt *opt = (struct geneve_opt *)src; + + opt_cnt++; + if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) + return -EOPNOTSUPP; + + tot_push_len += sizeof(struct nfp_fl_push_geneve) + + opt->length * 4; + if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) + return -EOPNOTSUPP; + + opt_len -= sizeof(struct geneve_opt) + opt->length * 4; + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + act_start = *list_len; + *list_len += tot_push_len; + src = ip_tunnel_info_opts(ip_tun); + while (opt_cnt) { + struct geneve_opt *opt = (struct geneve_opt *)src; + struct nfp_fl_push_geneve *push; + size_t act_size, len; + + opt_cnt--; + act_size = sizeof(struct nfp_fl_push_geneve) + opt->length * 4; + tot_push_len -= act_size; + len = act_start + tot_push_len; + + push = (struct nfp_fl_push_geneve 
*)&nfp_fl->action_data[len]; + push->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_GENEVE; + push->head.len_lw = act_size >> NFP_FL_LW_SIZ; + push->reserved = 0; + push->class = opt->opt_class; + push->type = opt->type; + push->length = opt->length; + memcpy(&push->opt_data, opt->opt_data, opt->length * 4); + + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +static int +nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, + struct nfp_fl_set_ipv4_udp_tun *set_tun, const struct tc_action *action, struct nfp_fl_pre_tunnel *pre_tun, enum nfp_flower_tun_type tun_type, @@ -237,11 +310,17 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, { size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; - if (ip_tun->options_len) + BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || + NFP_FL_TUNNEL_KEY != TUNNEL_KEY || + NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); + if (ip_tun->options_len && + (tun_type != NFP_FL_TUNNEL_GENEVE || + !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) return -EOPNOTSUPP; set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; @@ -281,11 +360,16 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, set_tun->tos = ip_tun->key.tos; - if (!(ip_tun->key.tun_flags & TUNNEL_KEY) || + if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) return -EOPNOTSUPP; set_tun->tun_flags = ip_tun->key.tun_flags; + if (tun_type == NFP_FL_TUNNEL_GENEVE) { + set_tun->tun_proto = htons(ETH_P_TEB); + set_tun->tun_len = ip_tun->options_len / 4; + } + /* Complete pre_tunnel action. 
*/ pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; @@ -674,9 +758,13 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); *a_len += sizeof(struct nfp_fl_pre_tunnel); + err = nfp_fl_push_geneve_options(nfp_fl, a_len, a); + if (err) + return err; + set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type, - netdev); + err = nfp_fl_set_ipv4_udp_tun(app, set_tun, a, pre_tun, + *tun_type, netdev); if (err) return err; *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 174acecfba01..f2aeae88cbf0 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -37,6 +37,7 @@ #include #include #include +#include #include "../nfp_app.h" #include "../nfpcore/nfp_cpp.h" @@ -81,6 +82,10 @@ #define NFP_FL_MAX_A_SIZ 1216 #define NFP_FL_LW_SIZ 2 +/* Maximum allowed geneve options */ +#define NFP_FL_MAX_GENEVE_OPT_ACT 32 +#define NFP_FL_MAX_GENEVE_OPT_CNT 64 + /* Action opcodes */ #define NFP_FL_ACTION_OPCODE_OUTPUT 0 #define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 @@ -94,6 +99,7 @@ #define NFP_FL_ACTION_OPCODE_SET_TCP 15 #define NFP_FL_ACTION_OPCODE_PRE_LAG 16 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26 #define NFP_FL_ACTION_OPCODE_NUM 32 #define NFP_FL_OUT_FLAGS_LAST BIT(15) @@ -206,7 +212,19 @@ struct nfp_fl_set_ipv4_udp_tun { __be16 tun_flags; u8 ttl; u8 tos; - __be32 extra[2]; + __be32 extra; + u8 tun_len; + u8 res2; + __be16 tun_proto; +}; + +struct nfp_fl_push_geneve { + struct nfp_fl_act_head head; + __be16 reserved; + __be16 class; + u8 type; + u8 length; + u8 opt_data[]; }; /* Metadata with L2 (1W/4B) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 
ef2114d13387..85f8209bf007 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -69,6 +69,7 @@ struct nfp_app; /* Extra features bitmap. */ #define NFP_FL_FEATS_GENEVE BIT(0) #define NFP_FL_NBI_MTU_SETTING BIT(1) +#define NFP_FL_FEATS_GENEVE_OPT BIT(2) #define NFP_FL_FEATS_LAG BIT(31) struct nfp_fl_mask_id { From 0a22b17a6b1ddb161fae7452faa892ba4d77ebe9 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Tue, 7 Aug 2018 17:36:03 +0200 Subject: [PATCH 6/6] nfp: flower: add geneve option match offload Introduce a new layer for matching on geneve options. This allows offloading filters configured to match geneve with options. Signed-off-by: Pieter Jansen van Vuuren Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/flower/cmsg.h | 6 +++ .../net/ethernet/netronome/nfp/flower/match.c | 25 ++++++++++++ .../ethernet/netronome/nfp/flower/offload.c | 38 +++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index f2aeae88cbf0..325954b829c8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -52,6 +52,7 @@ #define NFP_FLOWER_LAYER_VXLAN BIT(7) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) +#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) #define NFP_FLOWER_MASK_VLAN_CFI BIT(12) @@ -85,6 +86,7 @@ /* Maximum allowed geneve options */ #define NFP_FL_MAX_GENEVE_OPT_ACT 32 #define NFP_FL_MAX_GENEVE_OPT_CNT 64 +#define NFP_FL_MAX_GENEVE_OPT_KEY 32 /* Action opcodes */ #define NFP_FL_ACTION_OPCODE_OUTPUT 0 @@ -381,6 +383,10 @@ struct nfp_flower_ipv4_udp_tun { __be32 tun_id; }; +struct nfp_flower_geneve_options { + u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; +}; + #define NFP_FL_TUN_VNI_OFFSET 8 /* The base header for a control message packet. 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index b1cbe6927cba..a0c72f277faa 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -262,6 +262,21 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version); } +static int +nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow, + bool mask_version) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_enc_opts *opts; + + opts = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + target); + memcpy(key_buf, opts->data, opts->len); + + return 0; +} + static void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, struct tc_cls_flower_offload *flow, @@ -424,6 +439,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, nfp_flow->nfp_tun_ipv4_addr = tun_dst; nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst); } + + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + err = nfp_flower_compile_geneve_opt(ext, flow, false); + if (err) + return err; + + err = nfp_flower_compile_geneve_opt(msk, flow, true); + if (err) + return err; + } } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index d2230a0e49b9..2edab01c3beb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -66,6 +66,7 @@ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \ BIT(FLOW_DISSECTOR_KEY_MPLS) | \ BIT(FLOW_DISSECTOR_KEY_IP)) @@ -75,6 +76,7 @@ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ 
BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IP)) @@ -140,6 +142,21 @@ static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f) dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ICMP); } +static int +nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, + u32 *key_layer_two, int *key_size) +{ + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) + return -EOPNOTSUPP; + + if (enc_opts->len > 0) { + *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP; + *key_size += sizeof(struct nfp_flower_geneve_options); + } + + return 0; +} + static int nfp_flower_calculate_key_layers(struct nfp_app *app, struct nfp_fl_key_ls *ret_key_ls, @@ -153,6 +170,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, u32 key_layer_two; u8 key_layer; int key_size; + int err; if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) return -EOPNOTSUPP; @@ -178,6 +196,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL; struct flow_dissector_key_ports *mask_enc_ports = NULL; + struct flow_dissector_key_enc_opts *enc_op = NULL; struct flow_dissector_key_ports *enc_ports = NULL; struct flow_dissector_key_control *mask_enc_ctl = skb_flow_dissector_target(flow->dissector, @@ -214,11 +233,21 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (mask_enc_ports->dst != cpu_to_be16(~0)) return -EOPNOTSUPP; + if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS)) { + enc_op = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + flow->key); + } + switch (enc_ports->dst) { case htons(NFP_FL_VXLAN_PORT): *tun_type = NFP_FL_TUNNEL_VXLAN; key_layer |= NFP_FLOWER_LAYER_VXLAN; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (enc_op) + return -EOPNOTSUPP; break; case htons(NFP_FL_GENEVE_PORT): if 
(!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) @@ -228,6 +257,15 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, key_size += sizeof(struct nfp_flower_ext_meta); key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (!enc_op) + break; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) + return -EOPNOTSUPP; + err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two, + &key_size); + if (err) + return err; break; default: return -EOPNOTSUPP;