diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index 4ad5109059e0..bac5be4d4f43 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -20,6 +20,7 @@ config NFP tristate "Netronome(R) NFP4000/NFP6000 NIC driver" depends on PCI && PCI_MSI depends on VXLAN || VXLAN=n + depends on TLS && TLS_DEVICE || TLS_DEVICE=n select NET_DEVLINK ---help--- This driver supports the Netronome(R) NFP4000/NFP6000 based diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 87bf784f8e8f..2805641965f3 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -16,6 +16,7 @@ nfp-objs := \ nfpcore/nfp_rtsym.o \ nfpcore/nfp_target.o \ ccm.o \ + ccm_mbox.o \ nfp_asm.o \ nfp_app.o \ nfp_app_nic.o \ @@ -34,6 +35,11 @@ nfp-objs := \ nfp_shared_buf.o \ nic/main.o +ifeq ($(CONFIG_TLS_DEVICE),y) +nfp-objs += \ + crypto/tls.o +endif + ifeq ($(CONFIG_NFP_APP_FLOWER),y) nfp-objs += \ flower/action.o \ diff --git a/drivers/net/ethernet/netronome/nfp/ccm.c b/drivers/net/ethernet/netronome/nfp/ccm.c index 94476e41e261..71afd111bae3 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm.c +++ b/drivers/net/ethernet/netronome/nfp/ccm.c @@ -7,9 +7,6 @@ #include "nfp_app.h" #include "nfp_net.h" -#define NFP_CCM_TYPE_REPLY_BIT 7 -#define __NFP_CCM_REPLY(req) (BIT(NFP_CCM_TYPE_REPLY_BIT) | (req)) - #define ccm_warn(app, msg...) nn_dp_warn(&(app)->ctrl->dp, msg) #define NFP_CCM_TAG_ALLOC_SPAN (U16_MAX / 4) diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h index ac963b128203..01efa779ab31 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm.h +++ b/drivers/net/ethernet/netronome/nfp/ccm.h @@ -9,6 +9,7 @@ #include <linux/wait.h> struct nfp_app; +struct nfp_net; /* Firmware ABI */ @@ -21,15 +22,27 @@ enum nfp_ccm_type { NFP_CCM_TYPE_BPF_MAP_GETNEXT = 6, NFP_CCM_TYPE_BPF_MAP_GETFIRST = 7, NFP_CCM_TYPE_BPF_BPF_EVENT = 8, + NFP_CCM_TYPE_CRYPTO_RESET = 9, + NFP_CCM_TYPE_CRYPTO_ADD = 10, + NFP_CCM_TYPE_CRYPTO_DEL = 11, + NFP_CCM_TYPE_CRYPTO_UPDATE = 12, __NFP_CCM_TYPE_MAX, }; #define NFP_CCM_ABI_VERSION 1 +#define NFP_CCM_TYPE_REPLY_BIT 7 +#define __NFP_CCM_REPLY(req) (BIT(NFP_CCM_TYPE_REPLY_BIT) | (req)) + struct nfp_ccm_hdr { - u8 type; - u8 ver; - __be16 tag; + union { + struct { + u8 type; + u8 ver; + __be16 tag; + }; + __be32 raw; + }; }; static inline u8 nfp_ccm_get_type(struct sk_buff *skb) @@ -41,15 +54,31 @@ static inline u8 nfp_ccm_get_type(struct sk_buff *skb) return hdr->type; } -static inline unsigned int nfp_ccm_get_tag(struct sk_buff *skb) +static inline __be16 __nfp_ccm_get_tag(struct sk_buff *skb) { struct nfp_ccm_hdr *hdr; hdr = (struct nfp_ccm_hdr *)skb->data; - return be16_to_cpu(hdr->tag); + return hdr->tag; } +static inline unsigned int nfp_ccm_get_tag(struct sk_buff *skb) +{ + return be16_to_cpu(__nfp_ccm_get_tag(skb)); +} + +#define NFP_NET_MBOX_TLV_TYPE GENMASK(31, 16) +#define NFP_NET_MBOX_TLV_LEN GENMASK(15, 0) + +enum nfp_ccm_mbox_tlv_type { + NFP_NET_MBOX_TLV_TYPE_UNKNOWN = 0, + NFP_NET_MBOX_TLV_TYPE_END = 1, + NFP_NET_MBOX_TLV_TYPE_MSG = 2, + NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP = 3, + NFP_NET_MBOX_TLV_TYPE_RESV = 4, +};
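For orientation before the implementation below: every message placed in the mailbox is framed by a 32-bit TLV header, with the type in bits 31:16 and the byte length in bits 15:0, per the two masks just defined. A minimal userspace sketch of packing and unpacking such a header word (plain shifts stand in for the kernel's FIELD_PREP()/FIELD_GET(), and the values are made up):

```c
#include <stdint.h>
#include <stdio.h>

/* same layout as NFP_NET_MBOX_TLV_TYPE (bits 31:16) and NFP_NET_MBOX_TLV_LEN (bits 15:0) */
static uint32_t tlv_pack(uint16_t type, uint16_t len)
{
	return (uint32_t)type << 16 | len;
}

int main(void)
{
	uint32_t hdr = tlv_pack(2 /* NFP_NET_MBOX_TLV_TYPE_MSG */, 20);

	printf("type=%u len=%u\n", hdr >> 16, hdr & 0xffff);	/* type=2 len=20 */
	return 0;
}
```

+ /* Implementation */ /** @@ -80,4 +109,13 @@ void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb); struct sk_buff * nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb, enum nfp_ccm_type type, unsigned int reply_size); + +bool nfp_ccm_mbox_fits(struct nfp_net *nn,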
unsigned int size); +struct sk_buff * +nfp_ccm_mbox_alloc(struct nfp_net *nn, unsigned int req_size, + unsigned int reply_size, gfp_t flags); +int nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size); #endif diff --git a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c new file mode 100644 index 000000000000..e5acd96c3335 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c @@ -0,0 +1,591 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <linux/io.h> +#include <linux/skbuff.h> + +#include "ccm.h" +#include "nfp_net.h" + +/* CCM messages via the mailbox. CMSGs get wrapped into simple TLVs + * and copied into the mailbox. Multiple messages can be copied to + * form a batch. Threads come in with a CMSG formed in an skb, then + * enqueue that skb onto the request queue. If the thread's skb is first + * in the queue, that thread handles the mailbox operation. It copies + * up to 16 messages into the mailbox (making sure that both requests + * and replies will fit). After FW is done processing the batch it + * copies the data out and wakes the waiting threads. + * If a thread is waiting it either gets its message completed + * (the response is copied into the same skb as the request, overwriting + * it), or becomes the first in the queue. + * Completions and next-to-run are signaled via the control buffer + * to limit potential cache line bounces. + */ + +#define NFP_CCM_MBOX_BATCH_LIMIT 16 +#define NFP_CCM_TIMEOUT (NFP_NET_POLL_TIMEOUT * 1000) +#define NFP_CCM_MAX_QLEN 256 + +enum nfp_net_mbox_cmsg_state { + NFP_NET_MBOX_CMSG_STATE_QUEUED, + NFP_NET_MBOX_CMSG_STATE_NEXT, + NFP_NET_MBOX_CMSG_STATE_BUSY, + NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND, + NFP_NET_MBOX_CMSG_STATE_DONE, +}; + +/** + * struct nfp_ccm_mbox_cmsg_cb - CCM mailbox specific info + * @state: processing state (/stage) of the message + * @err: error encountered during processing if any + * @max_len: max(request_len, reply_len) + * @exp_reply: expected reply length (0 means don't validate) + */ +struct nfp_ccm_mbox_cmsg_cb { + enum nfp_net_mbox_cmsg_state state; + int err; + unsigned int max_len; + unsigned int exp_reply; +}; + +static u32 nfp_ccm_mbox_max_msg(struct nfp_net *nn) +{ + return round_down(nn->tlv_caps.mbox_len, 4) - + NFP_NET_CFG_MBOX_SIMPLE_VAL - /* common mbox command header */ + 4 * 2; /* Msg TLV plus End TLV headers */ +} + +static void +nfp_ccm_mbox_msg_init(struct sk_buff *skb, unsigned int exp_reply, int max_len) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->state = NFP_NET_MBOX_CMSG_STATE_QUEUED; + cb->err = 0; + cb->max_len = max_len; + cb->exp_reply = exp_reply; +} + +static int nfp_ccm_mbox_maxlen(const struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->max_len; +} + +static bool nfp_ccm_mbox_done(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->state == NFP_NET_MBOX_CMSG_STATE_DONE; +} + +static bool nfp_ccm_mbox_in_progress(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->state != NFP_NET_MBOX_CMSG_STATE_QUEUED && + cb->state != NFP_NET_MBOX_CMSG_STATE_NEXT; +} + +static void nfp_ccm_mbox_set_busy(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->state = NFP_NET_MBOX_CMSG_STATE_BUSY; +}
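To make nfp_ccm_mbox_max_msg() above concrete, here is the same arithmetic as a standalone sketch; the 128-byte mailbox and the 8-byte NFP_NET_CFG_MBOX_SIMPLE_VAL data offset are assumed example values, not taken from the patch:

```c
#include <stdio.h>

int main(void)
{
	unsigned int mbox_len = 128;	/* hypothetical mailbox TLV length */
	unsigned int simple_val = 8;	/* assumed NFP_NET_CFG_MBOX_SIMPLE_VAL */
	unsigned int max_msg;

	/* mirrors nfp_ccm_mbox_max_msg(): round down to a word, then drop
	 * the common command header and one Msg TLV + End TLV header pair
	 */
	max_msg = (mbox_len & ~3u) - simple_val - 4 * 2;
	printf("largest single cmsg: %u bytes\n", max_msg);	/* 112 */
	return 0;
}
```

The queue runner further down keeps adding messages to a batch until this byte budget, or NFP_CCM_MBOX_BATCH_LIMIT, runs out.

+ +static bool nfp_ccm_mbox_is_first(struct nfp_net *nn,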
struct sk_buff *skb) +{ + return skb_queue_is_first(&nn->mbox_cmsg.queue, skb); +} + +static bool nfp_ccm_mbox_should_run(struct nfp_net *nn, struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->state == NFP_NET_MBOX_CMSG_STATE_NEXT; +} + +static void nfp_ccm_mbox_mark_next_runner(struct nfp_net *nn) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + struct sk_buff *skb; + + skb = skb_peek(&nn->mbox_cmsg.queue); + if (!skb) + return; + + cb = (void *)skb->cb; + cb->state = NFP_NET_MBOX_CMSG_STATE_NEXT; +} + +static void +nfp_ccm_mbox_write_tlv(struct nfp_net *nn, u32 off, u32 type, u32 len) +{ + nn_writel(nn, off, + FIELD_PREP(NFP_NET_MBOX_TLV_TYPE, type) | + FIELD_PREP(NFP_NET_MBOX_TLV_LEN, len)); +} + +static void nfp_ccm_mbox_copy_in(struct nfp_net *nn, struct sk_buff *last) +{ + struct sk_buff *skb; + int reserve, i, cnt; + __be32 *data; + u32 off, len; + + off = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; + skb = __skb_peek(&nn->mbox_cmsg.queue); + while (true) { + nfp_ccm_mbox_write_tlv(nn, off, NFP_NET_MBOX_TLV_TYPE_MSG, + skb->len); + off += 4; + + /* Write data word by word, skb->data should be aligned */ + data = (__be32 *)skb->data; + cnt = skb->len / 4; + for (i = 0 ; i < cnt; i++) { + nn_writel(nn, off, be32_to_cpu(data[i])); + off += 4; + } + if (skb->len & 3) { + __be32 tmp = 0; + + memcpy(&tmp, &data[i], skb->len & 3); + nn_writel(nn, off, be32_to_cpu(tmp)); + off += 4; + } + + /* Reserve space if reply is bigger */ + len = round_up(skb->len, 4); + reserve = nfp_ccm_mbox_maxlen(skb) - len; + if (reserve > 0) { + nfp_ccm_mbox_write_tlv(nn, off, + NFP_NET_MBOX_TLV_TYPE_RESV, + reserve); + off += 4 + reserve; + } + + if (skb == last) + break; + skb = skb_queue_next(&nn->mbox_cmsg.queue, skb); + } + + nfp_ccm_mbox_write_tlv(nn, off, NFP_NET_MBOX_TLV_TYPE_END, 0); +} + +static struct sk_buff * +nfp_ccm_mbox_find_req(struct nfp_net *nn, __be16 tag, struct sk_buff *last) +{ + struct sk_buff *skb; + + skb = __skb_peek(&nn->mbox_cmsg.queue); + while (true) { + if (__nfp_ccm_get_tag(skb) == tag) + return skb; + + if (skb == last) + return NULL; + skb = skb_queue_next(&nn->mbox_cmsg.queue, skb); + } +} + +static void nfp_ccm_mbox_copy_out(struct nfp_net *nn, struct sk_buff *last) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + u8 __iomem *data, *end; + struct sk_buff *skb; + + data = nn->dp.ctrl_bar + nn->tlv_caps.mbox_off + + NFP_NET_CFG_MBOX_SIMPLE_VAL; + end = data + nn->tlv_caps.mbox_len; + + while (true) { + unsigned int length, offset, type; + struct nfp_ccm_hdr hdr; + __be32 *skb_data; + u32 tlv_hdr; + int i, cnt; + + tlv_hdr = readl(data); + type = FIELD_GET(NFP_NET_MBOX_TLV_TYPE, tlv_hdr); + length = FIELD_GET(NFP_NET_MBOX_TLV_LEN, tlv_hdr); + offset = data - nn->dp.ctrl_bar; + + /* Advance past the header */ + data += 4; + + if (data + length > end) { + nn_dp_warn(&nn->dp, "mailbox oversized TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + if (type == NFP_NET_MBOX_TLV_TYPE_END) + break; + if (type == NFP_NET_MBOX_TLV_TYPE_RESV) + goto next_tlv; + if (type != NFP_NET_MBOX_TLV_TYPE_MSG && + type != NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP) { + nn_dp_warn(&nn->dp, "mailbox unknown TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + if (length < 4) { + nn_dp_warn(&nn->dp, "mailbox msg too short to contain header TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + hdr.raw = cpu_to_be32(readl(data)); + + skb = nfp_ccm_mbox_find_req(nn, hdr.tag, last); + if (!skb) { + nn_dp_warn(&nn->dp, 
"mailbox request not found:%u\n", + be16_to_cpu(hdr.tag)); + break; + } + cb = (void *)skb->cb; + + if (type == NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP) { + nn_dp_warn(&nn->dp, + "mailbox msg not supported type:%d\n", + nfp_ccm_get_type(skb)); + cb->err = -EIO; + goto next_tlv; + } + + if (hdr.type != __NFP_CCM_REPLY(nfp_ccm_get_type(skb))) { + nn_dp_warn(&nn->dp, "mailbox msg reply wrong type:%u expected:%lu\n", + hdr.type, + __NFP_CCM_REPLY(nfp_ccm_get_type(skb))); + cb->err = -EIO; + goto next_tlv; + } + if (cb->exp_reply && length != cb->exp_reply) { + nn_dp_warn(&nn->dp, "mailbox msg reply wrong size type:%u expected:%u have:%u\n", + hdr.type, length, cb->exp_reply); + cb->err = -EIO; + goto next_tlv; + } + if (length > cb->max_len) { + nn_dp_warn(&nn->dp, "mailbox msg oversized reply type:%u max:%u have:%u\n", + hdr.type, cb->max_len, length); + cb->err = -EIO; + goto next_tlv; + } + + if (length <= skb->len) + __skb_trim(skb, length); + else + skb_put(skb, length - skb->len); + + /* We overcopy here slightly, but that's okay, the skb is large + * enough, and the garbage will be ignored (beyond skb->len). + */ + skb_data = (__be32 *)skb->data; + memcpy(skb_data, &hdr, 4); + + cnt = DIV_ROUND_UP(length, 4); + for (i = 1 ; i < cnt; i++) + skb_data[i] = cpu_to_be32(readl(data + i * 4)); + + cb->state = NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND; +next_tlv: + data += round_up(length, 4); + if (data + 4 > end) { + nn_dp_warn(&nn->dp, + "reached end of MBOX without END TLV\n"); + break; + } + } + + smp_wmb(); /* order the skb->data vs. cb->state */ + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + do { + skb = __skb_dequeue(&nn->mbox_cmsg.queue); + cb = (void *)skb->cb; + + if (cb->state != NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND) { + cb->err = -ENOENT; + smp_wmb(); /* order the cb->err vs. cb->state */ + } + cb->state = NFP_NET_MBOX_CMSG_STATE_DONE; + } while (skb != last); + + nfp_ccm_mbox_mark_next_runner(nn); + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +} + +static void +nfp_ccm_mbox_mark_all_err(struct nfp_net *nn, struct sk_buff *last, int err) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + struct sk_buff *skb; + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + do { + skb = __skb_dequeue(&nn->mbox_cmsg.queue); + cb = (void *)skb->cb; + + cb->err = err; + smp_wmb(); /* order the cb->err vs. cb->state */ + cb->state = NFP_NET_MBOX_CMSG_STATE_DONE; + } while (skb != last); + + nfp_ccm_mbox_mark_next_runner(nn); + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +} + +static void nfp_ccm_mbox_run_queue_unlock(struct nfp_net *nn) + __releases(&nn->mbox_cmsg.queue.lock) +{ + int space = nn->tlv_caps.mbox_len - NFP_NET_CFG_MBOX_SIMPLE_VAL; + struct sk_buff *skb, *last; + int cnt, err; + + space -= 4; /* for End TLV */ + + /* First skb must fit, because it's ours and we checked it fits */ + cnt = 1; + last = skb = __skb_peek(&nn->mbox_cmsg.queue); + space -= 4 + nfp_ccm_mbox_maxlen(skb); + + while (!skb_queue_is_last(&nn->mbox_cmsg.queue, last)) { + skb = skb_queue_next(&nn->mbox_cmsg.queue, last); + space -= 4 + nfp_ccm_mbox_maxlen(skb); + if (space < 0) + break; + last = skb; + nfp_ccm_mbox_set_busy(skb); + cnt++; + if (cnt == NFP_CCM_MBOX_BATCH_LIMIT) + break; + } + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + /* Now we own all skb's marked in progress, new requests may arrive + * at the end of the queue. 
+ */ + + nn_ctrl_bar_lock(nn); + + nfp_ccm_mbox_copy_in(nn, last); + + err = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_TLV_CMSG); + if (!err) + nfp_ccm_mbox_copy_out(nn, last); + else + nfp_ccm_mbox_mark_all_err(nn, last, -EIO); + + nn_ctrl_bar_unlock(nn); + + wake_up_all(&nn->mbox_cmsg.wq); +} + +static int nfp_ccm_mbox_skb_return(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + if (cb->err) + dev_kfree_skb_any(skb); + return cb->err; +} + +/* If wait timed out but the command is already in progress we have + * to wait until it finishes. The runner has ownership of the skbs + * marked as busy. + */ +static int +nfp_ccm_mbox_unlink_unlock(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type) + __releases(&nn->mbox_cmsg.queue.lock) +{ + bool was_first; + + if (nfp_ccm_mbox_in_progress(skb)) { + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + wait_event(nn->mbox_cmsg.wq, nfp_ccm_mbox_done(skb)); + smp_rmb(); /* pairs with smp_wmb() after data is written */ + return nfp_ccm_mbox_skb_return(skb); + } + + was_first = nfp_ccm_mbox_should_run(nn, skb); + __skb_unlink(skb, &nn->mbox_cmsg.queue); + if (was_first) + nfp_ccm_mbox_mark_next_runner(nn); + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + if (was_first) + wake_up_all(&nn->mbox_cmsg.wq); + + nn_dp_warn(&nn->dp, "time out waiting for mbox response to 0x%02x\n", + type); + return -ETIMEDOUT; +} + +static int +nfp_ccm_mbox_msg_prepare(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, unsigned int max_reply_size, + gfp_t flags) +{ + const unsigned int mbox_max = nfp_ccm_mbox_max_msg(nn); + unsigned int max_len; + ssize_t undersize; + int err; + + if (unlikely(!(nn->tlv_caps.mbox_cmsg_types & BIT(type)))) { + nn_dp_warn(&nn->dp, + "message type %d not supported by mailbox\n", type); + return -EINVAL; + } + + /* If the reply size is unknown assume it will take the entire + * mailbox; callers should do their best for this to never + * happen. + */ + if (!max_reply_size) + max_reply_size = mbox_max; + max_reply_size = round_up(max_reply_size, 4); + + /* Make sure we can fit the entire reply into the skb, + * and that we don't have to slow down the mbox handler + * with allocations.
+ */ + undersize = max_reply_size - (skb_end_pointer(skb) - skb->data); + if (undersize > 0) { + err = pskb_expand_head(skb, 0, undersize, flags); + if (err) { + nn_dp_warn(&nn->dp, + "can't allocate reply buffer for mailbox\n"); + return err; + } + } + + /* Make sure that request and response both fit into the mailbox */ + max_len = max(max_reply_size, round_up(skb->len, 4)); + if (max_len > mbox_max) { + nn_dp_warn(&nn->dp, + "message too big for the mailbox: %u/%u vs %u\n", + skb->len, max_reply_size, mbox_max); + return -EMSGSIZE; + } + + nfp_ccm_mbox_msg_init(skb, reply_size, max_len); + + return 0; +} + +static int +nfp_ccm_mbox_msg_enqueue(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type) +{ + struct nfp_ccm_hdr *hdr; + + assert_spin_locked(&nn->mbox_cmsg.queue.lock); + + if (nn->mbox_cmsg.queue.qlen >= NFP_CCM_MAX_QLEN) { + nn_dp_warn(&nn->dp, "mailbox request queue too long\n"); + return -EBUSY; + } + + hdr = (void *)skb->data; + hdr->ver = NFP_CCM_ABI_VERSION; + hdr->type = type; + hdr->tag = cpu_to_be16(nn->mbox_cmsg.tag++); + + __skb_queue_tail(&nn->mbox_cmsg.queue, skb); + + return 0; +} + +int nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size) +{ + int err; + + err = nfp_ccm_mbox_msg_prepare(nn, skb, type, reply_size, + max_reply_size, GFP_KERNEL); + if (err) + goto err_free_skb; + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + err = nfp_ccm_mbox_msg_enqueue(nn, skb, type); + if (err) + goto err_unlock; + + /* First in queue takes the mailbox lock and processes the batch */ + if (!nfp_ccm_mbox_is_first(nn, skb)) { + bool to; + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + to = !wait_event_timeout(nn->mbox_cmsg.wq, + nfp_ccm_mbox_done(skb) || + nfp_ccm_mbox_should_run(nn, skb), + msecs_to_jiffies(NFP_CCM_TIMEOUT)); + + /* fast path for those completed by another thread */ + if (nfp_ccm_mbox_done(skb)) { + smp_rmb(); /* pairs with wmb after data is written */ + return nfp_ccm_mbox_skb_return(skb); + } + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + if (!nfp_ccm_mbox_is_first(nn, skb)) { + WARN_ON(!to); + + err = nfp_ccm_mbox_unlink_unlock(nn, skb, type); + if (err) + goto err_free_skb; + return 0; + } + } + + /* run queue expects the lock held */ + nfp_ccm_mbox_run_queue_unlock(nn); + return nfp_ccm_mbox_skb_return(skb); + +err_unlock: + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +err_free_skb: + dev_kfree_skb_any(skb); + return err; +} + +struct sk_buff * +nfp_ccm_mbox_alloc(struct nfp_net *nn, unsigned int req_size, + unsigned int reply_size, gfp_t flags) +{ + unsigned int max_size; + struct sk_buff *skb; + + if (!reply_size) + max_size = nfp_ccm_mbox_max_msg(nn); + else + max_size = max(req_size, reply_size); + max_size = round_up(max_size, 4); + + skb = alloc_skb(max_size, flags); + if (!skb) + return NULL; + + skb_put(skb, req_size); + + return skb; +} + +bool nfp_ccm_mbox_fits(struct nfp_net *nn, unsigned int size) +{ + return nfp_ccm_mbox_max_msg(nn) >= size; +}
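Since the reply is written over the request in place, nfp_ccm_mbox_alloc() sizes the skb for whichever of the two is larger, rounded up to a word, and then reserves only req_size bytes with skb_put(). A standalone sketch of that sizing rule (the 12- and 18-byte sizes are hypothetical):

```c
#include <stdio.h>

static unsigned int round_up4(unsigned int x)
{
	return (x + 3) & ~3u;
}

int main(void)
{
	/* mirrors nfp_ccm_mbox_alloc(): the buffer must hold whichever of
	 * request and reply is larger, padded to a 4-byte multiple, so the
	 * reply can later overwrite the request in place
	 */
	unsigned int req_size = 12, reply_size = 18;	/* hypothetical */
	unsigned int max_size = req_size > reply_size ? req_size : reply_size;

	printf("allocate %u bytes, put %u\n", round_up4(max_size), req_size);
	return 0;
}
```

diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h new file mode 100644 index 000000000000..1f97fb443134 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc.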
*/ + +#ifndef NFP_CRYPTO_H +#define NFP_CRYPTO_H 1 + +struct nfp_net_tls_offload_ctx { + __be32 fw_handle[2]; + + u32 next_seq; + bool out_of_sync; +}; + +#ifdef CONFIG_TLS_DEVICE +int nfp_net_tls_init(struct nfp_net *nn); +#else +static inline int nfp_net_tls_init(struct nfp_net *nn) +{ + return 0; +} +#endif + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/fw.h b/drivers/net/ethernet/netronome/nfp/crypto/fw.h new file mode 100644 index 000000000000..192ba907d91b --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/fw.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#ifndef NFP_CRYPTO_FW_H +#define NFP_CRYPTO_FW_H 1 + +#include "../ccm.h" + +#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC 0 +#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC 1 + +struct nfp_crypto_reply_simple { + struct nfp_ccm_hdr hdr; + __be32 error; +}; + +struct nfp_crypto_req_reset { + struct nfp_ccm_hdr hdr; + __be32 ep_id; +}; + +#define NFP_NET_TLS_IPVER GENMASK(15, 12) +#define NFP_NET_TLS_VLAN GENMASK(11, 0) +#define NFP_NET_TLS_VLAN_UNUSED 4095 + +struct nfp_crypto_req_add_front { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + u8 resv[3]; + u8 opcode; + u8 key_len; + __be16 ipver_vlan __packed; + u8 l4_proto; +}; + +struct nfp_crypto_req_add_back { + __be16 src_port; + __be16 dst_port; + __be32 key[8]; + __be32 salt; + __be32 iv[2]; + __be32 counter; + __be32 rec_no[2]; + __be32 tcp_seq; +}; + +struct nfp_crypto_req_add_v4 { + struct nfp_crypto_req_add_front front; + __be32 src_ip; + __be32 dst_ip; + struct nfp_crypto_req_add_back back; +}; + +struct nfp_crypto_req_add_v6 { + struct nfp_crypto_req_add_front front; + __be32 src_ip[4]; + __be32 dst_ip[4]; + struct nfp_crypto_req_add_back back; +}; + +struct nfp_crypto_reply_add { + struct nfp_ccm_hdr hdr; + __be32 error; + __be32 handle[2]; +}; + +struct nfp_crypto_req_del { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + __be32 handle[2]; +}; + +struct nfp_crypto_req_update { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + u8 resv[3]; + u8 opcode; + __be32 handle[2]; + __be32 rec_no[2]; + __be32 tcp_seq; +}; +#endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c new file mode 100644 index 000000000000..c638223e9f60 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#include <linux/bitfield.h> +#include <linux/ipv6.h> +#include <linux/skbuff.h> +#include <net/tls.h> + +#include "../ccm.h" +#include "../nfp_net.h" +#include "crypto.h" +#include "fw.h" + +#define NFP_NET_TLS_CCM_MBOX_OPS_MASK \ + (BIT(NFP_CCM_TYPE_CRYPTO_RESET) | \ + BIT(NFP_CCM_TYPE_CRYPTO_ADD) | \ + BIT(NFP_CCM_TYPE_CRYPTO_DEL) | \ + BIT(NFP_CCM_TYPE_CRYPTO_UPDATE)) + +#define NFP_NET_TLS_OPCODE_MASK_RX \ + BIT(NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC) + +#define NFP_NET_TLS_OPCODE_MASK_TX \ + BIT(NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC) + +#define NFP_NET_TLS_OPCODE_MASK \ + (NFP_NET_TLS_OPCODE_MASK_RX | NFP_NET_TLS_OPCODE_MASK_TX) + +static void nfp_net_crypto_set_op(struct nfp_net *nn, u8 opcode, bool on) +{ + u32 off, val; + + off = nn->tlv_caps.crypto_enable_off + round_down(opcode / 8, 4); + + val = nn_readl(nn, off); + if (on) + val |= BIT(opcode & 31); + else + val &= ~BIT(opcode & 31); + nn_writel(nn, off, val); +} + +static bool +__nfp_net_tls_conn_cnt_changed(struct nfp_net *nn, int add, + enum tls_offload_ctx_dir direction) +{ + u8 opcode; + int cnt; + + opcode = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + nn->ktls_tx_conn_cnt += add; + cnt = nn->ktls_tx_conn_cnt; + nn->dp.ktls_tx = !!nn->ktls_tx_conn_cnt; + + /* Care only about 0 -> 1 and 1 -> 0 transitions */ + if (cnt > 1) + return false; + + nfp_net_crypto_set_op(nn, opcode, cnt); + return true; +} + +static int +nfp_net_tls_conn_cnt_changed(struct nfp_net *nn, int add, + enum tls_offload_ctx_dir direction) +{ + int ret = 0; + + /* Use the BAR lock to protect the connection counts */ + nn_ctrl_bar_lock(nn); + if (__nfp_net_tls_conn_cnt_changed(nn, add, direction)) { + ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_CRYPTO); + /* Undo the cnt adjustment if failed */ + if (ret) + __nfp_net_tls_conn_cnt_changed(nn, -add, direction); + } + nn_ctrl_bar_unlock(nn); + + return ret; +} + +static int +nfp_net_tls_conn_add(struct nfp_net *nn, enum tls_offload_ctx_dir direction) +{ + return nfp_net_tls_conn_cnt_changed(nn, 1, direction); +} + +static int +nfp_net_tls_conn_remove(struct nfp_net *nn, enum tls_offload_ctx_dir direction) +{ + return nfp_net_tls_conn_cnt_changed(nn, -1, direction); +} + +static struct sk_buff * +nfp_net_tls_alloc_simple(struct nfp_net *nn, size_t req_sz, gfp_t flags) +{ + return nfp_ccm_mbox_alloc(nn, req_sz, + sizeof(struct nfp_crypto_reply_simple), + flags); +} + +static int +nfp_net_tls_communicate_simple(struct nfp_net *nn, struct sk_buff *skb, + const char *name, enum nfp_ccm_type type) +{ + struct nfp_crypto_reply_simple *reply; + int err; + + err = nfp_ccm_mbox_communicate(nn, skb, type, + sizeof(*reply), sizeof(*reply)); + if (err) { + nn_dp_warn(&nn->dp, "failed to %s TLS: %d\n", name, err); + return err; + } + + reply = (void *)skb->data; + err = -be32_to_cpu(reply->error); + if (err) + nn_dp_warn(&nn->dp, "failed to %s TLS, fw replied: %d\n", + name, err); + dev_consume_skb_any(skb); + + return err; +} + +static void nfp_net_tls_del_fw(struct nfp_net *nn, __be32 *fw_handle) +{ + struct nfp_crypto_req_del *req; + struct sk_buff *skb; + + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), GFP_KERNEL); + if (!skb) + return; + + req = (void *)skb->data; + req->ep_id = 0; + memcpy(req->handle, fw_handle, sizeof(req->handle)); + + nfp_net_tls_communicate_simple(nn, skb, "delete", + NFP_CCM_TYPE_CRYPTO_DEL); +}
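The address helpers below pack the IP version and the VLAN into the single ipver_vlan field, IP version in bits 15:12 and VLAN in bits 11:0, with 4095 (NFP_NET_TLS_VLAN_UNUSED) meaning no VLAN. A quick standalone check of the resulting value before the cpu_to_be16() conversion:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* NFP_NET_TLS_IPVER is GENMASK(15, 12), NFP_NET_TLS_VLAN GENMASK(11, 0) */
	uint16_t ipver = 4;		/* IPv4 */
	uint16_t vlan = 4095;		/* NFP_NET_TLS_VLAN_UNUSED */
	uint16_t ipver_vlan = (uint16_t)(ipver << 12 | vlan);

	printf("ipver_vlan = 0x%04x\n", ipver_vlan);	/* 0x4fff */
	return 0;
}
```

+ +static struct nfp_crypto_req_add_back * +nfp_net_tls_set_ipv4(struct nfp_crypto_req_add_v4 *req, struct sock *sk, + int direction) +{ + struct inet_sock *inet = inet_sk(sk); + + req->front.key_len += sizeof(__be32) * 2; +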
req->front.ipver_vlan = cpu_to_be16(FIELD_PREP(NFP_NET_TLS_IPVER, 4) | + FIELD_PREP(NFP_NET_TLS_VLAN, + NFP_NET_TLS_VLAN_UNUSED)); + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + req->src_ip = inet->inet_saddr; + req->dst_ip = inet->inet_daddr; + } else { + req->src_ip = inet->inet_daddr; + req->dst_ip = inet->inet_saddr; + } + + return &req->back; +} + +static struct nfp_crypto_req_add_back * +nfp_net_tls_set_ipv6(struct nfp_crypto_req_add_v6 *req, struct sock *sk, + int direction) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6_pinfo *np = inet6_sk(sk); + + req->front.key_len += sizeof(struct in6_addr) * 2; + req->front.ipver_vlan = cpu_to_be16(FIELD_PREP(NFP_NET_TLS_IPVER, 6) | + FIELD_PREP(NFP_NET_TLS_VLAN, + NFP_NET_TLS_VLAN_UNUSED)); + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + memcpy(req->src_ip, &np->saddr, sizeof(req->src_ip)); + memcpy(req->dst_ip, &sk->sk_v6_daddr, sizeof(req->dst_ip)); + } else { + memcpy(req->src_ip, &sk->sk_v6_daddr, sizeof(req->src_ip)); + memcpy(req->dst_ip, &np->saddr, sizeof(req->dst_ip)); + } + +#endif + return &req->back; +} + +static void +nfp_net_tls_set_l4(struct nfp_crypto_req_add_front *front, + struct nfp_crypto_req_add_back *back, struct sock *sk, + int direction) +{ + struct inet_sock *inet = inet_sk(sk); + + front->l4_proto = IPPROTO_TCP; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + back->src_port = inet->inet_sport; + back->dst_port = inet->inet_dport; + } else { + back->src_port = inet->inet_dport; + back->dst_port = inet->inet_sport; + } +} + +static u8 nfp_tls_1_2_dir_to_opcode(enum tls_offload_ctx_dir direction) +{ + switch (direction) { + case TLS_OFFLOAD_CTX_DIR_TX: + return NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + case TLS_OFFLOAD_CTX_DIR_RX: + return NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC; + default: + WARN_ON_ONCE(1); + return 0; + } +} + +static bool +nfp_net_cipher_supported(struct nfp_net *nn, u16 cipher_type, + enum tls_offload_ctx_dir direction) +{ + u8 bit; + + switch (cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + bit = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + else + return false; + break; + default: + return false; + } + + return nn->tlv_caps.crypto_ops & BIT(bit); +} + +static int +nfp_net_tls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct tls12_crypto_info_aes_gcm_128 *tls_ci; + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_crypto_req_add_front *front; + struct nfp_net_tls_offload_ctx *ntls; + struct nfp_crypto_req_add_back *back; + struct nfp_crypto_reply_add *reply; + struct sk_buff *skb; + size_t req_sz; + bool ipv6; + int err; + + BUILD_BUG_ON(sizeof(struct nfp_net_tls_offload_ctx) > + TLS_DRIVER_STATE_SIZE_TX); + + if (!nfp_net_cipher_supported(nn, crypto_info->cipher_type, direction)) + return -EOPNOTSUPP; + + switch (sk->sk_family) { +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (sk->sk_ipv6only || + ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) { + req_sz = sizeof(struct nfp_crypto_req_add_v6); + ipv6 = true; + break; + } +#endif + /* fall through */ + case AF_INET: + req_sz = sizeof(struct nfp_crypto_req_add_v4); + ipv6 = false; + break; + default: + return -EOPNOTSUPP; + } + + err = nfp_net_tls_conn_add(nn, direction); + if (err) + return err; + + skb = nfp_ccm_mbox_alloc(nn, req_sz, sizeof(*reply), GFP_KERNEL); + if (!skb) { + err = -ENOMEM; + goto err_conn_remove; + } + + front = (void *)skb->data; + front->ep_id = 
0; + front->key_len = 8; + front->opcode = nfp_tls_1_2_dir_to_opcode(direction); + memset(front->resv, 0, sizeof(front->resv)); + + if (ipv6) + back = nfp_net_tls_set_ipv6((void *)skb->data, sk, direction); + else + back = nfp_net_tls_set_ipv4((void *)skb->data, sk, direction); + + nfp_net_tls_set_l4(front, back, sk, direction); + + back->counter = 0; + back->tcp_seq = cpu_to_be32(start_offload_tcp_sn); + + tls_ci = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + memcpy(back->key, tls_ci->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memset(&back->key[TLS_CIPHER_AES_GCM_128_KEY_SIZE / 4], 0, + sizeof(back->key) - TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(back->iv, tls_ci->iv, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(&back->salt, tls_ci->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(back->rec_no, tls_ci->rec_seq, sizeof(tls_ci->rec_seq)); + + err = nfp_ccm_mbox_communicate(nn, skb, NFP_CCM_TYPE_CRYPTO_ADD, + sizeof(*reply), sizeof(*reply)); + if (err) { + nn_dp_warn(&nn->dp, "failed to add TLS: %d\n", err); + /* communicate frees skb on error */ + goto err_conn_remove; + } + + reply = (void *)skb->data; + err = -be32_to_cpu(reply->error); + if (err) { + if (err == -ENOSPC) { + if (!atomic_fetch_inc(&nn->ktls_no_space)) + nn_info(nn, "HW TLS table full\n"); + } else { + nn_dp_warn(&nn->dp, + "failed to add TLS, FW replied: %d\n", err); + } + goto err_free_skb; + } + + if (!reply->handle[0] && !reply->handle[1]) { + nn_dp_warn(&nn->dp, "FW returned NULL handle\n"); + err = -EINVAL; + goto err_fw_remove; + } + + ntls = tls_driver_ctx(sk, direction); + memcpy(ntls->fw_handle, reply->handle, sizeof(ntls->fw_handle)); + ntls->next_seq = start_offload_tcp_sn; + dev_consume_skb_any(skb); + + return 0; + +err_fw_remove: + nfp_net_tls_del_fw(nn, reply->handle); +err_free_skb: + dev_consume_skb_any(skb); +err_conn_remove: + nfp_net_tls_conn_remove(nn, direction); + return err; +} + +static void +nfp_net_tls_del(struct net_device *netdev, struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + + nfp_net_tls_conn_remove(nn, direction); + + ntls = __tls_driver_ctx(tls_ctx, direction); + nfp_net_tls_del_fw(nn, ntls->fw_handle); +} + +static const struct tlsdev_ops nfp_net_tls_ops = { + .tls_dev_add = nfp_net_tls_add, + .tls_dev_del = nfp_net_tls_del, +}; + +static int nfp_net_tls_reset(struct nfp_net *nn) +{ + struct nfp_crypto_req_reset *req; + struct sk_buff *skb; + + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + req = (void *)skb->data; + req->ep_id = 0; + + return nfp_net_tls_communicate_simple(nn, skb, "reset", + NFP_CCM_TYPE_CRYPTO_RESET); +} + +int nfp_net_tls_init(struct nfp_net *nn) +{ + struct net_device *netdev = nn->dp.netdev; + int err; + + if (!(nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK)) + return 0; + + if ((nn->tlv_caps.mbox_cmsg_types & NFP_NET_TLS_CCM_MBOX_OPS_MASK) != + NFP_NET_TLS_CCM_MBOX_OPS_MASK) + return 0; + + if (!nfp_ccm_mbox_fits(nn, sizeof(struct nfp_crypto_req_add_v6))) { + nn_warn(nn, "disabling TLS offload - mbox too small: %d\n", + nn->tlv_caps.mbox_len); + return 0; + } + + err = nfp_net_tls_reset(nn); + if (err) + return err; + + nn_ctrl_bar_lock(nn); + nn_writel(nn, nn->tlv_caps.crypto_enable_off, 0); + err = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_CRYPTO); + nn_ctrl_bar_unlock(nn); + if (err) + return err; + + if (nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK_TX) { + netdev->hw_features |= NETIF_F_HW_TLS_TX; +
netdev->features |= NETIF_F_HW_TLS_TX; + } + + netdev->tlsdev_ops = &nfp_net_tls_ops; + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index df9aff2684ed..661fa5941b91 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -12,11 +12,13 @@ #ifndef _NFP_NET_H_ #define _NFP_NET_H_ +#include <linux/atomic.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/semaphore.h> #include <net/xdp.h> #include "nfp_net_ctrl.h" @@ -372,6 +374,11 @@ struct nfp_net_rx_ring { * @hw_csum_tx_inner: Counter of inner TX checksum offload requests * @tx_gather: Counter of packets with Gather DMA * @tx_lso: Counter of LSO packets sent + * @hw_tls_tx: Counter of TLS packets sent with crypto offloaded to HW + * @tls_tx_fallback: Counter of TLS packets sent which had to be encrypted + * by the fallback path because packets came out of order + * @tls_tx_no_fallback: Counter of TLS packets not sent because the fallback + * path could not encrypt them * @tx_errors: How many TX errors were encountered * @tx_busy: How often was TX busy (no space)? * @rx_replace_buf_alloc_fail: Counter of RX buffer allocation failures @@ -409,21 +416,28 @@ struct nfp_net_r_vector { u64 hw_csum_rx_inner_ok; u64 hw_csum_rx_complete; + u64 hw_csum_rx_error; + u64 rx_replace_buf_alloc_fail; + struct nfp_net_tx_ring *xdp_ring; struct u64_stats_sync tx_sync; u64 tx_pkts; u64 tx_bytes; - u64 hw_csum_tx; + + u64 ____cacheline_aligned_in_smp hw_csum_tx; u64 hw_csum_tx_inner; u64 tx_gather; u64 tx_lso; + u64 hw_tls_tx; - u64 hw_csum_rx_error; - u64 rx_replace_buf_alloc_fail; + u64 tls_tx_fallback; + u64 tls_tx_no_fallback; u64 tx_errors; u64 tx_busy; + /* Cold data follows */ + u32 irq_vector; irq_handler_t handler; char name[IFNAMSIZ + 8]; @@ -458,6 +472,7 @@ struct nfp_stat_pair { * @netdev: Backpointer to net_device structure * @is_vf: Is the driver attached to a VF? * @chained_metadata_format: Firmware will use new metadata format + * @ktls_tx: Is kTLS TX enabled? * @rx_dma_dir: Mapping direction for RX buffers * @rx_dma_off: Offset at which DMA packets (for XDP headroom) * @rx_offset: Offset in the RX buffers where packet data starts @@ -482,6 +497,7 @@ struct nfp_net_dp { u8 is_vf:1; u8 chained_metadata_format:1; + u8 ktls_tx:1; u8 rx_dma_dir; u8 rx_offset; @@ -549,7 +565,7 @@ struct nfp_net_dp { * @reconfig_timer: Timer for async reading of reconfig results * @reconfig_in_progress_update: Update FW is processing now (debug only) * @bar_lock: vNIC config BAR access lock, protects: update, - * mailbox area + * mailbox area, crypto TLV * @link_up: Is the link up?
* @link_status_lock: Protects @link_* and ensures atomicity with BAR reading * @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter @@ -562,6 +578,13 @@ struct nfp_net_dp { * @tx_bar: Pointer to mapped TX queues * @rx_bar: Pointer to mapped FL/RX queues * @tlv_caps: Parsed TLV capabilities + * @ktls_tx_conn_cnt: Number of offloaded kTLS TX connections + * @ktls_no_space: Counter of firmware rejecting kTLS connection due to + * lack of space + * @mbox_cmsg: Common Control Message via vNIC mailbox state + * @mbox_cmsg.queue: CCM mbox queue of pending messages + * @mbox_cmsg.wq: CCM mbox wait queue of waiting processes + * @mbox_cmsg.tag: CCM mbox message tag allocator * @debugfs_dir: Device directory in debugfs * @vnic_list: Entry on device vNIC list * @pdev: Backpointer to PCI device @@ -620,7 +643,7 @@ struct nfp_net { struct timer_list reconfig_timer; u32 reconfig_in_progress_update; - struct mutex bar_lock; + struct semaphore bar_lock; u32 rx_coalesce_usecs; u32 rx_coalesce_max_frames; @@ -637,6 +660,16 @@ struct nfp_net { struct nfp_net_tlv_caps tlv_caps; + unsigned int ktls_tx_conn_cnt; + + atomic_t ktls_no_space; + + struct { + struct sk_buff_head queue; + wait_queue_head_t wq; + u16 tag; + } mbox_cmsg; + struct dentry *debugfs_dir; struct list_head vnic_list; @@ -848,12 +881,12 @@ static inline void nfp_ctrl_unlock(struct nfp_net *nn) static inline void nn_ctrl_bar_lock(struct nfp_net *nn) { - mutex_lock(&nn->bar_lock); + down(&nn->bar_lock); } static inline void nn_ctrl_bar_unlock(struct nfp_net *nn) { - mutex_unlock(&nn->bar_lock); + up(&nn->bar_lock); } /* Globals */ @@ -883,6 +916,7 @@ void nfp_ctrl_close(struct nfp_net *nn); void nfp_net_set_ethtool_ops(struct net_device *netdev); void nfp_net_info(struct nfp_net *nn); +int __nfp_net_reconfig(struct nfp_net *nn, u32 update); int nfp_net_reconfig(struct nfp_net *nn, u32 update); unsigned int nfp_net_rss_key_sz(struct nfp_net *nn); void nfp_net_rss_write_itbl(struct nfp_net *nn); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index b82b684f52ce..e221847d9a3e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -37,6 +36,7 @@ #include #include +#include #include #include "nfpcore/nfp_nsp.h" @@ -45,6 +45,7 @@ #include "nfp_net.h" #include "nfp_net_sriov.h" #include "nfp_port.h" +#include "crypto/crypto.h" /** * nfp_net_get_fw_version() - Read and parse the FW version @@ -271,12 +272,10 @@ static void nfp_net_reconfig_wait_posted(struct nfp_net *nn) * * Return: Negative errno on error, 0 on success */ -static int __nfp_net_reconfig(struct nfp_net *nn, u32 update) +int __nfp_net_reconfig(struct nfp_net *nn, u32 update) { int ret; - lockdep_assert_held(&nn->bar_lock); - nfp_net_reconfig_sync_enter(nn); nfp_net_reconfig_start(nn, update); @@ -331,7 +330,6 @@ int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd) u32 mbox = nn->tlv_caps.mbox_off; int ret; - lockdep_assert_held(&nn->bar_lock); nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd); ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX); @@ -804,6 +802,72 @@ static void nfp_net_tx_csum(struct nfp_net_dp *dp, u64_stats_update_end(&r_vec->tx_sync); } +#ifdef CONFIG_TLS_DEVICE +static struct sk_buff * +nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, u64 *tls_handle, int *nr_frags) +{ + 
struct nfp_net_tls_offload_ctx *ntls; + struct sk_buff *nskb; + u32 datalen, seq; + + if (likely(!dp->ktls_tx)) + return skb; + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + return skb; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + seq = ntohl(tcp_hdr(skb)->seq); + ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + if (unlikely(ntls->next_seq != seq || ntls->out_of_sync)) { + /* Pure ACK out of order already */ + if (!datalen) + return skb; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tls_tx_fallback++; + u64_stats_update_end(&r_vec->tx_sync); + + nskb = tls_encrypt_skb(skb); + if (!nskb) { + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tls_tx_no_fallback++; + u64_stats_update_end(&r_vec->tx_sync); + return NULL; + } + /* encryption wasn't necessary */ + if (nskb == skb) + return skb; + /* we don't re-check ring space */ + if (unlikely(skb_is_nonlinear(nskb))) { + nn_dp_warn(dp, "tls_encrypt_skb() produced fragmented frame\n"); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(nskb); + return NULL; + } + + /* jump forward, a TX may have gotten lost, need to sync TX */ + if (!ntls->out_of_sync && seq - ntls->next_seq < U32_MAX / 4) + ntls->out_of_sync = true; + + *nr_frags = 0; + return nskb; + } + + if (datalen) { + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->hw_tls_tx++; + u64_stats_update_end(&r_vec->tx_sync); + } + + memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle)); + ntls->next_seq += datalen; + return skb; +} +#endif + static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) { wmb(); @@ -811,24 +875,47 @@ static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) tx_ring->wr_ptr_add = 0; } -static int nfp_net_prep_port_id(struct sk_buff *skb) +static int nfp_net_prep_tx_meta(struct sk_buff *skb, u64 tls_handle) { struct metadata_dst *md_dst = skb_metadata_dst(skb); unsigned char *data; + u32 meta_id = 0; + int md_bytes; - if (likely(!md_dst)) - return 0; - if (unlikely(md_dst->type != METADATA_HW_PORT_MUX)) + if (likely(!md_dst && !tls_handle)) return 0; + if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) { + if (!tls_handle) + return 0; + md_dst = NULL; + } - if (unlikely(skb_cow_head(skb, 8))) + md_bytes = 4 + !!md_dst * 4 + !!tls_handle * 8; + + if (unlikely(skb_cow_head(skb, md_bytes))) return -ENOMEM; - data = skb_push(skb, 8); - put_unaligned_be32(NFP_NET_META_PORTID, data); - put_unaligned_be32(md_dst->u.port_info.port_id, data + 4); + data = skb_push(skb, md_bytes) + md_bytes; + if (md_dst) { + data -= 4; + put_unaligned_be32(md_dst->u.port_info.port_id, data); + meta_id = NFP_NET_META_PORTID; + } + if (tls_handle) { + /* conn handle is opaque, we just use u64 to be able to quickly + * compare it to zero + */ + data -= 8; + memcpy(data, &tls_handle, sizeof(tls_handle)); + meta_id <<= NFP_NET_META_FIELD_SIZE; + meta_id |= NFP_NET_META_CONN_HANDLE; + } - return 8; + data -= 4; + put_unaligned_be32(meta_id, data); + + return md_bytes; }
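nfp_net_prep_tx_meta() above builds the TX metadata prepend back to front: an 8-byte TLS connection handle and/or a 4-byte port id, topped by one 32-bit word holding the field-type nibbles. A standalone sketch of the length and type-word computation for a frame carrying both fields (this assumes NFP_NET_META_FIELD_SIZE is 4 bits, as the nibble layout suggests):

```c
#include <stdio.h>

int main(void)
{
	/* mirrors nfp_net_prep_tx_meta(): one 4B field-type word, plus 4B
	 * port id if a metadata dst is present, plus an 8B TLS connection
	 * handle if the frame belongs to an offloaded kTLS socket
	 */
	int has_md_dst = 1, has_tls = 1;
	int md_bytes = 4 + !!has_md_dst * 4 + !!has_tls * 8;
	unsigned int meta_id = 0;

	if (has_md_dst)
		meta_id = 5;			/* NFP_NET_META_PORTID */
	if (has_tls)
		meta_id = meta_id << 4 | 7;	/* NFP_NET_META_CONN_HANDLE */

	printf("md_bytes=%d meta_id=0x%x\n", md_bytes, meta_id);	/* 16, 0x57 */
	return 0;
}
```

 /** @@ -851,6 +938,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) struct nfp_net_dp *dp; dma_addr_t dma_addr; unsigned int fsize; + u64 tls_handle = 0; u16 qidx; dp = &nn->dp; @@ -872,18 +960,23 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } - md_bytes = nfp_net_prep_port_id(skb); - if (unlikely(md_bytes < 0)) { +#ifdef CONFIG_TLS_DEVICE + skb =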
nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags); + if (unlikely(!skb)) { nfp_net_tx_xmit_more_flush(tx_ring); - dev_kfree_skb_any(skb); return NETDEV_TX_OK; } +#endif + + md_bytes = nfp_net_prep_tx_meta(skb, tls_handle); + if (unlikely(md_bytes < 0)) + goto err_flush; /* Start with the head skbuf */ dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(dp->dev, dma_addr)) - goto err_free; + goto err_dma_err; wr_idx = D_IDX(tx_ring, tx_ring->wr_p); @@ -979,8 +1072,9 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) tx_ring->txbufs[wr_idx].skb = NULL; tx_ring->txbufs[wr_idx].dma_addr = 0; tx_ring->txbufs[wr_idx].fidx = -2; -err_free: +err_dma_err: nn_dp_warn(dp, "Failed to map DMA TX buffer\n"); +err_flush: nfp_net_tx_xmit_more_flush(tx_ring); u64_stats_update_begin(&r_vec->tx_sync); r_vec->tx_errors++; @@ -3704,13 +3798,16 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT; nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; - mutex_init(&nn->bar_lock); + sema_init(&nn->bar_lock, 1); spin_lock_init(&nn->reconfig_lock); spin_lock_init(&nn->link_status_lock); timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0); + skb_queue_head_init(&nn->mbox_cmsg.queue); + init_waitqueue_head(&nn->mbox_cmsg.wq); + err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar, &nn->tlv_caps); if (err) @@ -3733,8 +3830,7 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, void nfp_net_free(struct nfp_net *nn) { WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted); - - mutex_destroy(&nn->bar_lock); + WARN_ON(!skb_queue_empty(&nn->mbox_cmsg.queue)); if (nn->dp.netdev) free_netdev(nn->dp.netdev); @@ -4009,9 +4105,14 @@ int nfp_net_init(struct nfp_net *nn) if (err) return err; - if (nn->dp.netdev) + if (nn->dp.netdev) { nfp_net_netdev_init(nn); + err = nfp_net_tls_init(nn); + if (err) + return err; + } + nfp_net_vecs_init(nn); if (!nn->dp.netdev) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c index 6d5213b5bcb0..d835c14b7257 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c @@ -99,6 +99,21 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, caps->repr_cap = readl(data); break; + case NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES: + if (length >= 4) + caps->mbox_cmsg_types = readl(data); + break; + case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: + if (length < 32) { + dev_err(dev, + "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", + length, offset); + return -EINVAL; + } + + caps->crypto_ops = readl(data); + caps->crypto_enable_off = data - ctrl_mem + 16; + break; default: if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr)) break; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 25919e338071..ee6b24e4eacd 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -44,6 +44,7 @@ #define NFP_NET_META_MARK 2 #define NFP_NET_META_PORTID 5 #define NFP_NET_META_CSUM 6 /* checksum complete type */ +#define NFP_NET_META_CONN_HANDLE 7 #define NFP_META_PORT_ID_CTRL ~0U @@ -135,6 +136,7 @@ #define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */ #define NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */ #define NFP_NET_CFG_UPDATE_VF 
(0x1 << 13) /* VF settings change */ +#define NFP_NET_CFG_UPDATE_CRYPTO (0x1 << 14) /* Crypto on/off */ #define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* An error occurred */ #define NFP_NET_CFG_TXRS_ENABLE 0x0008 #define NFP_NET_CFG_RXRS_ENABLE 0x0010 @@ -394,6 +396,7 @@ #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2 #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5 +#define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6 /** * VLAN filtering using general use mailbox @@ -466,6 +469,16 @@ * %NFP_NET_CFG_TLV_TYPE_REPR_CAP: * Single word, equivalent of %NFP_NET_CFG_CAP for representors, features which * can be used on representors. + * + * %NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES: + * Variable, bitmap of control message types supported by the mailbox handler. + * Bit 0 corresponds to message type 0, bit 1 to 1, etc. Control messages are + * encapsulated into simple TLVs, with an end TLV, and written to the Mailbox. + * + * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: + * 8 words, bitmaps of supported and enabled crypto operations. + * The first 16B (4 words) contain a bitmap of supported crypto operations, + * and the next 16B contain the enabled operations. */ #define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0 #define NFP_NET_CFG_TLV_TYPE_RESERVED 1 @@ -475,6 +488,8 @@ #define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL0 5 #define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL1 6 #define NFP_NET_CFG_TLV_TYPE_REPR_CAP 7 +#define NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES 10 +#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS 11 /* see crypto/fw.h */ struct device; @@ -484,12 +499,18 @@ struct device; * @mbox_off: vNIC mailbox area offset * @mbox_len: vNIC mailbox area length * @repr_cap: capabilities for representors + * @mbox_cmsg_types: cmsgs which can be passed through the mailbox + * @crypto_ops: supported crypto operations + * @crypto_enable_off: offset of crypto ops enable region */ struct nfp_net_tlv_caps { u32 me_freq_mhz; unsigned int mbox_off; unsigned int mbox_len; u32 repr_cap; + u32 mbox_cmsg_types; + u32 crypto_ops; + unsigned int crypto_enable_off; }; int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 851e31e0ba8e..3a8e1af7042d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -150,8 +150,9 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = { #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) #define NN_ET_SWITCH_STATS_LEN 9 -#define NN_RVEC_GATHER_STATS 9 +#define NN_RVEC_GATHER_STATS 12 #define NN_RVEC_PER_Q_STATS 3 +#define NN_CTRL_PATH_STATS 1 #define SFP_SFF_REV_COMPLIANCE 1 @@ -423,7 +424,8 @@ static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); - return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS; + return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS + + NN_CTRL_PATH_STATS; } static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) @@ -446,6 +448,11 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) data = nfp_pr_et(data, "hw_tx_inner_csum"); data = nfp_pr_et(data, "tx_gather"); data = nfp_pr_et(data, "tx_lso"); + data = nfp_pr_et(data, "tx_tls_encrypted"); + data = nfp_pr_et(data, "tx_tls_ooo"); + data = nfp_pr_et(data, "tx_tls_drop_no_sync_data"); + + data = nfp_pr_et(data, "hw_tls_no_space"); return data; }
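A note on extending these tables: nfp_vnic_get_sw_stats_strings() and nfp_vnic_get_sw_stats() must stay in lockstep, emitting the same entries in the same order, which is why NN_RVEC_GATHER_STATS grows from 9 to 12 together with the three new string/value pairs. A standalone sketch of guarding that invariant; the first six names are paraphrased placeholders (only the last six appear in the hunk above, and "hw_tls_no_space" is the separate NN_CTRL_PATH_STATS entry):

```c
#include <stdio.h>

#define NN_RVEC_GATHER_STATS 12

/* first six names are placeholders, not copied from the driver; the last
 * six are the pre-existing TX entries plus the three new TLS counters
 */
static const char * const gather_names[NN_RVEC_GATHER_STATS] = {
	"hw_rx_csum_ok", "hw_rx_csum_inner_ok", "hw_rx_csum_complete",
	"hw_rx_csum_err", "rx_replace_buf_alloc_fail", "hw_tx_csum",
	"hw_tx_inner_csum", "tx_gather", "tx_lso",
	"tx_tls_encrypted", "tx_tls_ooo", "tx_tls_drop_no_sync_data",
};

int main(void)
{
	/* mirrors the ethtool contract: one value per string, same order */
	printf("%zu names for %d values\n",
	       sizeof(gather_names) / sizeof(gather_names[0]),
	       NN_RVEC_GATHER_STATS);
	return 0;
}
```

@@ -478,6 +485,9 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device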
*netdev, u64 *data) tmp[6] = nn->r_vecs[i].hw_csum_tx_inner; tmp[7] = nn->r_vecs[i].tx_gather; tmp[8] = nn->r_vecs[i].tx_lso; + tmp[9] = nn->r_vecs[i].hw_tls_tx; + tmp[10] = nn->r_vecs[i].tls_tx_fallback; + tmp[11] = nn->r_vecs[i].tls_tx_no_fallback; } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); data += NN_RVEC_PER_Q_STATS; @@ -489,6 +499,8 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) for (j = 0; j < NN_RVEC_GATHER_STATS; j++) *data++ = gathered_stats[j]; + *data++ = atomic_read(&nn->ktls_no_space); + return data; } diff --git a/include/net/tls.h b/include/net/tls.h index 0a0072636009..d1a4f365d6be 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -197,17 +198,16 @@ struct tls_offload_context_tx { struct scatterlist sg_tx_data[MAX_SKB_FRAGS]; void (*sk_destruct)(struct sock *sk); - u8 driver_state[]; + u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *))) +#define TLS_DRIVER_STATE_SIZE_TX 16 }; #define TLS_OFFLOAD_CONTEXT_SIZE_TX \ - (ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) + \ - TLS_DRIVER_STATE_SIZE) + (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX) struct cipher_context { char *iv; @@ -302,16 +302,16 @@ struct tls_offload_context_rx { /* sw must be the first member of tls_offload_context_rx */ struct tls_sw_context_rx sw; atomic64_t resync_req; - u8 driver_state[]; + u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ +#define TLS_DRIVER_STATE_SIZE_RX 8 }; #define TLS_OFFLOAD_CONTEXT_SIZE_RX \ - (ALIGN(sizeof(struct tls_offload_context_rx), sizeof(void *)) + \ - TLS_DRIVER_STATE_SIZE) + (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) int wait_on_pending_writer(struct sock *sk, long *timeo); int tls_sk_query(struct sock *sk, int optname, char __user *optval, @@ -556,6 +556,23 @@ tls_offload_ctx_rx(const struct tls_context *tls_ctx) return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx; } +#if IS_ENABLED(CONFIG_TLS_DEVICE) +static inline void *__tls_driver_ctx(struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + return tls_offload_ctx_tx(tls_ctx)->driver_state; + else + return tls_offload_ctx_rx(tls_ctx)->driver_state; +} + +static inline void * +tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction) +{ + return __tls_driver_ctx(tls_get_ctx(sk), direction); +} +#endif + /* The TLS context is valid until sk_destruct is called */ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) { @@ -573,6 +590,7 @@ void tls_unregister_device(struct tls_device *device); int tls_device_decrypted(struct sock *sk, struct sk_buff *skb); int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout); +struct sk_buff *tls_encrypt_skb(struct sk_buff *skb); struct sk_buff *tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 5a087e1981c3..1d2d804ac633 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -426,6 
+426,12 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk, } EXPORT_SYMBOL_GPL(tls_validate_xmit_skb); +struct sk_buff *tls_encrypt_skb(struct sk_buff *skb) +{ + return tls_sw_fallback(skb->sk, skb); +} +EXPORT_SYMBOL_GPL(tls_encrypt_skb); + int tls_sw_fallback_init(struct sock *sk, struct tls_offload_context_tx *offload_ctx, struct tls_crypto_info *crypto_info)
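A closing note on the net/tls.h changes above: driver scratch space inside the offload context is now a fixed, 8-byte-aligned array (TLS_DRIVER_STATE_SIZE_TX or _RX bytes), and the BUILD_BUG_ON() in nfp_net_tls_add() asserts that nfp's per-socket context fits in the TX budget. A standalone approximation of that check, with a u8 standing in for bool in a mirror of struct nfp_net_tls_offload_ctx:

```c
#include <stdint.h>
#include <stdio.h>

/* stand-in for struct nfp_net_tls_offload_ctx from crypto/crypto.h */
struct offload_ctx {
	uint32_t fw_handle[2];
	uint32_t next_seq;
	uint8_t out_of_sync;
};

#define TLS_DRIVER_STATE_SIZE_TX 16

int main(void)
{
	/* the BUILD_BUG_ON() in nfp_net_tls_add() asserts exactly this */
	printf("ctx %zu bytes, room %d bytes: %s\n",
	       sizeof(struct offload_ctx), TLS_DRIVER_STATE_SIZE_TX,
	       sizeof(struct offload_ctx) <= TLS_DRIVER_STATE_SIZE_TX
	       ? "fits" : "too big");
	return 0;
}
```

With two 4-byte handle words, the sequence counter, and the flag, the mirrored struct pads out to exactly 16 bytes, so the TX scratch area is fully used.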