mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-16 10:11:38 -04:00
Merge tag 'nf-next-26-01-29' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Florian Westphal says: ==================== netfilter: updates for net-next The following patchset contains Netfilter updates for *net-next*: Patches 1 to 4 add IP6IP6 tunneling acceleration to the flowtable infrastructure. Patch 5 extends test coverage for this. From Lorenzo Bianconi. Patch 6 removes a duplicated helper from the xt_time extension; an existing helper can be used instead. From Jinjie Ruan. Patch 7 adds an rhashtable to nfnetlink_queue to speed up out-of-order verdict processing. Before this, a list walk was required due to the in-order design assumption. netfilter pull request nf-next-26-01-29 * tag 'nf-next-26-01-29' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next: netfilter: nfnetlink_queue: optimize verdict lookup with hash table netfilter: xt_time: use is_leap_year() helper selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest netfilter: flowtable: Add IP6IP6 tx sw acceleration netfilter: flowtable: Add IP6IP6 rx sw acceleration netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct netfilter: Add ctx pointer in nf_flow_skb_encap_protocol/nf_flow_ip4_tunnel_proto signature ==================== Link: https://patch.msgid.link/20260129105427.12494-1-fw@strlen.de Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
@@ -6,11 +6,13 @@
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/jhash.h>
|
||||
#include <linux/netfilter.h>
|
||||
#include <linux/rhashtable-types.h>
|
||||
#include <linux/skbuff.h>
|
||||
|
||||
/* Each queued (to userspace) skbuff has one of these. */
|
||||
struct nf_queue_entry {
|
||||
struct list_head list;
|
||||
struct rhash_head hash_node;
|
||||
struct sk_buff *skb;
|
||||
unsigned int id;
|
||||
unsigned int hook_index; /* index in hook_entries->hook[] */
|
||||
@@ -20,6 +22,7 @@ struct nf_queue_entry {
|
||||
#endif
|
||||
struct nf_hook_state state;
|
||||
u16 size; /* sizeof(entry) + saved route keys */
|
||||
u16 queue_num;
|
||||
|
||||
/* extra space to store route keys */
|
||||
};
|
||||
|
||||
@@ -1828,6 +1828,32 @@ int ip6_tnl_encap_setup(struct ip6_tnl *t,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);
|
||||
|
||||
static int ip6_tnl_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
struct net_device_path *path)
|
||||
{
|
||||
struct ip6_tnl *t = netdev_priv(ctx->dev);
|
||||
struct flowi6 fl6 = {
|
||||
.daddr = t->parms.raddr,
|
||||
};
|
||||
struct dst_entry *dst;
|
||||
int err;
|
||||
|
||||
dst = ip6_route_output(dev_net(ctx->dev), NULL, &fl6);
|
||||
if (!dst->error) {
|
||||
path->type = DEV_PATH_TUN;
|
||||
path->tun.src_v6 = t->parms.laddr;
|
||||
path->tun.dst_v6 = t->parms.raddr;
|
||||
path->tun.l3_proto = IPPROTO_IPV6;
|
||||
path->dev = ctx->dev;
|
||||
ctx->dev = dst->dev;
|
||||
}
|
||||
|
||||
err = dst->error;
|
||||
dst_release(dst);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static const struct net_device_ops ip6_tnl_netdev_ops = {
|
||||
.ndo_init = ip6_tnl_dev_init,
|
||||
.ndo_uninit = ip6_tnl_dev_uninit,
|
||||
@@ -1836,6 +1862,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
|
||||
.ndo_change_mtu = ip6_tnl_change_mtu,
|
||||
.ndo_get_stats64 = dev_get_tstats64,
|
||||
.ndo_get_iflink = ip6_tnl_get_iflink,
|
||||
.ndo_fill_forward_path = ip6_tnl_fill_forward_path,
|
||||
};
|
||||
|
||||
#define IPXIPX_FEATURES (NETIF_F_SG | \
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include <net/ip.h>
|
||||
#include <net/ipv6.h>
|
||||
#include <net/ip6_route.h>
|
||||
#include <net/ip6_tunnel.h>
|
||||
#include <net/neighbour.h>
|
||||
#include <net/netfilter/nf_flow_table.h>
|
||||
#include <net/netfilter/nf_conntrack_acct.h>
|
||||
@@ -144,12 +145,26 @@ static bool ip_has_options(unsigned int thoff)
|
||||
return thoff != sizeof(struct iphdr);
|
||||
}
|
||||
|
||||
static void nf_flow_tuple_encap(struct sk_buff *skb,
|
||||
/*
 * Per-packet parsing state shared by the flowtable lookup/forward helpers
 * in this file.
 */
struct nf_flowtable_ctx {
|
||||
/* Input device; its ifindex is stored in the lookup tuple (iifidx). */
const struct net_device *in;
|
||||
/*
 * Bytes of encapsulation in front of the IP header being matched,
 * counted from the network header: grown by VLAN_HLEN, PPPOE_SES_HLEN
 * and the outer tunnel header size as each layer is recognised.
 */
u32 offset;
|
||||
/* NOTE(review): not written in the visible hunks; presumably the
 * transport header size needed by the tuple parsers - confirm.
 */
u32 hdrsize;
|
||||
/* Outer IP tunnel metadata, filled by the *_tunnel_proto() helpers. */
struct {
|
||||
/* Tunnel IP header size; this many bytes are pulled when the
 * tunnel header is popped in nf_flow_ip_tunnel_pop().
 */
u32 hdr_size;
|
||||
/* IP tunnel protocol: IPPROTO_IPIP or IPPROTO_IPV6 once a tunnel
 * has been detected.
 */
u8 proto;
|
||||
} tun;
|
||||
};
|
||||
|
||||
static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
|
||||
struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple)
|
||||
{
|
||||
__be16 inner_proto = skb->protocol;
|
||||
struct vlan_ethhdr *veth;
|
||||
struct pppoe_hdr *phdr;
|
||||
struct ipv6hdr *ip6h;
|
||||
struct iphdr *iph;
|
||||
u16 offset = 0;
|
||||
int i = 0;
|
||||
@@ -176,22 +191,28 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
|
||||
break;
|
||||
}
|
||||
|
||||
if (inner_proto == htons(ETH_P_IP)) {
|
||||
switch (inner_proto) {
|
||||
case htons(ETH_P_IP):
|
||||
iph = (struct iphdr *)(skb_network_header(skb) + offset);
|
||||
if (iph->protocol == IPPROTO_IPIP) {
|
||||
if (ctx->tun.proto == IPPROTO_IPIP) {
|
||||
tuple->tun.dst_v4.s_addr = iph->daddr;
|
||||
tuple->tun.src_v4.s_addr = iph->saddr;
|
||||
tuple->tun.l3_proto = IPPROTO_IPIP;
|
||||
}
|
||||
break;
|
||||
case htons(ETH_P_IPV6):
|
||||
ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
|
||||
if (ctx->tun.proto == IPPROTO_IPV6) {
|
||||
tuple->tun.dst_v6 = ip6h->daddr;
|
||||
tuple->tun.src_v6 = ip6h->saddr;
|
||||
tuple->tun.l3_proto = IPPROTO_IPV6;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
struct nf_flowtable_ctx {
|
||||
const struct net_device *in;
|
||||
u32 offset;
|
||||
u32 hdrsize;
|
||||
};
|
||||
|
||||
static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple)
|
||||
{
|
||||
@@ -259,7 +280,7 @@ static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
|
||||
tuple->l3proto = AF_INET;
|
||||
tuple->l4proto = ipproto;
|
||||
tuple->iifidx = ctx->in->ifindex;
|
||||
nf_flow_tuple_encap(skb, tuple);
|
||||
nf_flow_tuple_encap(ctx, skb, tuple);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -295,15 +316,16 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
|
||||
return NF_STOLEN;
|
||||
}
|
||||
|
||||
static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
|
||||
static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
struct iphdr *iph;
|
||||
u16 size;
|
||||
|
||||
if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
|
||||
if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
|
||||
return false;
|
||||
|
||||
iph = (struct iphdr *)(skb_network_header(skb) + *psize);
|
||||
iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
|
||||
size = iph->ihl << 2;
|
||||
|
||||
if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
|
||||
@@ -312,25 +334,62 @@ static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
|
||||
if (iph->ttl <= 1)
|
||||
return false;
|
||||
|
||||
if (iph->protocol == IPPROTO_IPIP)
|
||||
*psize += size;
|
||||
if (iph->protocol == IPPROTO_IPIP) {
|
||||
ctx->tun.proto = IPPROTO_IPIP;
|
||||
ctx->tun.hdr_size = size;
|
||||
ctx->offset += size;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
|
||||
static bool nf_flow_ip6_tunnel_proto(struct nf_flowtable_ctx *ctx,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
struct ipv6hdr *ip6h, _ip6h;
|
||||
__be16 frag_off;
|
||||
u8 nexthdr;
|
||||
int hdrlen;
|
||||
|
||||
if (iph->protocol != IPPROTO_IPIP)
|
||||
ip6h = skb_header_pointer(skb, ctx->offset, sizeof(*ip6h), &_ip6h);
|
||||
if (!ip6h)
|
||||
return false;
|
||||
|
||||
if (ip6h->hop_limit <= 1)
|
||||
return false;
|
||||
|
||||
nexthdr = ip6h->nexthdr;
|
||||
hdrlen = ipv6_skip_exthdr(skb, sizeof(*ip6h) + ctx->offset, &nexthdr,
|
||||
&frag_off);
|
||||
if (hdrlen < 0)
|
||||
return false;
|
||||
|
||||
if (nexthdr == IPPROTO_IPV6) {
|
||||
ctx->tun.hdr_size = hdrlen;
|
||||
ctx->tun.proto = IPPROTO_IPV6;
|
||||
}
|
||||
ctx->offset += ctx->tun.hdr_size;
|
||||
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif /* IS_ENABLED(CONFIG_IPV6) */
|
||||
}
|
||||
|
||||
static void nf_flow_ip_tunnel_pop(struct nf_flowtable_ctx *ctx,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
if (ctx->tun.proto != IPPROTO_IPIP &&
|
||||
ctx->tun.proto != IPPROTO_IPV6)
|
||||
return;
|
||||
|
||||
skb_pull(skb, iph->ihl << 2);
|
||||
skb_pull(skb, ctx->tun.hdr_size);
|
||||
skb_reset_network_header(skb);
|
||||
}
|
||||
|
||||
static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
|
||||
u32 *offset)
|
||||
static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
|
||||
struct sk_buff *skb, __be16 proto)
|
||||
{
|
||||
__be16 inner_proto = skb->protocol;
|
||||
struct vlan_ethhdr *veth;
|
||||
@@ -343,7 +402,7 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
|
||||
|
||||
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
if (veth->h_vlan_encapsulated_proto == proto) {
|
||||
*offset += VLAN_HLEN;
|
||||
ctx->offset += VLAN_HLEN;
|
||||
inner_proto = proto;
|
||||
ret = true;
|
||||
}
|
||||
@@ -351,19 +410,28 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
|
||||
case htons(ETH_P_PPP_SES):
|
||||
if (nf_flow_pppoe_proto(skb, &inner_proto) &&
|
||||
inner_proto == proto) {
|
||||
*offset += PPPOE_SES_HLEN;
|
||||
ctx->offset += PPPOE_SES_HLEN;
|
||||
ret = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (inner_proto == htons(ETH_P_IP))
|
||||
ret = nf_flow_ip4_tunnel_proto(skb, offset);
|
||||
switch (inner_proto) {
|
||||
case htons(ETH_P_IP):
|
||||
ret = nf_flow_ip4_tunnel_proto(ctx, skb);
|
||||
break;
|
||||
case htons(ETH_P_IPV6):
|
||||
ret = nf_flow_ip6_tunnel_proto(ctx, skb);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void nf_flow_encap_pop(struct sk_buff *skb,
|
||||
static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
|
||||
struct sk_buff *skb,
|
||||
struct flow_offload_tuple_rhash *tuplehash)
|
||||
{
|
||||
struct vlan_hdr *vlan_hdr;
|
||||
@@ -389,8 +457,9 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
|
||||
}
|
||||
}
|
||||
|
||||
if (skb->protocol == htons(ETH_P_IP))
|
||||
nf_flow_ip4_tunnel_pop(skb);
|
||||
if (skb->protocol == htons(ETH_P_IP) ||
|
||||
skb->protocol == htons(ETH_P_IPV6))
|
||||
nf_flow_ip_tunnel_pop(ctx, skb);
|
||||
}
|
||||
|
||||
struct nf_flow_xmit {
|
||||
@@ -416,7 +485,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
|
||||
{
|
||||
struct flow_offload_tuple tuple = {};
|
||||
|
||||
if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
|
||||
if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)))
|
||||
return NULL;
|
||||
|
||||
if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
|
||||
@@ -460,7 +529,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
|
||||
|
||||
flow_offload_refresh(flow_table, flow, false);
|
||||
|
||||
nf_flow_encap_pop(skb, tuplehash);
|
||||
nf_flow_encap_pop(ctx, skb, tuplehash);
|
||||
thoff -= ctx->offset;
|
||||
|
||||
iph = ip_hdr(skb);
|
||||
@@ -569,6 +638,97 @@ static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * IPv6 tx options carrying a single destination options header used for
 * the Tunnel Encapsulation Limit option.
 */
struct ipv6_tel_txoption {
|
||||
/* Generic tx options; nf_flow_tunnel_ip6ip6_push() points ops.dst1opt
 * at dst_opt below.
 */
struct ipv6_txoptions ops;
|
||||
/* Raw 8-byte destination options header: bytes 2..4 hold the
 * IPV6_TLV_TNL_ENCAP_LIMIT TLV, bytes 5..6 an IPV6_TLV_PADN TLV.
 */
__u8 dst_opt[8];
|
||||
};
|
||||
|
||||
static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple,
|
||||
struct in6_addr **ip6_daddr,
|
||||
int encap_limit)
|
||||
{
|
||||
struct ipv6hdr *ip6h = (struct ipv6hdr *)skb_network_header(skb);
|
||||
u8 hop_limit = ip6h->hop_limit, proto = IPPROTO_IPV6;
|
||||
struct rtable *rt = dst_rtable(tuple->dst_cache);
|
||||
__u8 dsfield = ipv6_get_dsfield(ip6h);
|
||||
struct flowi6 fl6 = {
|
||||
.daddr = tuple->tun.src_v6,
|
||||
.saddr = tuple->tun.dst_v6,
|
||||
.flowi6_proto = proto,
|
||||
};
|
||||
int err, mtu;
|
||||
u32 headroom;
|
||||
|
||||
err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
skb_set_inner_ipproto(skb, proto);
|
||||
headroom = sizeof(*ip6h) + LL_RESERVED_SPACE(rt->dst.dev) +
|
||||
rt->dst.header_len;
|
||||
if (encap_limit)
|
||||
headroom += 8;
|
||||
err = skb_cow_head(skb, headroom);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
skb_scrub_packet(skb, true);
|
||||
mtu = dst_mtu(&rt->dst) - sizeof(*ip6h);
|
||||
if (encap_limit)
|
||||
mtu -= 8;
|
||||
mtu = max(mtu, IPV6_MIN_MTU);
|
||||
skb_dst_update_pmtu_no_confirm(skb, mtu);
|
||||
|
||||
if (encap_limit > 0) {
|
||||
struct ipv6_tel_txoption opt = {
|
||||
.dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT,
|
||||
.dst_opt[3] = 1,
|
||||
.dst_opt[4] = encap_limit,
|
||||
.dst_opt[5] = IPV6_TLV_PADN,
|
||||
.dst_opt[6] = 1,
|
||||
};
|
||||
struct ipv6_opt_hdr *hopt;
|
||||
|
||||
opt.ops.dst1opt = (struct ipv6_opt_hdr *)opt.dst_opt;
|
||||
opt.ops.opt_nflen = 8;
|
||||
|
||||
hopt = skb_push(skb, ipv6_optlen(opt.ops.dst1opt));
|
||||
memcpy(hopt, opt.ops.dst1opt, ipv6_optlen(opt.ops.dst1opt));
|
||||
hopt->nexthdr = IPPROTO_IPV6;
|
||||
proto = NEXTHDR_DEST;
|
||||
}
|
||||
|
||||
skb_push(skb, sizeof(*ip6h));
|
||||
skb_reset_network_header(skb);
|
||||
|
||||
ip6h = ipv6_hdr(skb);
|
||||
ip6_flow_hdr(ip6h, dsfield,
|
||||
ip6_make_flowlabel(net, skb, fl6.flowlabel, true, &fl6));
|
||||
ip6h->hop_limit = hop_limit;
|
||||
ip6h->nexthdr = proto;
|
||||
ip6h->daddr = tuple->tun.src_v6;
|
||||
ip6h->saddr = tuple->tun.dst_v6;
|
||||
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(*ip6h));
|
||||
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
|
||||
|
||||
*ip6_daddr = &tuple->tun.src_v6;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple,
|
||||
struct in6_addr **ip6_daddr,
|
||||
int encap_limit)
|
||||
{
|
||||
if (tuple->tun_num)
|
||||
return nf_flow_tunnel_ip6ip6_push(net, skb, tuple, ip6_daddr,
|
||||
encap_limit);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nf_flow_encap_push(struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple)
|
||||
{
|
||||
@@ -838,7 +998,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
|
||||
tuple->l3proto = AF_INET6;
|
||||
tuple->l4proto = nexthdr;
|
||||
tuple->iifidx = ctx->in->ifindex;
|
||||
nf_flow_tuple_encap(skb, tuple);
|
||||
nf_flow_tuple_encap(ctx, skb, tuple);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -846,7 +1006,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
|
||||
static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
|
||||
struct nf_flowtable *flow_table,
|
||||
struct flow_offload_tuple_rhash *tuplehash,
|
||||
struct sk_buff *skb)
|
||||
struct sk_buff *skb, int encap_limit)
|
||||
{
|
||||
enum flow_offload_tuple_dir dir;
|
||||
struct flow_offload *flow;
|
||||
@@ -857,6 +1017,12 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
|
||||
mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
|
||||
if (flow->tuplehash[!dir].tuple.tun_num) {
|
||||
mtu -= sizeof(*ip6h);
|
||||
if (encap_limit > 0)
|
||||
mtu -= 8; /* encap limit option */
|
||||
}
|
||||
|
||||
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
|
||||
return 0;
|
||||
|
||||
@@ -875,7 +1041,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
|
||||
|
||||
flow_offload_refresh(flow_table, flow, false);
|
||||
|
||||
nf_flow_encap_pop(skb, tuplehash);
|
||||
nf_flow_encap_pop(ctx, skb, tuplehash);
|
||||
|
||||
ip6h = ipv6_hdr(skb);
|
||||
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
|
||||
@@ -896,8 +1062,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
|
||||
{
|
||||
struct flow_offload_tuple tuple = {};
|
||||
|
||||
if (skb->protocol != htons(ETH_P_IPV6) &&
|
||||
!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
|
||||
if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
|
||||
return NULL;
|
||||
|
||||
if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
|
||||
@@ -910,6 +1075,7 @@ unsigned int
|
||||
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||
const struct nf_hook_state *state)
|
||||
{
|
||||
int encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
|
||||
struct flow_offload_tuple_rhash *tuplehash;
|
||||
struct nf_flowtable *flow_table = priv;
|
||||
struct flow_offload_tuple *other_tuple;
|
||||
@@ -928,7 +1094,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||
if (tuplehash == NULL)
|
||||
return NF_ACCEPT;
|
||||
|
||||
ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
|
||||
ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb,
|
||||
encap_limit);
|
||||
if (ret < 0)
|
||||
return NF_DROP;
|
||||
else if (ret == 0)
|
||||
@@ -947,6 +1114,10 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||
other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
ip6_daddr = &other_tuple->src_v6;
|
||||
|
||||
if (nf_flow_tunnel_v6_push(state->net, skb, other_tuple,
|
||||
&ip6_daddr, encap_limit) < 0)
|
||||
return NF_DROP;
|
||||
|
||||
if (nf_flow_encap_push(skb, other_tuple) < 0)
|
||||
return NF_DROP;
|
||||
|
||||
|
||||
@@ -30,6 +30,8 @@
|
||||
#include <linux/netfilter/nf_conntrack_common.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/cgroup-defs.h>
|
||||
#include <linux/rhashtable.h>
|
||||
#include <linux/jhash.h>
|
||||
#include <net/gso.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/tcp_states.h>
|
||||
@@ -47,6 +49,8 @@
|
||||
#endif
|
||||
|
||||
#define NFQNL_QMAX_DEFAULT 1024
|
||||
#define NFQNL_HASH_MIN 1024
|
||||
#define NFQNL_HASH_MAX 1048576
|
||||
|
||||
/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
|
||||
* includes the header length. Thus, the maximum packet length that we
|
||||
@@ -56,6 +60,26 @@
|
||||
*/
|
||||
#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
|
||||
|
||||
/*
 * Composite key for packet lookup: (net, queue_num, packet_id).
 *
 * The key is hashed as raw u32 words (see nfqnl_packet_obj_hashfn()), so
 * every byte, padding included, must be initialised; nfqnl_init_key()
 * zeroes the whole structure before filling it in.
 */
|
||||
struct nfqnl_packet_key {
|
||||
/* Network namespace the queued packet belongs to. */
possible_net_t net;
|
||||
/* Packet id assigned at enqueue time (nf_queue_entry::id). */
u32 packet_id;
|
||||
/* Queue number of the owning instance. */
u16 queue_num;
|
||||
} __aligned(sizeof(u32)); /* jhash2 works on u32 words: keep 32-bit alignment */
|
||||
|
||||
/* Global rhashtable - one for entire system, all netns */
|
||||
static struct rhashtable nfqnl_packet_map __read_mostly;
|
||||
|
||||
/* Helper to initialize composite key */
|
||||
/*
 * Build a lookup key for (@net, @queue_num, @packet_id).
 *
 * The structure is cleared first so that any padding bytes are zero and
 * two keys describing the same packet are byte-for-byte identical.
 */
static inline void nfqnl_init_key(struct nfqnl_packet_key *key,
				  struct net *net, u32 packet_id, u16 queue_num)
{
	memset(key, 0, sizeof(*key));

	key->queue_num = queue_num;
	key->packet_id = packet_id;
	write_pnet(&key->net, net);
}
|
||||
|
||||
struct nfqnl_instance {
|
||||
struct hlist_node hlist; /* global list of queues */
|
||||
struct rcu_head rcu;
|
||||
@@ -100,6 +124,39 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
|
||||
return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
|
||||
}
|
||||
|
||||
/* Extract composite key from nf_queue_entry for hashing */
|
||||
/*
 * rhashtable object hash: derive the composite key from a queued entry
 * (@data is a struct nf_queue_entry) and hash it word by word with
 * jhash2(). @len is unused; the key size is fixed.
 */
static u32 nfqnl_packet_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct nf_queue_entry *qe = data;
	struct nfqnl_packet_key lookup;
	u32 nwords = sizeof(lookup) / sizeof(u32);

	nfqnl_init_key(&lookup, qe->state.net, qe->id, qe->queue_num);

	return jhash2((u32 *)&lookup, nwords, seed);
}
|
||||
|
||||
/* Compare stack-allocated key against entry */
|
||||
static int nfqnl_packet_obj_cmpfn(struct rhashtable_compare_arg *arg,
|
||||
const void *obj)
|
||||
{
|
||||
const struct nfqnl_packet_key *key = arg->key;
|
||||
const struct nf_queue_entry *entry = obj;
|
||||
|
||||
return !net_eq(entry->state.net, read_pnet(&key->net)) ||
|
||||
entry->queue_num != key->queue_num ||
|
||||
entry->id != key->packet_id;
|
||||
}
|
||||
|
||||
/*
 * Parameters of the global packet hash table (nfqnl_packet_map).
 *
 * Objects are struct nf_queue_entry linked through hash_node; lookups use
 * a struct nfqnl_packet_key.
 *
 * NOTE(review): no .hashfn is set, so hashing of a lookup key presumably
 * falls back to rhashtable's default for key_len bytes. That result has to
 * equal nfqnl_packet_obj_hashfn() for the same packet - confirm against the
 * rhashtable implementation.
 */
static const struct rhashtable_params nfqnl_rhashtable_params = {
|
||||
.head_offset = offsetof(struct nf_queue_entry, hash_node),
|
||||
.key_len = sizeof(struct nfqnl_packet_key),
|
||||
/* Entries do not embed the key; it is rebuilt from entry fields. */
.obj_hashfn = nfqnl_packet_obj_hashfn,
|
||||
.obj_cmpfn = nfqnl_packet_obj_cmpfn,
|
||||
.automatic_shrinking = true,
|
||||
/* Table size bounds, in buckets. */
.min_size = NFQNL_HASH_MIN,
|
||||
.max_size = NFQNL_HASH_MAX,
|
||||
};
|
||||
|
||||
static struct nfqnl_instance *
|
||||
instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
|
||||
{
|
||||
@@ -188,33 +245,45 @@ instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
|
||||
spin_unlock(&q->instances_lock);
|
||||
}
|
||||
|
||||
static inline void
|
||||
static int
|
||||
__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
|
||||
{
|
||||
list_add_tail(&entry->list, &queue->queue_list);
|
||||
queue->queue_total++;
|
||||
int err;
|
||||
|
||||
entry->queue_num = queue->queue_num;
|
||||
|
||||
err = rhashtable_insert_fast(&nfqnl_packet_map, &entry->hash_node,
|
||||
nfqnl_rhashtable_params);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
list_add_tail(&entry->list, &queue->queue_list);
|
||||
queue->queue_total++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Unlink @entry from @queue: drop it from the global packet hash table,
 * remove it from the instance's queue_list and decrement queue_total.
 *
 * The visible callers (find_dequeue_entry(), nfqnl_flush()) invoke this
 * with queue->lock held.
 */
static void
|
||||
__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
|
||||
{
|
||||
/* Return value is ignored: the entry is unlinked from the list below
 * regardless of the hash removal result.
 */
rhashtable_remove_fast(&nfqnl_packet_map, &entry->hash_node,
|
||||
nfqnl_rhashtable_params);
|
||||
list_del(&entry->list);
|
||||
queue->queue_total--;
|
||||
}
|
||||
|
||||
static struct nf_queue_entry *
|
||||
find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
|
||||
find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id,
|
||||
struct net *net)
|
||||
{
|
||||
struct nf_queue_entry *entry = NULL, *i;
|
||||
struct nfqnl_packet_key key;
|
||||
struct nf_queue_entry *entry;
|
||||
|
||||
nfqnl_init_key(&key, net, id, queue->queue_num);
|
||||
|
||||
spin_lock_bh(&queue->lock);
|
||||
|
||||
list_for_each_entry(i, &queue->queue_list, list) {
|
||||
if (i->id == id) {
|
||||
entry = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
entry = rhashtable_lookup_fast(&nfqnl_packet_map, &key,
|
||||
nfqnl_rhashtable_params);
|
||||
|
||||
if (entry)
|
||||
__dequeue_entry(queue, entry);
|
||||
@@ -404,8 +473,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
|
||||
spin_lock_bh(&queue->lock);
|
||||
list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
|
||||
if (!cmpfn || cmpfn(entry, data)) {
|
||||
list_del(&entry->list);
|
||||
queue->queue_total--;
|
||||
__dequeue_entry(queue, entry);
|
||||
nfqnl_reinject(entry, NF_DROP);
|
||||
}
|
||||
}
|
||||
@@ -885,23 +953,23 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
|
||||
if (nf_ct_drop_unconfirmed(entry))
|
||||
goto err_out_free_nskb;
|
||||
|
||||
if (queue->queue_total >= queue->queue_maxlen) {
|
||||
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
|
||||
failopen = 1;
|
||||
err = 0;
|
||||
} else {
|
||||
queue->queue_dropped++;
|
||||
net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
|
||||
queue->queue_total);
|
||||
}
|
||||
goto err_out_free_nskb;
|
||||
}
|
||||
if (queue->queue_total >= queue->queue_maxlen)
|
||||
goto err_out_queue_drop;
|
||||
|
||||
entry->id = ++queue->id_sequence;
|
||||
*packet_id_ptr = htonl(entry->id);
|
||||
|
||||
/* Insert into hash BEFORE unicast. If failure don't send to userspace. */
|
||||
err = __enqueue_entry(queue, entry);
|
||||
if (unlikely(err))
|
||||
goto err_out_queue_drop;
|
||||
|
||||
/* nfnetlink_unicast will either free the nskb or add it to a socket */
|
||||
err = nfnetlink_unicast(nskb, net, queue->peer_portid);
|
||||
if (err < 0) {
|
||||
/* Unicast failed - remove entry we just inserted */
|
||||
__dequeue_entry(queue, entry);
|
||||
|
||||
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
|
||||
failopen = 1;
|
||||
err = 0;
|
||||
@@ -911,11 +979,22 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
|
||||
goto err_out_unlock;
|
||||
}
|
||||
|
||||
__enqueue_entry(queue, entry);
|
||||
|
||||
spin_unlock_bh(&queue->lock);
|
||||
return 0;
|
||||
|
||||
err_out_queue_drop:
|
||||
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
|
||||
failopen = 1;
|
||||
err = 0;
|
||||
} else {
|
||||
queue->queue_dropped++;
|
||||
|
||||
if (queue->queue_total >= queue->queue_maxlen)
|
||||
net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
|
||||
queue->queue_total);
|
||||
else
|
||||
net_warn_ratelimited("nf_queue: hash insert failed: %d\n", err);
|
||||
}
|
||||
err_out_free_nskb:
|
||||
kfree_skb(nskb);
|
||||
err_out_unlock:
|
||||
@@ -1427,7 +1506,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
|
||||
|
||||
verdict = ntohl(vhdr->verdict);
|
||||
|
||||
entry = find_dequeue_entry(queue, ntohl(vhdr->id));
|
||||
entry = find_dequeue_entry(queue, ntohl(vhdr->id), info->net);
|
||||
if (entry == NULL)
|
||||
return -ENOENT;
|
||||
|
||||
@@ -1774,10 +1853,14 @@ static int __init nfnetlink_queue_init(void)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = rhashtable_init(&nfqnl_packet_map, &nfqnl_rhashtable_params);
|
||||
if (status < 0)
|
||||
return status;
|
||||
|
||||
status = register_pernet_subsys(&nfnl_queue_net_ops);
|
||||
if (status < 0) {
|
||||
pr_err("failed to register pernet ops\n");
|
||||
goto out;
|
||||
goto cleanup_rhashtable;
|
||||
}
|
||||
|
||||
netlink_register_notifier(&nfqnl_rtnl_notifier);
|
||||
@@ -1802,7 +1885,8 @@ static int __init nfnetlink_queue_init(void)
|
||||
cleanup_netlink_notifier:
|
||||
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
|
||||
unregister_pernet_subsys(&nfnl_queue_net_ops);
|
||||
out:
|
||||
cleanup_rhashtable:
|
||||
rhashtable_destroy(&nfqnl_packet_map);
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -1814,6 +1898,8 @@ static void __exit nfnetlink_queue_fini(void)
|
||||
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
|
||||
unregister_pernet_subsys(&nfnl_queue_net_ops);
|
||||
|
||||
rhashtable_destroy(&nfqnl_packet_map);
|
||||
|
||||
rcu_barrier(); /* Wait for completion of call_rcu()'s */
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/rtc.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/netfilter/x_tables.h>
|
||||
@@ -64,11 +65,6 @@ static const u_int16_t days_since_epoch[] = {
|
||||
3287, 2922, 2557, 2191, 1826, 1461, 1096, 730, 365, 0,
|
||||
};
|
||||
|
||||
static inline bool is_leap(unsigned int y)
|
||||
{
|
||||
return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Each network packet has a (nano)seconds-since-the-epoch (SSTE) timestamp.
|
||||
* Since we match against days and daytime, the SSTE value needs to be
|
||||
@@ -138,7 +134,7 @@ static void localtime_3(struct xtm *r, time64_t time)
|
||||
* (A different approach to use would be to subtract a monthlength
|
||||
* from w repeatedly while counting.)
|
||||
*/
|
||||
if (is_leap(year)) {
|
||||
if (is_leap_year(year)) {
|
||||
/* use days_since_leapyear[] in a leap year */
|
||||
for (i = ARRAY_SIZE(days_since_leapyear) - 1;
|
||||
i > 0 && days_since_leapyear[i] > w; --i)
|
||||
|
||||
@@ -592,16 +592,28 @@ ip -net "$nsr1" link set tun0 up
|
||||
ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
|
||||
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
|
||||
|
||||
ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
|
||||
ip -net "$nsr1" link set tun6 up
|
||||
ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
|
||||
|
||||
ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
|
||||
ip -net "$nsr2" link set tun0 up
|
||||
ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
|
||||
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
|
||||
|
||||
ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1
|
||||
ip -net "$nsr2" link set tun6 up
|
||||
ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
|
||||
|
||||
ip -net "$nsr1" route change default via 192.168.100.2
|
||||
ip -net "$nsr2" route change default via 192.168.100.1
|
||||
ip -6 -net "$nsr1" route change default via fee1:3::2
|
||||
ip -6 -net "$nsr2" route change default via fee1:3::1
|
||||
ip -net "$ns2" route add default via 10.0.2.1
|
||||
ip -6 -net "$ns2" route add default via dead:2::1
|
||||
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward \
|
||||
'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
|
||||
|
||||
@@ -611,28 +623,51 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
|
||||
ret=1
|
||||
fi
|
||||
|
||||
if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
|
||||
echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
|
||||
else
|
||||
echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
|
||||
ip netns exec "$nsr1" nft list ruleset
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# Create vlan tagged devices for IPIP traffic.
|
||||
ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
|
||||
ip -net "$nsr1" link set veth1.10 up
|
||||
ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
|
||||
ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
|
||||
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
|
||||
ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
|
||||
ip -net "$nsr1" link set tun1 up
|
||||
ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
|
||||
|
||||
ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
|
||||
ip -net "$nsr1" link set tun0.10 up
|
||||
ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
|
||||
ip -net "$nsr1" route change default via 192.168.200.2
|
||||
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
|
||||
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
|
||||
|
||||
ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
|
||||
ip -net "$nsr1" link set tun6.10 up
|
||||
ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
|
||||
ip -6 -net "$nsr1" route change default via fee1:5::2
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'
|
||||
|
||||
ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
|
||||
ip -net "$nsr2" link set veth0.10 up
|
||||
ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
|
||||
ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
|
||||
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
|
||||
ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
|
||||
ip -net "$nsr2" link set tun1 up
|
||||
ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
|
||||
|
||||
ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
|
||||
ip -net "$nsr2" link set tun0.10 up
|
||||
ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
|
||||
ip -net "$nsr2" route change default via 192.168.200.1
|
||||
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
|
||||
|
||||
ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1
|
||||
ip -net "$nsr2" link set tun6.10 up
|
||||
ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
|
||||
ip -6 -net "$nsr2" route change default via fee1:5::1
|
||||
|
||||
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
|
||||
echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
|
||||
@@ -640,10 +675,19 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
|
||||
ret=1
|
||||
fi
|
||||
|
||||
if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
|
||||
echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
|
||||
else
|
||||
echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
|
||||
ip netns exec "$nsr1" nft list ruleset
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# Restore the previous configuration
|
||||
ip -net "$nsr1" route change default via 192.168.10.2
|
||||
ip -net "$nsr2" route change default via 192.168.10.1
|
||||
ip -net "$ns2" route del default via 10.0.2.1
|
||||
ip -6 -net "$ns2" route del default via dead:2::1
|
||||
}
|
||||
|
||||
# Another test:
|
||||
|
||||
Reference in New Issue
Block a user