Florian Westphal says:

====================
netfilter: updates for net-next

The following patchset contains Netfilter updates for *net-next*:

Patches 1 to 4 add IP6IP6 tunneling acceleration to the flowtable
infrastructure.  Patch 5 extends test coverage for this.
From Lorenzo Bianconi.

Patch 6 removes a duplicated helper from the xt_time extension; an
existing kernel helper can be used instead. From Jinjie Ruan.

Patch 7 adds an rhashtable to nfnetlink_queue to speed up out-of-order
verdict processing.  Before this, a list walk was required due to the
in-order design assumption.

netfilter pull request nf-next-26-01-29

* tag 'nf-next-26-01-29' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nfnetlink_queue: optimize verdict lookup with hash table
  netfilter: xt_time: use is_leap_year() helper
  selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest
  netfilter: flowtable: Add IP6IP6 tx sw acceleration
  netfilter: flowtable: Add IP6IP6 rx sw acceleration
  netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct
  netfilter: Add ctx pointer in nf_flow_skb_encap_protocol/nf_flow_ip4_tunnel_proto signature
====================

Link: https://patch.msgid.link/20260129105427.12494-1-fw@strlen.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni
2026-01-29 14:56:13 +01:00
6 changed files with 408 additions and 81 deletions

View File

@@ -6,11 +6,13 @@
#include <linux/ipv6.h>
#include <linux/jhash.h>
#include <linux/netfilter.h>
#include <linux/rhashtable-types.h>
#include <linux/skbuff.h>
/* Each queued (to userspace) skbuff has one of these. */
struct nf_queue_entry {
struct list_head list;
struct rhash_head hash_node;
struct sk_buff *skb;
unsigned int id;
unsigned int hook_index; /* index in hook_entries->hook[] */
@@ -20,6 +22,7 @@ struct nf_queue_entry {
#endif
struct nf_hook_state state;
u16 size; /* sizeof(entry) + saved route keys */
u16 queue_num;
/* extra space to store route keys */
};

View File

@@ -1828,6 +1828,32 @@ int ip6_tnl_encap_setup(struct ip6_tnl *t,
}
EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);
/* .ndo_fill_forward_path callback for ip6_tnl devices: describe this
 * tunnel as a DEV_PATH_TUN hop so the flowtable fast path can offload
 * IP6IP6 traffic.  Resolves a route to the tunnel remote address and,
 * on success, records the tunnel endpoints in @path and advances
 * ctx->dev to the underlying output device so path resolution can
 * continue on the lower device.
 *
 * Returns 0 on success or the negative route-lookup error.
 */
static int ip6_tnl_fill_forward_path(struct net_device_path_ctx *ctx,
				     struct net_device_path *path)
{
	struct ip6_tnl *t = netdev_priv(ctx->dev);
	struct flowi6 fl6 = {
		/* route towards the configured tunnel remote */
		.daddr = t->parms.raddr,
	};
	struct dst_entry *dst;
	int err;

	/* NOTE: this relies on ip6_route_output() returning a dst even
	 * on failure, with the error carried in dst->error (no NULL
	 * check is performed here).
	 */
	dst = ip6_route_output(dev_net(ctx->dev), NULL, &fl6);
	if (!dst->error) {
		path->type = DEV_PATH_TUN;
		path->tun.src_v6 = t->parms.laddr;
		path->tun.dst_v6 = t->parms.raddr;
		path->tun.l3_proto = IPPROTO_IPV6;
		path->dev = ctx->dev;
		/* continue walking the path on the route's device */
		ctx->dev = dst->dev;
	}
	err = dst->error;
	dst_release(dst);

	return err;
}
static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_init = ip6_tnl_dev_init,
.ndo_uninit = ip6_tnl_dev_uninit,
@@ -1836,6 +1862,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
.ndo_fill_forward_path = ip6_tnl_fill_forward_path,
};
#define IPXIPX_FEATURES (NETIF_F_SG | \

View File

@@ -14,6 +14,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip6_tunnel.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
@@ -144,12 +145,26 @@ static bool ip_has_options(unsigned int thoff)
return thoff != sizeof(struct iphdr);
}
static void nf_flow_tuple_encap(struct sk_buff *skb,
struct nf_flowtable_ctx {
const struct net_device *in;
u32 offset;
u32 hdrsize;
struct {
/* Tunnel IP header size */
u32 hdr_size;
/* IP tunnel protocol */
u8 proto;
} tun;
};
static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
__be16 inner_proto = skb->protocol;
struct vlan_ethhdr *veth;
struct pppoe_hdr *phdr;
struct ipv6hdr *ip6h;
struct iphdr *iph;
u16 offset = 0;
int i = 0;
@@ -176,22 +191,28 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
break;
}
if (inner_proto == htons(ETH_P_IP)) {
switch (inner_proto) {
case htons(ETH_P_IP):
iph = (struct iphdr *)(skb_network_header(skb) + offset);
if (iph->protocol == IPPROTO_IPIP) {
if (ctx->tun.proto == IPPROTO_IPIP) {
tuple->tun.dst_v4.s_addr = iph->daddr;
tuple->tun.src_v4.s_addr = iph->saddr;
tuple->tun.l3_proto = IPPROTO_IPIP;
}
break;
case htons(ETH_P_IPV6):
ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
if (ctx->tun.proto == IPPROTO_IPV6) {
tuple->tun.dst_v6 = ip6h->daddr;
tuple->tun.src_v6 = ip6h->saddr;
tuple->tun.l3_proto = IPPROTO_IPV6;
}
break;
default:
break;
}
}
struct nf_flowtable_ctx {
const struct net_device *in;
u32 offset;
u32 hdrsize;
};
static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
@@ -259,7 +280,7 @@ static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
tuple->l3proto = AF_INET;
tuple->l4proto = ipproto;
tuple->iifidx = ctx->in->ifindex;
nf_flow_tuple_encap(skb, tuple);
nf_flow_tuple_encap(ctx, skb, tuple);
return 0;
}
@@ -295,15 +316,16 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
struct sk_buff *skb)
{
struct iphdr *iph;
u16 size;
if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
return false;
iph = (struct iphdr *)(skb_network_header(skb) + *psize);
iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
size = iph->ihl << 2;
if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
@@ -312,25 +334,62 @@ static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
if (iph->ttl <= 1)
return false;
if (iph->protocol == IPPROTO_IPIP)
*psize += size;
if (iph->protocol == IPPROTO_IPIP) {
ctx->tun.proto = IPPROTO_IPIP;
ctx->tun.hdr_size = size;
ctx->offset += size;
}
return true;
}
static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
static bool nf_flow_ip6_tunnel_proto(struct nf_flowtable_ctx *ctx,
struct sk_buff *skb)
{
struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
#if IS_ENABLED(CONFIG_IPV6)
struct ipv6hdr *ip6h, _ip6h;
__be16 frag_off;
u8 nexthdr;
int hdrlen;
if (iph->protocol != IPPROTO_IPIP)
ip6h = skb_header_pointer(skb, ctx->offset, sizeof(*ip6h), &_ip6h);
if (!ip6h)
return false;
if (ip6h->hop_limit <= 1)
return false;
nexthdr = ip6h->nexthdr;
hdrlen = ipv6_skip_exthdr(skb, sizeof(*ip6h) + ctx->offset, &nexthdr,
&frag_off);
if (hdrlen < 0)
return false;
if (nexthdr == IPPROTO_IPV6) {
ctx->tun.hdr_size = hdrlen;
ctx->tun.proto = IPPROTO_IPV6;
}
ctx->offset += ctx->tun.hdr_size;
return true;
#else
return false;
#endif /* IS_ENABLED(CONFIG_IPV6) */
}
static void nf_flow_ip_tunnel_pop(struct nf_flowtable_ctx *ctx,
struct sk_buff *skb)
{
if (ctx->tun.proto != IPPROTO_IPIP &&
ctx->tun.proto != IPPROTO_IPV6)
return;
skb_pull(skb, iph->ihl << 2);
skb_pull(skb, ctx->tun.hdr_size);
skb_reset_network_header(skb);
}
static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
u32 *offset)
static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
struct sk_buff *skb, __be16 proto)
{
__be16 inner_proto = skb->protocol;
struct vlan_ethhdr *veth;
@@ -343,7 +402,7 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
if (veth->h_vlan_encapsulated_proto == proto) {
*offset += VLAN_HLEN;
ctx->offset += VLAN_HLEN;
inner_proto = proto;
ret = true;
}
@@ -351,19 +410,28 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
case htons(ETH_P_PPP_SES):
if (nf_flow_pppoe_proto(skb, &inner_proto) &&
inner_proto == proto) {
*offset += PPPOE_SES_HLEN;
ctx->offset += PPPOE_SES_HLEN;
ret = true;
}
break;
}
if (inner_proto == htons(ETH_P_IP))
ret = nf_flow_ip4_tunnel_proto(skb, offset);
switch (inner_proto) {
case htons(ETH_P_IP):
ret = nf_flow_ip4_tunnel_proto(ctx, skb);
break;
case htons(ETH_P_IPV6):
ret = nf_flow_ip6_tunnel_proto(ctx, skb);
break;
default:
break;
}
return ret;
}
static void nf_flow_encap_pop(struct sk_buff *skb,
static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
struct sk_buff *skb,
struct flow_offload_tuple_rhash *tuplehash)
{
struct vlan_hdr *vlan_hdr;
@@ -389,8 +457,9 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
}
}
if (skb->protocol == htons(ETH_P_IP))
nf_flow_ip4_tunnel_pop(skb);
if (skb->protocol == htons(ETH_P_IP) ||
skb->protocol == htons(ETH_P_IPV6))
nf_flow_ip_tunnel_pop(ctx, skb);
}
struct nf_flow_xmit {
@@ -416,7 +485,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
{
struct flow_offload_tuple tuple = {};
if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)))
return NULL;
if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
@@ -460,7 +529,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
flow_offload_refresh(flow_table, flow, false);
nf_flow_encap_pop(skb, tuplehash);
nf_flow_encap_pop(ctx, skb, tuplehash);
thoff -= ctx->offset;
iph = ip_hdr(skb);
@@ -569,6 +638,97 @@ static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
return 0;
}
/* Scratch container for building the 8-byte IPv6 destination-options
 * extension header that carries the tunnel encapsulation limit option
 * on the IP6IP6 tx fast path (see nf_flow_tunnel_ip6ip6_push()).
 */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];	/* raw option bytes, dst1opt points here */
};
/* Software fast-path IP6IP6 encapsulation on transmit: prepend an outer
 * IPv6 header (and, when @encap_limit > 0, a destination-options header
 * carrying the tunnel encapsulation limit option) to @skb.
 *
 * The outer addresses come from the tuple's tunnel metadata with
 * src/dst swapped (tuple->tun.src_v6 becomes the outer daddr).  On
 * success *@ip6_daddr is pointed at the outer destination so the caller
 * resolves the neighbour for the tunnel endpoint rather than the inner
 * destination.
 *
 * Returns 0 on success or a negative errno.
 */
static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
				      struct flow_offload_tuple *tuple,
				      struct in6_addr **ip6_daddr,
				      int encap_limit)
{
	struct ipv6hdr *ip6h = (struct ipv6hdr *)skb_network_header(skb);
	/* inherit hop limit and DS field from the inner header */
	u8 hop_limit = ip6h->hop_limit, proto = IPPROTO_IPV6;
	struct rtable *rt = dst_rtable(tuple->dst_cache);
	__u8 dsfield = ipv6_get_dsfield(ip6h);
	struct flowi6 fl6 = {
		.daddr = tuple->tun.src_v6,
		.saddr = tuple->tun.dst_v6,
		.flowi6_proto = proto,
	};
	int err, mtu;
	u32 headroom;

	err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
	if (err)
		return err;

	skb_set_inner_ipproto(skb, proto);

	/* make room for the outer header (+8 bytes for the optional
	 * encap-limit destination-options header)
	 */
	headroom = sizeof(*ip6h) + LL_RESERVED_SPACE(rt->dst.dev) +
		   rt->dst.header_len;
	if (encap_limit)
		headroom += 8;
	err = skb_cow_head(skb, headroom);
	if (err)
		return err;

	skb_scrub_packet(skb, true);
	/* shrink the path MTU by the encapsulation overhead, but never
	 * below the IPv6 minimum MTU
	 */
	mtu = dst_mtu(&rt->dst) - sizeof(*ip6h);
	if (encap_limit)
		mtu -= 8;
	mtu = max(mtu, IPV6_MIN_MTU);
	skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (encap_limit > 0) {
		/* build the 8-byte dst-opts header: TLV encap limit
		 * option followed by PadN to fill the header
		 */
		struct ipv6_tel_txoption opt = {
			.dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT,
			.dst_opt[3] = 1,
			.dst_opt[4] = encap_limit,
			.dst_opt[5] = IPV6_TLV_PADN,
			.dst_opt[6] = 1,
		};
		struct ipv6_opt_hdr *hopt;

		opt.ops.dst1opt = (struct ipv6_opt_hdr *)opt.dst_opt;
		opt.ops.opt_nflen = 8;
		hopt = skb_push(skb, ipv6_optlen(opt.ops.dst1opt));
		memcpy(hopt, opt.ops.dst1opt, ipv6_optlen(opt.ops.dst1opt));
		hopt->nexthdr = IPPROTO_IPV6;
		/* outer header must now chain to the dst-opts header */
		proto = NEXTHDR_DEST;
	}

	skb_push(skb, sizeof(*ip6h));
	skb_reset_network_header(skb);

	ip6h = ipv6_hdr(skb);
	ip6_flow_hdr(ip6h, dsfield,
		     ip6_make_flowlabel(net, skb, fl6.flowlabel, true, &fl6));
	ip6h->hop_limit = hop_limit;
	ip6h->nexthdr = proto;
	ip6h->daddr = tuple->tun.src_v6;
	ip6h->saddr = tuple->tun.dst_v6;
	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(*ip6h));

	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
	/* neighbour resolution must target the outer destination */
	*ip6_daddr = &tuple->tun.src_v6;

	return 0;
}
/* Apply IP6IP6 encapsulation on the tx fast path when the tuple carries
 * tunnel metadata; plain (non-tunnelled) flows need no work.
 *
 * Returns 0 on success or a negative errno from the push.
 */
static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
				  struct flow_offload_tuple *tuple,
				  struct in6_addr **ip6_daddr,
				  int encap_limit)
{
	if (!tuple->tun_num)
		return 0;

	return nf_flow_tunnel_ip6ip6_push(net, skb, tuple, ip6_daddr,
					  encap_limit);
}
static int nf_flow_encap_push(struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
@@ -838,7 +998,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
tuple->l3proto = AF_INET6;
tuple->l4proto = nexthdr;
tuple->iifidx = ctx->in->ifindex;
nf_flow_tuple_encap(skb, tuple);
nf_flow_tuple_encap(ctx, skb, tuple);
return 0;
}
@@ -846,7 +1006,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
struct nf_flowtable *flow_table,
struct flow_offload_tuple_rhash *tuplehash,
struct sk_buff *skb)
struct sk_buff *skb, int encap_limit)
{
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
@@ -857,6 +1017,12 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
if (flow->tuplehash[!dir].tuple.tun_num) {
mtu -= sizeof(*ip6h);
if (encap_limit > 0)
mtu -= 8; /* encap limit option */
}
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return 0;
@@ -875,7 +1041,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
flow_offload_refresh(flow_table, flow, false);
nf_flow_encap_pop(skb, tuplehash);
nf_flow_encap_pop(ctx, skb, tuplehash);
ip6h = ipv6_hdr(skb);
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
@@ -896,8 +1062,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
{
struct flow_offload_tuple tuple = {};
if (skb->protocol != htons(ETH_P_IPV6) &&
!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
return NULL;
if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
@@ -910,6 +1075,7 @@ unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
int encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple *other_tuple;
@@ -928,7 +1094,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (tuplehash == NULL)
return NF_ACCEPT;
ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb,
encap_limit);
if (ret < 0)
return NF_DROP;
else if (ret == 0)
@@ -947,6 +1114,10 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
other_tuple = &flow->tuplehash[!dir].tuple;
ip6_daddr = &other_tuple->src_v6;
if (nf_flow_tunnel_v6_push(state->net, skb, other_tuple,
&ip6_daddr, encap_limit) < 0)
return NF_DROP;
if (nf_flow_encap_push(skb, other_tuple) < 0)
return NF_DROP;

View File

@@ -30,6 +30,8 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/list.h>
#include <linux/cgroup-defs.h>
#include <linux/rhashtable.h>
#include <linux/jhash.h>
#include <net/gso.h>
#include <net/sock.h>
#include <net/tcp_states.h>
@@ -47,6 +49,8 @@
#endif
#define NFQNL_QMAX_DEFAULT 1024
#define NFQNL_HASH_MIN 1024
#define NFQNL_HASH_MAX 1048576
/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
* includes the header length. Thus, the maximum packet length that we
@@ -56,6 +60,26 @@
*/
#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
/* Composite key for packet lookup: (net, queue_num, packet_id).
 * The key is hashed as raw bytes via jhash2(), so instances are zeroed
 * with memset() before the fields are filled in (nfqnl_init_key()) to
 * keep any padding bytes deterministic.
 */
struct nfqnl_packet_key {
	possible_net_t net;
	u32 packet_id;
	u16 queue_num;
} __aligned(sizeof(u32)); /* jhash2 requires 32-bit alignment */
/* Global rhashtable - one for entire system, all netns */
static struct rhashtable nfqnl_packet_map __read_mostly;
/* Initialize the composite lookup key.  The memset() comes first
 * because the whole struct, including padding, is fed to jhash2();
 * stale padding bytes would make identical keys hash differently.
 */
static inline void nfqnl_init_key(struct nfqnl_packet_key *key,
				  struct net *net, u32 packet_id, u16 queue_num)
{
	memset(key, 0, sizeof(*key));
	write_pnet(&key->net, net);
	key->packet_id = packet_id;
	key->queue_num = queue_num;
}
struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
@@ -100,6 +124,39 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
}
/* rhashtable obj_hashfn: build the composite key from an inserted
 * nf_queue_entry and hash it with jhash2.  Must produce the same hash
 * as the key-side hashing used by rhashtable_lookup_fast() (jhash2 on
 * a u32-aligned key of the same layout), hence the shared
 * nfqnl_init_key() helper.
 */
static u32 nfqnl_packet_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct nf_queue_entry *entry = data;
	struct nfqnl_packet_key key;

	nfqnl_init_key(&key, entry->state.net, entry->id, entry->queue_num);

	return jhash2((u32 *)&key, sizeof(key) / sizeof(u32), seed);
}
/* rhashtable compare callback: returns 0 when the queued entry @obj
 * matches the stack-allocated lookup key in @arg, non-zero otherwise.
 */
static int nfqnl_packet_obj_cmpfn(struct rhashtable_compare_arg *arg,
				  const void *obj)
{
	const struct nfqnl_packet_key *key = arg->key;
	const struct nf_queue_entry *entry = obj;

	/* cheap integer fields first, netns comparison last */
	if (entry->id != key->packet_id)
		return 1;
	if (entry->queue_num != key->queue_num)
		return 1;

	return !net_eq(entry->state.net, read_pnet(&key->net));
}
/* Global packet-map parameters.  key_len is a multiple of 4 (struct is
 * __aligned(sizeof(u32))), so key-side lookups use jhash2 internally,
 * matching nfqnl_packet_obj_hashfn() on the object side.
 */
static const struct rhashtable_params nfqnl_rhashtable_params = {
	.head_offset = offsetof(struct nf_queue_entry, hash_node),
	.key_len = sizeof(struct nfqnl_packet_key),
	.obj_hashfn = nfqnl_packet_obj_hashfn,
	.obj_cmpfn = nfqnl_packet_obj_cmpfn,
	.automatic_shrinking = true,
	.min_size = NFQNL_HASH_MIN,
	.max_size = NFQNL_HASH_MAX,
};
static struct nfqnl_instance *
instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
{
@@ -188,33 +245,45 @@ instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
spin_unlock(&q->instances_lock);
}
static inline void
static int
__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
list_add_tail(&entry->list, &queue->queue_list);
queue->queue_total++;
int err;
entry->queue_num = queue->queue_num;
err = rhashtable_insert_fast(&nfqnl_packet_map, &entry->hash_node,
nfqnl_rhashtable_params);
if (unlikely(err))
return err;
list_add_tail(&entry->list, &queue->queue_list);
queue->queue_total++;
return 0;
}
/* Remove @entry from both lookup structures: the global rhashtable and
 * the per-instance ordered list, updating the queue count.  Callers in
 * this file hold queue->lock (spin_lock_bh) around this.
 */
static void
__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
	rhashtable_remove_fast(&nfqnl_packet_map, &entry->hash_node,
			       nfqnl_rhashtable_params);
	list_del(&entry->list);
	queue->queue_total--;
}
static struct nf_queue_entry *
find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id,
struct net *net)
{
struct nf_queue_entry *entry = NULL, *i;
struct nfqnl_packet_key key;
struct nf_queue_entry *entry;
nfqnl_init_key(&key, net, id, queue->queue_num);
spin_lock_bh(&queue->lock);
list_for_each_entry(i, &queue->queue_list, list) {
if (i->id == id) {
entry = i;
break;
}
}
entry = rhashtable_lookup_fast(&nfqnl_packet_map, &key,
nfqnl_rhashtable_params);
if (entry)
__dequeue_entry(queue, entry);
@@ -404,8 +473,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
spin_lock_bh(&queue->lock);
list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
if (!cmpfn || cmpfn(entry, data)) {
list_del(&entry->list);
queue->queue_total--;
__dequeue_entry(queue, entry);
nfqnl_reinject(entry, NF_DROP);
}
}
@@ -885,23 +953,23 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
if (nf_ct_drop_unconfirmed(entry))
goto err_out_free_nskb;
if (queue->queue_total >= queue->queue_maxlen) {
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
err = 0;
} else {
queue->queue_dropped++;
net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
queue->queue_total);
}
goto err_out_free_nskb;
}
if (queue->queue_total >= queue->queue_maxlen)
goto err_out_queue_drop;
entry->id = ++queue->id_sequence;
*packet_id_ptr = htonl(entry->id);
/* Insert into hash BEFORE unicast. If failure don't send to userspace. */
err = __enqueue_entry(queue, entry);
if (unlikely(err))
goto err_out_queue_drop;
/* nfnetlink_unicast will either free the nskb or add it to a socket */
err = nfnetlink_unicast(nskb, net, queue->peer_portid);
if (err < 0) {
/* Unicast failed - remove entry we just inserted */
__dequeue_entry(queue, entry);
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
err = 0;
@@ -911,11 +979,22 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
goto err_out_unlock;
}
__enqueue_entry(queue, entry);
spin_unlock_bh(&queue->lock);
return 0;
err_out_queue_drop:
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
err = 0;
} else {
queue->queue_dropped++;
if (queue->queue_total >= queue->queue_maxlen)
net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
queue->queue_total);
else
net_warn_ratelimited("nf_queue: hash insert failed: %d\n", err);
}
err_out_free_nskb:
kfree_skb(nskb);
err_out_unlock:
@@ -1427,7 +1506,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
verdict = ntohl(vhdr->verdict);
entry = find_dequeue_entry(queue, ntohl(vhdr->id));
entry = find_dequeue_entry(queue, ntohl(vhdr->id), info->net);
if (entry == NULL)
return -ENOENT;
@@ -1774,10 +1853,14 @@ static int __init nfnetlink_queue_init(void)
{
int status;
status = rhashtable_init(&nfqnl_packet_map, &nfqnl_rhashtable_params);
if (status < 0)
return status;
status = register_pernet_subsys(&nfnl_queue_net_ops);
if (status < 0) {
pr_err("failed to register pernet ops\n");
goto out;
goto cleanup_rhashtable;
}
netlink_register_notifier(&nfqnl_rtnl_notifier);
@@ -1802,7 +1885,8 @@ static int __init nfnetlink_queue_init(void)
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_queue_net_ops);
out:
cleanup_rhashtable:
rhashtable_destroy(&nfqnl_packet_map);
return status;
}
@@ -1814,6 +1898,8 @@ static void __exit nfnetlink_queue_fini(void)
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_queue_net_ops);
rhashtable_destroy(&nfqnl_packet_map);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}

View File

@@ -14,6 +14,7 @@
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/rtc.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/netfilter/x_tables.h>
@@ -64,11 +65,6 @@ static const u_int16_t days_since_epoch[] = {
3287, 2922, 2557, 2191, 1826, 1461, 1096, 730, 365, 0,
};
/* Gregorian leap-year test: years divisible by 4 are leap years,
 * except centuries, which must also be divisible by 400.
 */
static inline bool is_leap(unsigned int y)
{
	if (y % 400 == 0)
		return true;
	if (y % 100 == 0)
		return false;

	return y % 4 == 0;
}
/*
* Each network packet has a (nano)seconds-since-the-epoch (SSTE) timestamp.
* Since we match against days and daytime, the SSTE value needs to be
@@ -138,7 +134,7 @@ static void localtime_3(struct xtm *r, time64_t time)
* (A different approach to use would be to subtract a monthlength
* from w repeatedly while counting.)
*/
if (is_leap(year)) {
if (is_leap_year(year)) {
/* use days_since_leapyear[] in a leap year */
for (i = ARRAY_SIZE(days_since_leapyear) - 1;
i > 0 && days_since_leapyear[i] > w; --i)

View File

@@ -592,16 +592,28 @@ ip -net "$nsr1" link set tun0 up
ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
ip -net "$nsr1" link set tun6 up
ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
ip -net "$nsr2" link set tun0 up
ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1
ip -net "$nsr2" link set tun6 up
ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
ip -net "$nsr1" route change default via 192.168.100.2
ip -net "$nsr2" route change default via 192.168.100.1
ip -6 -net "$nsr1" route change default via fee1:3::2
ip -6 -net "$nsr2" route change default via fee1:3::1
ip -net "$ns2" route add default via 10.0.2.1
ip -6 -net "$ns2" route add default via dead:2::1
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
ip netns exec "$nsr1" nft -a insert rule inet filter forward \
'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
@@ -611,28 +623,51 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
ret=1
fi
if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
else
echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
ip netns exec "$nsr1" nft list ruleset
ret=1
fi
# Create vlan tagged devices for IPIP traffic.
ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
ip -net "$nsr1" link set veth1.10 up
ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
ip -net "$nsr1" link set tun1 up
ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
ip -net "$nsr1" link set tun0.10 up
ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
ip -net "$nsr1" route change default via 192.168.200.2
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
ip -net "$nsr1" link set tun6.10 up
ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
ip -6 -net "$nsr1" route change default via fee1:5::2
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'
ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
ip -net "$nsr2" link set veth0.10 up
ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
ip -net "$nsr2" link set tun1 up
ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
ip -net "$nsr2" link set tun0.10 up
ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
ip -net "$nsr2" route change default via 192.168.200.1
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1
ip -net "$nsr2" link set tun6.10 up
ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
ip -6 -net "$nsr2" route change default via fee1:5::1
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
@@ -640,10 +675,19 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
ret=1
fi
if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
else
echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
ip netns exec "$nsr1" nft list ruleset
ret=1
fi
# Restore the previous configuration
ip -net "$nsr1" route change default via 192.168.10.2
ip -net "$nsr2" route change default via 192.168.10.1
ip -net "$ns2" route del default via 10.0.2.1
ip -6 -net "$ns2" route del default via dead:2::1
}
# Another test: