mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-10 17:31:37 -04:00
Merge tag 'nf-next-25-05-06' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for net-next: 1) Apparently, nf_conntrack_bridge changes the way in which fragments are handled, leading to packet drop. From Huajian Yang. 2) Add a selftest to stress the conntrack subsystem, from Florian Westphal. 3) nft_quota depletion is off-by-one byte, from Zhongqiu Duan. 4) Rewrites the procfs to read the conntrack table to speed it up, from Florian Westphal. 5) Two patches to prevent overflow in nft_pipapo lookup table and to clamp the maximum bucket size. 6) Update nft_fib selftest to check for loopback packet bypass. From Florian Westphal. netfilter pull request 25-05-06 * tag 'nf-next-25-05-06' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next: selftests: netfilter: nft_fib.sh: check lo packets bypass fib lookup netfilter: nft_set_pipapo: clamp maximum map bucket size to INT_MAX netfilter: nft_set_pipapo: prevent overflow in lookup table allocation netfilter: nf_conntrack: speed up reads from nf_conntrack proc file netfilter: nft_quota: match correctly when the quota just depleted selftests: netfilter: add conntrack stress test netfilter: bridge: Move specific fragmented packet to slow_path instead of dropping it ==================== Link: https://patch.msgid.link/20250505234151.228057-1-pablo@netfilter.org Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
@@ -60,19 +60,19 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
|
||||
struct ip_fraglist_iter iter;
|
||||
struct sk_buff *frag;
|
||||
|
||||
if (first_len - hlen > mtu ||
|
||||
skb_headroom(skb) < ll_rs)
|
||||
if (first_len - hlen > mtu)
|
||||
goto blackhole;
|
||||
|
||||
if (skb_cloned(skb))
|
||||
if (skb_cloned(skb) ||
|
||||
skb_headroom(skb) < ll_rs)
|
||||
goto slow_path;
|
||||
|
||||
skb_walk_frags(skb, frag) {
|
||||
if (frag->len > mtu ||
|
||||
skb_headroom(frag) < hlen + ll_rs)
|
||||
if (frag->len > mtu)
|
||||
goto blackhole;
|
||||
|
||||
if (skb_shared(frag))
|
||||
if (skb_shared(frag) ||
|
||||
skb_headroom(frag) < hlen + ll_rs)
|
||||
goto slow_path;
|
||||
}
|
||||
|
||||
|
||||
@@ -164,20 +164,20 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
|
||||
struct ip6_fraglist_iter iter;
|
||||
struct sk_buff *frag2;
|
||||
|
||||
if (first_len - hlen > mtu ||
|
||||
skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
|
||||
if (first_len - hlen > mtu)
|
||||
goto blackhole;
|
||||
|
||||
if (skb_cloned(skb))
|
||||
if (skb_cloned(skb) ||
|
||||
skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
|
||||
goto slow_path;
|
||||
|
||||
skb_walk_frags(skb, frag2) {
|
||||
if (frag2->len > mtu ||
|
||||
skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
|
||||
if (frag2->len > mtu)
|
||||
goto blackhole;
|
||||
|
||||
/* Partially cloned skb? */
|
||||
if (skb_shared(frag2))
|
||||
if (skb_shared(frag2) ||
|
||||
skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
|
||||
goto slow_path;
|
||||
}
|
||||
|
||||
|
||||
@@ -98,69 +98,87 @@ struct ct_iter_state {
|
||||
struct seq_net_private p;
|
||||
struct hlist_nulls_head *hash;
|
||||
unsigned int htable_size;
|
||||
unsigned int skip_elems;
|
||||
unsigned int bucket;
|
||||
u_int64_t time_now;
|
||||
};
|
||||
|
||||
static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
|
||||
static struct nf_conntrack_tuple_hash *ct_get_next(const struct net *net,
|
||||
struct ct_iter_state *st)
|
||||
{
|
||||
struct ct_iter_state *st = seq->private;
|
||||
struct nf_conntrack_tuple_hash *h;
|
||||
struct hlist_nulls_node *n;
|
||||
unsigned int i;
|
||||
|
||||
for (st->bucket = 0;
|
||||
st->bucket < st->htable_size;
|
||||
st->bucket++) {
|
||||
n = rcu_dereference(
|
||||
hlist_nulls_first_rcu(&st->hash[st->bucket]));
|
||||
if (!is_a_nulls(n))
|
||||
return n;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
for (i = st->bucket; i < st->htable_size; i++) {
|
||||
unsigned int skip = 0;
|
||||
|
||||
static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
|
||||
struct hlist_nulls_node *head)
|
||||
{
|
||||
struct ct_iter_state *st = seq->private;
|
||||
restart:
|
||||
hlist_nulls_for_each_entry_rcu(h, n, &st->hash[i], hnnode) {
|
||||
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
|
||||
struct hlist_nulls_node *tmp = n;
|
||||
|
||||
head = rcu_dereference(hlist_nulls_next_rcu(head));
|
||||
while (is_a_nulls(head)) {
|
||||
if (likely(get_nulls_value(head) == st->bucket)) {
|
||||
if (++st->bucket >= st->htable_size)
|
||||
return NULL;
|
||||
if (!net_eq(net, nf_ct_net(ct)))
|
||||
continue;
|
||||
|
||||
if (++skip <= st->skip_elems)
|
||||
continue;
|
||||
|
||||
/* h should be returned, skip to nulls marker. */
|
||||
while (!is_a_nulls(tmp))
|
||||
tmp = rcu_dereference(hlist_nulls_next_rcu(tmp));
|
||||
|
||||
/* check if h is still linked to hash[i] */
|
||||
if (get_nulls_value(tmp) != i) {
|
||||
skip = 0;
|
||||
goto restart;
|
||||
}
|
||||
|
||||
st->skip_elems = skip;
|
||||
st->bucket = i;
|
||||
return h;
|
||||
}
|
||||
head = rcu_dereference(
|
||||
hlist_nulls_first_rcu(&st->hash[st->bucket]));
|
||||
|
||||
skip = 0;
|
||||
if (get_nulls_value(n) != i)
|
||||
goto restart;
|
||||
|
||||
st->skip_elems = 0;
|
||||
}
|
||||
return head;
|
||||
}
|
||||
|
||||
static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
|
||||
{
|
||||
struct hlist_nulls_node *head = ct_get_first(seq);
|
||||
|
||||
if (head)
|
||||
while (pos && (head = ct_get_next(seq, head)))
|
||||
pos--;
|
||||
return pos ? NULL : head;
|
||||
st->bucket = i;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
|
||||
__acquires(RCU)
|
||||
{
|
||||
struct ct_iter_state *st = seq->private;
|
||||
struct net *net = seq_file_net(seq);
|
||||
|
||||
st->time_now = ktime_get_real_ns();
|
||||
rcu_read_lock();
|
||||
|
||||
nf_conntrack_get_ht(&st->hash, &st->htable_size);
|
||||
return ct_get_idx(seq, *pos);
|
||||
|
||||
if (*pos == 0) {
|
||||
st->skip_elems = 0;
|
||||
st->bucket = 0;
|
||||
} else if (st->skip_elems) {
|
||||
/* resume from last dumped entry */
|
||||
st->skip_elems--;
|
||||
}
|
||||
|
||||
return ct_get_next(net, st);
|
||||
}
|
||||
|
||||
static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
|
||||
{
|
||||
struct ct_iter_state *st = s->private;
|
||||
struct net *net = seq_file_net(s);
|
||||
|
||||
(*pos)++;
|
||||
return ct_get_next(s, v);
|
||||
return ct_get_next(net, st);
|
||||
}
|
||||
|
||||
static void ct_seq_stop(struct seq_file *s, void *v)
|
||||
|
||||
@@ -19,10 +19,16 @@ struct nft_quota {
|
||||
};
|
||||
|
||||
static inline bool nft_overquota(struct nft_quota *priv,
|
||||
const struct sk_buff *skb)
|
||||
const struct sk_buff *skb,
|
||||
bool *report)
|
||||
{
|
||||
return atomic64_add_return(skb->len, priv->consumed) >=
|
||||
atomic64_read(&priv->quota);
|
||||
u64 consumed = atomic64_add_return(skb->len, priv->consumed);
|
||||
u64 quota = atomic64_read(&priv->quota);
|
||||
|
||||
if (report)
|
||||
*report = consumed >= quota;
|
||||
|
||||
return consumed > quota;
|
||||
}
|
||||
|
||||
static inline bool nft_quota_invert(struct nft_quota *priv)
|
||||
@@ -34,7 +40,7 @@ static inline void nft_quota_do_eval(struct nft_quota *priv,
|
||||
struct nft_regs *regs,
|
||||
const struct nft_pktinfo *pkt)
|
||||
{
|
||||
if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv))
|
||||
if (nft_overquota(priv, pkt->skb, NULL) ^ nft_quota_invert(priv))
|
||||
regs->verdict.code = NFT_BREAK;
|
||||
}
|
||||
|
||||
@@ -51,13 +57,13 @@ static void nft_quota_obj_eval(struct nft_object *obj,
|
||||
const struct nft_pktinfo *pkt)
|
||||
{
|
||||
struct nft_quota *priv = nft_obj_data(obj);
|
||||
bool overquota;
|
||||
bool overquota, report;
|
||||
|
||||
overquota = nft_overquota(priv, pkt->skb);
|
||||
overquota = nft_overquota(priv, pkt->skb, &report);
|
||||
if (overquota ^ nft_quota_invert(priv))
|
||||
regs->verdict.code = NFT_BREAK;
|
||||
|
||||
if (overquota &&
|
||||
if (report &&
|
||||
!test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
|
||||
nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
|
||||
NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC);
|
||||
|
||||
@@ -663,6 +663,9 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f,
|
||||
check_add_overflow(rules, extra, &rules_alloc))
|
||||
return -EOVERFLOW;
|
||||
|
||||
if (rules_alloc > (INT_MAX / sizeof(*new_mt)))
|
||||
return -ENOMEM;
|
||||
|
||||
new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL_ACCOUNT);
|
||||
if (!new_mt)
|
||||
return -ENOMEM;
|
||||
@@ -683,6 +686,30 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f,
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* lt_calculate_size() - Get storage size for lookup table with overflow check
|
||||
* @groups: Amount of bit groups
|
||||
* @bb: Number of bits grouped together in lookup table buckets
|
||||
* @bsize: Size of each bucket in lookup table, in longs
|
||||
*
|
||||
* Return: allocation size including alignment overhead, negative on overflow
|
||||
*/
|
||||
static ssize_t lt_calculate_size(unsigned int groups, unsigned int bb,
|
||||
unsigned int bsize)
|
||||
{
|
||||
ssize_t ret = groups * NFT_PIPAPO_BUCKETS(bb) * sizeof(long);
|
||||
|
||||
if (check_mul_overflow(ret, bsize, &ret))
|
||||
return -1;
|
||||
if (check_add_overflow(ret, NFT_PIPAPO_ALIGN_HEADROOM, &ret))
|
||||
return -1;
|
||||
if (ret > INT_MAX)
|
||||
return -1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* pipapo_resize() - Resize lookup or mapping table, or both
|
||||
* @f: Field containing lookup and mapping tables
|
||||
@@ -701,6 +728,7 @@ static int pipapo_resize(struct nft_pipapo_field *f,
|
||||
long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p;
|
||||
unsigned int new_bucket_size, copy;
|
||||
int group, bucket, err;
|
||||
ssize_t lt_size;
|
||||
|
||||
if (rules >= NFT_PIPAPO_RULE0_MAX)
|
||||
return -ENOSPC;
|
||||
@@ -719,10 +747,11 @@ static int pipapo_resize(struct nft_pipapo_field *f,
|
||||
else
|
||||
copy = new_bucket_size;
|
||||
|
||||
new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) *
|
||||
new_bucket_size * sizeof(*new_lt) +
|
||||
NFT_PIPAPO_ALIGN_HEADROOM,
|
||||
GFP_KERNEL);
|
||||
lt_size = lt_calculate_size(f->groups, f->bb, new_bucket_size);
|
||||
if (lt_size < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT);
|
||||
if (!new_lt)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -907,7 +936,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
|
||||
{
|
||||
unsigned int groups, bb;
|
||||
unsigned long *new_lt;
|
||||
size_t lt_size;
|
||||
ssize_t lt_size;
|
||||
|
||||
lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize *
|
||||
sizeof(*f->lt);
|
||||
@@ -917,15 +946,17 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
|
||||
groups = f->groups * 2;
|
||||
bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET;
|
||||
|
||||
lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize *
|
||||
sizeof(*f->lt);
|
||||
lt_size = lt_calculate_size(groups, bb, f->bsize);
|
||||
if (lt_size < 0)
|
||||
return;
|
||||
} else if (f->bb == NFT_PIPAPO_GROUP_BITS_LARGE_SET &&
|
||||
lt_size < NFT_PIPAPO_LT_SIZE_LOW) {
|
||||
groups = f->groups / 2;
|
||||
bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET;
|
||||
|
||||
lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize *
|
||||
sizeof(*f->lt);
|
||||
lt_size = lt_calculate_size(groups, bb, f->bsize);
|
||||
if (lt_size < 0)
|
||||
return;
|
||||
|
||||
/* Don't increase group width if the resulting lookup table size
|
||||
* would exceed the upper size threshold for a "small" set.
|
||||
@@ -936,7 +967,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
|
||||
return;
|
||||
}
|
||||
|
||||
new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT);
|
||||
new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT);
|
||||
if (!new_lt)
|
||||
return;
|
||||
|
||||
@@ -1451,13 +1482,15 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
|
||||
|
||||
for (i = 0; i < old->field_count; i++) {
|
||||
unsigned long *new_lt;
|
||||
ssize_t lt_size;
|
||||
|
||||
memcpy(dst, src, offsetof(struct nft_pipapo_field, lt));
|
||||
|
||||
new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) *
|
||||
src->bsize * sizeof(*dst->lt) +
|
||||
NFT_PIPAPO_ALIGN_HEADROOM,
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
lt_size = lt_calculate_size(src->groups, src->bb, src->bsize);
|
||||
if (lt_size < 0)
|
||||
goto out_lt;
|
||||
|
||||
new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT);
|
||||
if (!new_lt)
|
||||
goto out_lt;
|
||||
|
||||
@@ -1469,6 +1502,9 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
|
||||
src->groups * NFT_PIPAPO_BUCKETS(src->bb));
|
||||
|
||||
if (src->rules > 0) {
|
||||
if (src->rules_alloc > (INT_MAX / sizeof(*src->mt)))
|
||||
goto out_mt;
|
||||
|
||||
dst->mt = kvmalloc_array(src->rules_alloc,
|
||||
sizeof(*src->mt),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
|
||||
@@ -12,6 +12,7 @@ TEST_PROGS += conntrack_dump_flush.sh
|
||||
TEST_PROGS += conntrack_icmp_related.sh
|
||||
TEST_PROGS += conntrack_ipip_mtu.sh
|
||||
TEST_PROGS += conntrack_tcp_unreplied.sh
|
||||
TEST_PROGS += conntrack_resize.sh
|
||||
TEST_PROGS += conntrack_sctp_collision.sh
|
||||
TEST_PROGS += conntrack_vrf.sh
|
||||
TEST_PROGS += conntrack_reverse_clash.sh
|
||||
|
||||
@@ -46,6 +46,7 @@ CONFIG_NETFILTER_XT_MATCH_STATE=m
|
||||
CONFIG_NETFILTER_XT_MATCH_STRING=m
|
||||
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
|
||||
CONFIG_NF_CONNTRACK=m
|
||||
CONFIG_NF_CONNTRACK_PROCFS=y
|
||||
CONFIG_NF_CONNTRACK_EVENTS=y
|
||||
CONFIG_NF_CONNTRACK_FTP=m
|
||||
CONFIG_NF_CONNTRACK_MARK=y
|
||||
|
||||
406
tools/testing/selftests/net/netfilter/conntrack_resize.sh
Executable file
406
tools/testing/selftests/net/netfilter/conntrack_resize.sh
Executable file
@@ -0,0 +1,406 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

source lib.sh

checktool "conntrack --version" "run test without conntrack"
checktool "nft --version" "run test without nft tool"

# Script-wide state. The two sysctl values are captured below so that
# the cleanup handler can write them back.
ret=0
tmpfile=""
ct_buckets=0
init_net_max=0

modprobe -q nf_conntrack

# Nothing below can run without the conntrack sysctls: skip the test.
sysctl -q net.netfilter.nf_conntrack_max >/dev/null || {
	echo "SKIP: conntrack sysctls not available"
	exit $KSFT_SKIP
}

init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1
ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1
|
||||
|
||||
# Exit handler: calls cleanup_all_ns, removes the temporary dump file and
# writes back the conntrack sysctl values recorded at startup.
cleanup() {
	cleanup_all_ns
	rm -f "$tmpfile"

	# restore original sysctl setting
	sysctl -q "net.netfilter.nf_conntrack_max=$init_net_max"
	sysctl -q "net.netfilter.nf_conntrack_buckets=$ct_buckets"
}

trap cleanup EXIT
|
||||
|
||||
# Abort the test unless the legacy net.nf_conntrack_max sysctl currently
# reports the value passed as $1.
check_max_alias()
{
	local expected="$1"
	local legacy

	# old name, expected to alias to the first, i.e. changing one
	# changes the other as well.
	legacy=$(sysctl -n net.nf_conntrack_max)

	if [ $expected -ne "$legacy" ];then
		echo "nf_conntrack_max sysctls should have identical values"
		exit 1
	fi
}
|
||||
|
||||
# Insert random UDP conntrack entries into namespace $1 via ctnetlink.
#
# $1: network namespace name
# $2: minimum number of insert attempts
#
# Inserts are issued in parallel batches of $bulk, so the number of
# attempts is $2 rounded up to the next multiple of $bulk. Individual
# insert failures (e.g. table full, tuple clash) are ignored on purpose.
insert_ctnetlink() {
	local ns="$1"
	local count="$2"
	local i=0
	local bulk=16

	while [ $i -lt $count ] ;do
		# Each insert runs as its own background job. The command
		# string is executed at the top level of "bash -c", where
		# "return" is an error, so a failed job ends with "exit".
		ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \
			if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
				-d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
				--protonum 17 --timeout 120 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \
					exit 1;\
			fi & \
		done ; wait" 2>/dev/null

		i=$((i+bulk))
	done
}
|
||||
|
||||
# Abort the test unless namespace $1 holds exactly $2 conntrack entries.
#
# $1: network namespace name
# $2: expected entry count
# $3: description printed with the result
check_ctcount() {
	local ns="$1"
	local count="$2"
	local msg="$3"
	local now

	# A failed or empty "conntrack -C" must not be mistaken for a match:
	# an empty operand makes the numeric test below error out, which
	# would otherwise fall through to the PASS message.
	if ! now=$(ip netns exec "$ns" conntrack -C) || [ -z "$now" ]; then
		echo "FAIL: unable to read conntrack count in $ns: $msg"
		exit 1
	fi

	if [ "$now" -ne "$count" ] ;then
		echo "expected $count entries in $ns, not $now: $msg"
		exit 1
	fi

	echo "PASS: got $count connections: $msg"
}
|
||||
|
||||
# Keep writing random values to nf_conntrack_buckets for $1 seconds.
ctresize() {
	local duration="$1"
	local now deadline

	now=$(date +%s)
	deadline=$((now + duration))

	while [ $now -lt $deadline ]; do
		sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM
		now=$(date +%s)
	done
}
|
||||
|
||||
# Sleep for a random whole number of seconds in the range [0, $1).
do_rsleep() {
	local limit="$1"

	sleep "$((RANDOM % limit))"
}
|
||||
|
||||
# Flush the conntrack table of namespace $1; error output is discarded.
ct_flush_once() {
	ip netns exec "$1" conntrack -F 2>/dev/null
}
|
||||
|
||||
# Flush the conntrack table of namespace $1 at random intervals.
#
# $1: network namespace name
# $2: duration in seconds; also the upper bound for each random sleep
#
# The clock is not re-read after the initial sleep, so at least one flush
# is issued whenever the duration is positive.
ctflush() {
	local ns="$1"
	local duration="$2"
	local now deadline

	now=$(date +%s)
	deadline=$((now + duration))

	do_rsleep "$duration"

	while [ $now -lt $deadline ]; do
		ct_flush_once "$ns"
		do_rsleep "$duration"
		now=$(date +%s)
	done
}
|
||||
|
||||
# Ping loopback addresses from inside namespace $1 for $2 seconds.
#
# $1: network namespace name
# $2: duration in seconds
# $3: label supplied by callers; not used by this function
#
# Every round pings 127.k.j.1 .. 127.k.j.254 in parallel, then moves on
# to the next j (and to the next k once j wraps).
ctflood()
{
	local ns="$1"
	local duration="$2"
	local msg="$3"
	local now deadline
	local j=0
	local k=0

	now=$(date +%s)
	deadline=$((now + duration))

	while [ $now -lt $deadline ]; do
		ip netns exec "$ns" bash -c \
			"j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1

		# advance to the next block, wrapping both octets at 256
		j=$((j+1))
		if [ $j -eq 256 ];then
			j=0
			k=$(((k+1)%256))
		fi

		now=$(date +%s)
	done

	wait
}
|
||||
|
||||
# Dump the conntrack table of namespace $1 and discard the output.
# Dumps must neither loop forever nor trigger a use-after-free, even
# when the conntrack table is altered while they are in progress.
ct_nulldump()
{
	local netns="$1"

	ip netns exec "$netns" conntrack -L > /dev/null 2>&1 &

	# Don't require /proc support in conntrack
	if test -r /proc/self/net/nf_conntrack ; then
		ip netns exec "$netns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null &
	fi

	# wait for both background dumps
	wait
}
|
||||
|
||||
# Fail the test if the kernel became tainted while it ran.
#
# $1: content of /proc/sys/kernel/tainted sampled before the test
# $2: description printed with the result
#
# Nothing is checked (or printed) if the kernel was already tainted
# before the test started.
check_taint()
{
	local tainted_then="$1"
	local msg="$2"
	local tainted_now=0

	if [ "$tainted_then" -ne 0 ];then
		return
	fi

	read tainted_now < /proc/sys/kernel/tainted

	if [ "$tainted_now" -eq 0 ];then
		echo "PASS: $msg"
		return
	fi

	echo "TAINT: $msg"
	dmesg
	exit 1
}
|
||||
|
||||
# Run ping flood, ctnetlink inserts, flushes and a dump concurrently in
# namespace $1 and wait for all of them.
#
# $timeout is not a parameter: it is read from the calling function's
# scope.
insert_flood()
{
	local ns="$1"
	local entries=$((RANDOM%2000))

	ctflood "$ns" "$timeout" "floodresize" &
	insert_ctnetlink "$ns" "$entries" &
	ctflush "$ns" "$timeout" &
	ct_nulldump "$ns" &

	wait
}
|
||||
|
||||
# Stress test: resize the conntrack hash table continuously while both
# client namespaces flood, insert, flush and dump entries.
test_floodresize_all()
{
	# read by insert_flood through the caller's scope
	local timeout=20
	local ns=""
	local tainted_then=""

	read tainted_then < /proc/sys/kernel/tainted

	for ns in "$nsclient1" "$nsclient2";do
		insert_flood "$ns" &
	done

	# resize table constantly while flood/insert/dump/flushes
	# are happening in parallel.
	ctresize "$timeout"

	# wait for subshells to complete, everything is limited
	# by $timeout.
	wait

	check_taint "$tainted_then" "resize+flood"
}
|
||||
|
||||
# Cross-check the different ways of counting conntrack entries in a netns.
#
# $1: network namespace name
# $2: protocol name handed to "conntrack -L --proto"
#
# Compares the "conntrack -C" counter, the number of lines of a full
# "conntrack -L" dump, the same dump filtered by protocol and, when
# readable, /proc/self/net/nf_conntrack. It also checks the dumps for
# duplicated lines. Any mismatch sets the global $ret to 1; the PASS line
# is printed only if every comparison succeeded.
check_dump()
{
	local ns="$1"
	local protoname="$2"
	local c=0
	local l=0
	local proto=0
	local proc=0
	local unique=""
	local failed=0

	c=$(ip netns exec "$ns" conntrack -C)

	# NOTE: assumes timeouts are large enough to not have
	# expirations in all following tests.
	l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | tee "$tmpfile" | wc -l)

	if [ "$c" -ne "$l" ]; then
		echo "FAIL: count inconsistency for $ns: $c != $l"
		failed=1
	fi

	# check the dump we retrieved is free of duplicated entries.
	unique=$(sort "$tmpfile" | uniq | wc -l)
	if [ "$l" -ne "$unique" ]; then
		echo "FAIL: count identical but listing contained redundant entries: $l != $unique"
		failed=1
	fi

	# we either inserted icmp or only udp, hence, --proto should return same entry count as without filter.
	proto=$(ip netns exec "$ns" conntrack -L --proto "$protoname" 2>/dev/null | wc -l)
	if [ "$l" -ne "$proto" ]; then
		echo "FAIL: dump inconsistency for $ns: $l != $proto"
		failed=1
	fi

	if [ -r /proc/self/net/nf_conntrack ] ; then
		proc=$(ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack")

		if [ "$l" -ne "$proc" ]; then
			echo "FAIL: proc inconsistency for $ns: $l != $proc"
			failed=1
		fi

		proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | uniq | wc -l")

		if [ "$l" -ne "$proc" ]; then
			echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc"
			failed=1
		fi
	fi

	# Do not claim success after a failure has been reported above.
	if [ "$failed" -ne 0 ]; then
		ret=1
		return 0
	fi

	echo "PASS: dump in netns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)"
}
|
||||
|
||||
# Fill one namespace with icmp entries (ping flood) and the other with
# udp entries (ctnetlink inserts), then verify that all dump methods
# agree on the number of entries.
test_dump_all()
{
	local timeout=3
	local tainted_then=""
	local ns=""

	read tainted_then < /proc/sys/kernel/tainted

	# start from empty tables in both namespaces
	for ns in "$nsclient1" "$nsclient2"; do
		ct_flush_once "$ns"
	done

	ctflood "$nsclient1" $timeout "dumpall" &
	insert_ctnetlink "$nsclient2" 2000

	# wait for the background flood to finish
	wait

	check_dump "$nsclient1" "icmp"
	check_dump "$nsclient2" "udp"

	check_taint "$tainted_then" "test parallel conntrack dumps"
}
|
||||
|
||||
# Try to increment a sysctl from inside a namespace and report the outcome.
#
# $1: network namespace name
# $2: full sysctl name (callers pass it with the "net." prefix)
# $3: when greater than 0, a successful change is reported as a test
#     failure and the global $ret is set to 1
#
# Returns 0 if the value changed (the sysctl is writable from the
# namespace), 1 if it stayed the same or could not be read back.
check_sysctl_immutable()
{
	local ns="$1"
	local name="$2"
	local failhard="$3"
	local o=0
	local n=0

	o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
	n=$((o+1))

	# return value isn't reliable, need to read it back
	ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null

	n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)

	[ -z "$n" ] && return 1

	if [ "$o" -ne "$n" ]; then
		if [ "$failhard" -gt 0 ] ;then
			# $name already carries the "net." prefix
			echo "FAIL: $name should not be changeable from namespace (now $n)"
			ret=1
		fi
		return 0
	fi

	return 1
}
|
||||
|
||||
# Lower nf_conntrack_max to 100 in init_net, try to insert more entries
# than that into a namespace and expect exactly 100 to be present.
test_conntrack_max_limit()
{
	local limit=100

	sysctl -q "net.netfilter.nf_conntrack_max=$limit"
	insert_ctnetlink "$nsclient1" 101

	# check netns is clamped by init_net, i.e., either netns follows
	# init_net value, or a higher pernet limit (compared to init_net) is ignored.
	check_ctcount "$nsclient1" "$limit" "netns conntrack_max is init_net bound"

	sysctl -q "net.netfilter.nf_conntrack_max=$init_net_max"
}
|
||||
|
||||
# Flush the nft table in the first namespace only, generate traffic in
# both namespaces and check that only the second one tracked a
# connection.
test_conntrack_disable()
{
	local timeout=2
	local ns=""

	# disable conntrack pickups
	ip netns exec "$nsclient1" nft flush table ip test_ct

	for ns in "$nsclient1" "$nsclient2"; do
		ct_flush_once "$ns"
	done

	ctflood "$nsclient1" "$timeout" "conntrack disable"
	ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1

	# Disabled, should not have picked up any connection.
	check_ctcount "$nsclient1" 0 "conntrack disabled"

	# This one is still active, expect 1 connection.
	check_ctcount "$nsclient2" 1 "conntrack enabled"
}
|
||||
|
||||
# Main test sequence. init_net_max was already captured at startup.

# The legacy sysctl name must alias the netfilter one, both before and
# after a change.
check_max_alias $init_net_max

sysctl -q net.netfilter.nf_conntrack_max="262000"
check_max_alias 262000

setup_ns nsclient1 nsclient2

# check this only works from init_net
# (names are relative to "net."; the prefix is added in the call below)
for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max nf_conntrack_max;do
	check_sysctl_immutable "$nsclient1" "net.$n" 1
done

# won't work on older kernels. If it works, check that the netns obeys the limit
if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then
	# subtest: if pernet is changeable, check that reducing it in pernet
	# limits the pernet entries. Inverse, pernet clamped by a lower init_net
	# setting, is already checked by "test_conntrack_max_limit" test.

	ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1
	insert_ctnetlink "$nsclient1" 2
	check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound"
	ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
fi

for n in "$nsclient1" "$nsclient2";do
	# enable conntrack in both namespaces
	ip netns exec "$n" nft -f - <<EOF
table ip test_ct {
	chain input {
		type filter hook input priority 0
		ct state new counter
	}
}
EOF
done

# check_dump stores the conntrack listing here; bail out if no
# temporary file could be created.
tmpfile=$(mktemp) || exit 1

test_conntrack_max_limit
test_dump_all
test_floodresize_all
test_conntrack_disable

exit $ret
|
||||
@@ -45,6 +45,19 @@ table inet filter {
|
||||
EOF
|
||||
}
|
||||
|
||||
# Load an input-hook ruleset into namespace $1 that counts, logs and
# drops packets for which the fib lookup on "saddr . iif" yields no
# output interface.
load_input_ruleset() {
	local netns

	netns=$1

	ip netns exec "$netns" nft -f /dev/stdin <<EOF
table inet filter {
	chain input {
		type filter hook input priority 0; policy accept;
		fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
	}
}
EOF
}
|
||||
|
||||
load_pbr_ruleset() {
|
||||
local netns=$1
|
||||
|
||||
@@ -165,6 +178,16 @@ check_drops || exit 1
|
||||
|
||||
echo "PASS: fib expression did not cause unwanted packet drops"
|
||||
|
||||
load_input_ruleset "$ns1"
|
||||
|
||||
test_ping 127.0.0.1 ::1 || exit 1
|
||||
check_drops || exit 1
|
||||
|
||||
test_ping 10.0.1.99 dead:1::99 || exit 1
|
||||
check_drops || exit 1
|
||||
|
||||
echo "PASS: fib expression did not discard loopback packets"
|
||||
|
||||
ip netns exec "$nsrouter" nft flush table inet filter
|
||||
|
||||
ip -net "$ns1" route del default
|
||||
|
||||
Reference in New Issue
Block a user