mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 12:21:22 -05:00
Merge tag 'nf-next-25-09-11' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Florian Westphal says:

====================
netfilter: updates for net-next

1) Don't respond to ICMP_UNREACH errors with another ICMP_UNREACH
   error.

2) Support fetching the current bridge ethernet address.
   This allows a more flexible approach to packet redirection
   on bridges without need to use hardcoded addresses.
   From Fernando Fernandez Mancera.

3) Zap a few no-longer needed conditionals from ipvs packet path
   and convert to READ/WRITE_ONCE to avoid KCSAN warnings.
   From Zhang Tengfei.

4) Remove a no-longer-used macro argument in ipset, from Zhen Ni.

* tag 'nf-next-25-09-11' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nf_reject: don't reply to icmp error messages
  ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable
  netfilter: nft_meta_bridge: introduce NFT_META_BRI_IIFHWADDR support
  netfilter: ipset: Remove unused htable_bits in macro ahash_region
  selftest:net: fixed spelling mistakes
====================

Link: https://patch.msgid.link/20250911143819.14753-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
@@ -959,6 +959,7 @@ enum nft_exthdr_attributes {
|
||||
* @NFT_META_SDIF: slave device interface index
|
||||
* @NFT_META_SDIFNAME: slave device interface name
|
||||
* @NFT_META_BRI_BROUTE: packet br_netfilter_broute bit
|
||||
* @NFT_META_BRI_IIFHWADDR: packet input bridge interface ethernet address
|
||||
*/
|
||||
enum nft_meta_keys {
|
||||
NFT_META_LEN,
|
||||
@@ -999,6 +1000,7 @@ enum nft_meta_keys {
|
||||
NFT_META_SDIFNAME,
|
||||
NFT_META_BRI_BROUTE,
|
||||
__NFT_META_IIFTYPE,
|
||||
NFT_META_BRI_IIFHWADDR,
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -59,6 +59,13 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
|
||||
nft_reg_store_be16(dest, htons(p_proto));
|
||||
return;
|
||||
}
|
||||
case NFT_META_BRI_IIFHWADDR:
|
||||
br_dev = nft_meta_get_bridge(in);
|
||||
if (!br_dev)
|
||||
goto err;
|
||||
|
||||
memcpy(dest, br_dev->dev_addr, ETH_ALEN);
|
||||
return;
|
||||
default:
|
||||
return nft_meta_get_eval(expr, regs, pkt);
|
||||
}
|
||||
@@ -86,6 +93,9 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
|
||||
case NFT_META_BRI_IIFVPROTO:
|
||||
len = sizeof(u16);
|
||||
break;
|
||||
case NFT_META_BRI_IIFHWADDR:
|
||||
len = ETH_ALEN;
|
||||
break;
|
||||
default:
|
||||
return nft_meta_get_init(ctx, expr, tb);
|
||||
}
|
||||
@@ -175,6 +185,7 @@ static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx,
|
||||
|
||||
switch (priv->key) {
|
||||
case NFT_META_BRI_BROUTE:
|
||||
case NFT_META_BRI_IIFHWADDR:
|
||||
hooks = 1 << NF_BR_PRE_ROUTING;
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -80,6 +80,27 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_reject_skb_v4_tcp_reset);
|
||||
|
||||
static bool nf_skb_is_icmp_unreach(const struct sk_buff *skb)
|
||||
{
|
||||
const struct iphdr *iph = ip_hdr(skb);
|
||||
u8 *tp, _type;
|
||||
int thoff;
|
||||
|
||||
if (iph->protocol != IPPROTO_ICMP)
|
||||
return false;
|
||||
|
||||
thoff = skb_network_offset(skb) + sizeof(*iph);
|
||||
|
||||
tp = skb_header_pointer(skb,
|
||||
thoff + offsetof(struct icmphdr, type),
|
||||
sizeof(_type), &_type);
|
||||
|
||||
if (!tp)
|
||||
return false;
|
||||
|
||||
return *tp == ICMP_DEST_UNREACH;
|
||||
}
|
||||
|
||||
struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
|
||||
struct sk_buff *oldskb,
|
||||
const struct net_device *dev,
|
||||
@@ -100,6 +121,10 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
|
||||
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
|
||||
return NULL;
|
||||
|
||||
/* don't reply to ICMP_DEST_UNREACH with ICMP_DEST_UNREACH. */
|
||||
if (nf_skb_is_icmp_unreach(oldskb))
|
||||
return NULL;
|
||||
|
||||
/* RFC says return as much as we can without exceeding 576 bytes. */
|
||||
len = min_t(unsigned int, 536, oldskb->len);
|
||||
|
||||
|
||||
@@ -104,6 +104,32 @@ struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_reject_skb_v6_tcp_reset);
|
||||
|
||||
static bool nf_skb_is_icmp6_unreach(const struct sk_buff *skb)
|
||||
{
|
||||
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
|
||||
u8 proto = ip6h->nexthdr;
|
||||
u8 _type, *tp;
|
||||
int thoff;
|
||||
__be16 fo;
|
||||
|
||||
thoff = ipv6_skip_exthdr(skb, ((u8 *)(ip6h + 1) - skb->data), &proto, &fo);
|
||||
|
||||
if (thoff < 0 || thoff >= skb->len || fo != 0)
|
||||
return false;
|
||||
|
||||
if (proto != IPPROTO_ICMPV6)
|
||||
return false;
|
||||
|
||||
tp = skb_header_pointer(skb,
|
||||
thoff + offsetof(struct icmp6hdr, icmp6_type),
|
||||
sizeof(_type), &_type);
|
||||
|
||||
if (!tp)
|
||||
return false;
|
||||
|
||||
return *tp == ICMPV6_DEST_UNREACH;
|
||||
}
|
||||
|
||||
struct sk_buff *nf_reject_skb_v6_unreach(struct net *net,
|
||||
struct sk_buff *oldskb,
|
||||
const struct net_device *dev,
|
||||
@@ -117,6 +143,10 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net,
|
||||
if (!nf_reject_ip6hdr_validate(oldskb))
|
||||
return NULL;
|
||||
|
||||
/* Don't reply to ICMPV6_DEST_UNREACH with ICMPV6_DEST_UNREACH */
|
||||
if (nf_skb_is_icmp6_unreach(oldskb))
|
||||
return NULL;
|
||||
|
||||
/* Include "As much of invoking packet as possible without the ICMPv6
|
||||
* packet exceeding the minimum IPv6 MTU" in the ICMP payload.
|
||||
*/
|
||||
|
||||
@@ -63,7 +63,7 @@ struct hbucket {
|
||||
: jhash_size((htable_bits) - HTABLE_REGION_BITS))
|
||||
#define ahash_sizeof_regions(htable_bits) \
|
||||
(ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
|
||||
#define ahash_region(n, htable_bits) \
|
||||
#define ahash_region(n) \
|
||||
((n) / jhash_size(HTABLE_REGION_BITS))
|
||||
#define ahash_bucket_start(h, htable_bits) \
|
||||
((htable_bits) < HTABLE_REGION_BITS ? 0 \
|
||||
@@ -702,7 +702,7 @@ mtype_resize(struct ip_set *set, bool retried)
|
||||
#endif
|
||||
key = HKEY(data, h->initval, htable_bits);
|
||||
m = __ipset_dereference(hbucket(t, key));
|
||||
nr = ahash_region(key, htable_bits);
|
||||
nr = ahash_region(key);
|
||||
if (!m) {
|
||||
m = kzalloc(sizeof(*m) +
|
||||
AHASH_INIT_SIZE * dsize,
|
||||
@@ -852,7 +852,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
|
||||
rcu_read_lock_bh();
|
||||
t = rcu_dereference_bh(h->table);
|
||||
key = HKEY(value, h->initval, t->htable_bits);
|
||||
r = ahash_region(key, t->htable_bits);
|
||||
r = ahash_region(key);
|
||||
atomic_inc(&t->uref);
|
||||
elements = t->hregion[r].elements;
|
||||
maxelem = t->maxelem;
|
||||
@@ -1050,7 +1050,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
|
||||
rcu_read_lock_bh();
|
||||
t = rcu_dereference_bh(h->table);
|
||||
key = HKEY(value, h->initval, t->htable_bits);
|
||||
r = ahash_region(key, t->htable_bits);
|
||||
r = ahash_region(key);
|
||||
atomic_inc(&t->uref);
|
||||
rcu_read_unlock_bh();
|
||||
|
||||
|
||||
@@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
|
||||
* conntrack cleanup for the net.
|
||||
*/
|
||||
smp_rmb();
|
||||
if (ipvs->enable)
|
||||
if (READ_ONCE(ipvs->enable))
|
||||
ip_vs_conn_drop_conntrack(cp);
|
||||
}
|
||||
|
||||
@@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
|
||||
cond_resched_rcu();
|
||||
|
||||
/* netns clean up started, abort delayed work */
|
||||
if (!ipvs->enable)
|
||||
if (!READ_ONCE(ipvs->enable))
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
@@ -1353,9 +1353,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
|
||||
if (unlikely(!skb_dst(skb)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
if (!ipvs->enable)
|
||||
return NF_ACCEPT;
|
||||
|
||||
ip_vs_fill_iph_skb(af, skb, false, &iph);
|
||||
#ifdef CONFIG_IP_VS_IPV6
|
||||
if (af == AF_INET6) {
|
||||
@@ -1940,7 +1937,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
/* ipvs enabled in this netns ? */
|
||||
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
|
||||
if (unlikely(sysctl_backup_only(ipvs)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
ip_vs_fill_iph_skb(af, skb, false, &iph);
|
||||
@@ -2108,7 +2105,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
|
||||
int r;
|
||||
|
||||
/* ipvs enabled in this netns ? */
|
||||
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
|
||||
if (unlikely(sysctl_backup_only(ipvs)))
|
||||
return NF_ACCEPT;
|
||||
|
||||
if (state->pf == NFPROTO_IPV4) {
|
||||
@@ -2295,7 +2292,7 @@ static int __net_init __ip_vs_init(struct net *net)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Hold the beast until a service is registered */
|
||||
ipvs->enable = 0;
|
||||
WRITE_ONCE(ipvs->enable, 0);
|
||||
ipvs->net = net;
|
||||
/* Counters used for creating unique names */
|
||||
ipvs->gen = atomic_read(&ipvs_netns_cnt);
|
||||
@@ -2367,7 +2364,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
|
||||
ipvs = net_ipvs(net);
|
||||
ip_vs_unregister_hooks(ipvs, AF_INET);
|
||||
ip_vs_unregister_hooks(ipvs, AF_INET6);
|
||||
ipvs->enable = 0; /* Disable packet reception */
|
||||
WRITE_ONCE(ipvs->enable, 0); /* Disable packet reception */
|
||||
smp_wmb();
|
||||
ip_vs_sync_net_cleanup(ipvs);
|
||||
}
|
||||
|
||||
@@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct *work)
|
||||
struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
|
||||
|
||||
/* netns clean up started, abort delayed work */
|
||||
if (!ipvs->enable)
|
||||
if (!READ_ONCE(ipvs->enable))
|
||||
goto unlock;
|
||||
if (!kd)
|
||||
continue;
|
||||
@@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
|
||||
|
||||
*svc_p = svc;
|
||||
|
||||
if (!ipvs->enable) {
|
||||
if (!READ_ONCE(ipvs->enable)) {
|
||||
/* Now there is a service - full throttle */
|
||||
ipvs->enable = 1;
|
||||
WRITE_ONCE(ipvs->enable, 1);
|
||||
|
||||
/* Start estimation for first time */
|
||||
ip_vs_est_reload_start(ipvs);
|
||||
|
||||
@@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
|
||||
void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
|
||||
{
|
||||
/* Ignore reloads before first service is added */
|
||||
if (!ipvs->enable)
|
||||
if (!READ_ONCE(ipvs->enable))
|
||||
return;
|
||||
ip_vs_est_stopped_recalc(ipvs);
|
||||
/* Bump the kthread configuration genid */
|
||||
@@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
|
||||
int i;
|
||||
|
||||
if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
|
||||
ipvs->enable && ipvs->est_max_threads)
|
||||
READ_ONCE(ipvs->enable) && ipvs->est_max_threads)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&ipvs->est_mutex);
|
||||
@@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
|
||||
}
|
||||
|
||||
/* Start kthread tasks only when services are present */
|
||||
if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
|
||||
if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
|
||||
ret = ip_vs_est_kthread_start(ipvs, kd);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
@@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
|
||||
struct ip_vs_estimator *est = &stats->est;
|
||||
int ret;
|
||||
|
||||
if (!ipvs->est_max_threads && ipvs->enable)
|
||||
if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))
|
||||
ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
|
||||
|
||||
est->ktid = -1;
|
||||
@@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
|
||||
/* Wait for cpufreq frequency transition */
|
||||
wait_event_idle_timeout(wq, kthread_should_stop(),
|
||||
HZ / 50);
|
||||
if (!ipvs->enable || kthread_should_stop())
|
||||
if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
|
||||
goto stop;
|
||||
}
|
||||
|
||||
@@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
|
||||
rcu_read_unlock();
|
||||
local_bh_enable();
|
||||
|
||||
if (!ipvs->enable || kthread_should_stop())
|
||||
if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
|
||||
goto stop;
|
||||
cond_resched();
|
||||
|
||||
@@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
|
||||
mutex_lock(&ipvs->est_mutex);
|
||||
for (id = 1; id < ipvs->est_kt_count; id++) {
|
||||
/* netns clean up started, abort */
|
||||
if (!ipvs->enable)
|
||||
if (!READ_ONCE(ipvs->enable))
|
||||
goto unlock2;
|
||||
kd = ipvs->est_kt_arr[id];
|
||||
if (!kd)
|
||||
@@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
|
||||
id = ipvs->est_kt_count;
|
||||
|
||||
next_kt:
|
||||
if (!ipvs->enable || kthread_should_stop())
|
||||
if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
|
||||
goto unlock;
|
||||
id--;
|
||||
if (id < 0)
|
||||
|
||||
@@ -569,7 +569,7 @@ test_redirect6()
|
||||
ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
|
||||
|
||||
if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
|
||||
echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
|
||||
echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
|
||||
lret=1
|
||||
fi
|
||||
|
||||
@@ -859,7 +859,7 @@ EOF
|
||||
# from router:service bypass connection tracking.
|
||||
test_port_shadow_notrack "$family"
|
||||
|
||||
# test nat based mitigation: fowarded packets coming from service port
|
||||
# test nat based mitigation: forwarded packets coming from service port
|
||||
# are masqueraded with random highport.
|
||||
test_port_shadow_pat "$family"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user