Files
linux/include/net/netns/ipv6.h
Maoyi Xie e68eadffb7 ipv6: flowlabel: enforce per-netns limit for unprivileged callers
fl_size, fl_ht and ip6_fl_lock in net/ipv6/ip6_flowlabel.c are
file scope and shared across netns. mem_check() reads fl_size to
decide whether to deny non-CAP_NET_ADMIN callers. capable() runs
against init_user_ns, so an unprivileged user in any non-init
userns can push fl_size past FL_MAX_SIZE - FL_MAX_SIZE / 4 and
starve every other unprivileged userns on the host.

Add struct netns_ipv6::flowlabel_count, bumped and decremented
next to fl_size in fl_intern, ip6_fl_gc and ip6_fl_purge. The new
field fills the existing 4-byte hole after ipmr_seq, so struct
netns_ipv6 stays the same size on 64-bit builds.

Bump FL_MAX_SIZE from 4096 to 8192. It has been 4096 since the
file was added. Machines and connection counts have grown.

mem_check() folds an extra per-netns ceiling into the existing
non-CAP_NET_ADMIN conditional. The ceiling is half of the total
budget that unprivileged callers have ever been able to use, i.e.
(FL_MAX_SIZE - FL_MAX_SIZE / 4) / 2 = 3072 entries. With
FL_MAX_SIZE doubled, this preserves the original per-user reach
of 3K (what an unprivileged caller could already obtain before
this change), while forcing an attacker to spread allocations
across at least two netns to exhaust the global non-CAP_NET_ADMIN
budget.

CAP_NET_ADMIN against init_user_ns still bypasses both caps.

The previous patch took ip6_fl_lock across mem_check and
fl_intern, so the new flowlabel_count read in mem_check and the
new flowlabel_count++ in fl_intern run under the same critical
section. flowlabel_count is therefore plain int, like fl_size.

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Suggested-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Cc: stable@vger.kernel.org # v5.15+
Signed-off-by: Maoyi Xie <maoyi.xie@ntu.edu.sg>
Link: https://patch.msgid.link/20260506082416.2259567-3-maoyixie.tju@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-05-08 14:59:14 -07:00

138 lines
3.4 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* ipv6 in net namespaces
*/
#include <net/inet_frag.h>
#ifndef __NETNS_IPV6_H__
#define __NETNS_IPV6_H__
#include <net/dst_ops.h>
#include <uapi/linux/icmpv6.h>
struct ctl_table_header;
struct netns_sysctl_ipv6 {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *hdr;
struct ctl_table_header *route_hdr;
struct ctl_table_header *icmp_hdr;
struct ctl_table_header *frags_hdr;
struct ctl_table_header *xfrm6_hdr;
#endif
int flush_delay;
int ip6_rt_max_size;
int ip6_rt_gc_min_interval;
int ip6_rt_gc_timeout;
int ip6_rt_gc_interval;
int ip6_rt_gc_elasticity;
int ip6_rt_mtu_expires;
int ip6_rt_min_advmss;
u32 multipath_hash_fields;
u8 multipath_hash_policy;
__cacheline_group_begin(sysctl_ipv6_flowlabel);
u8 flowlabel_consistency;
u8 auto_flowlabels;
u8 flowlabel_state_ranges;
__cacheline_group_end(sysctl_ipv6_flowlabel);
u8 icmpv6_echo_ignore_all;
u8 icmpv6_echo_ignore_multicast;
u8 icmpv6_echo_ignore_anycast;
int icmpv6_time;
DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1);
unsigned long *icmpv6_ratemask_ptr;
u8 anycast_src_echo_reply;
u8 bindv6only;
u8 ip_nonlocal_bind;
u8 fwmark_reflect;
int idgen_retries;
int idgen_delay;
int flowlabel_reflect;
int max_dst_opts_cnt;
int max_hbh_opts_cnt;
int max_dst_opts_len;
int max_hbh_opts_len;
int seg6_flowlabel;
u32 ioam6_id;
u64 ioam6_id_wide;
u8 skip_notify_on_dev_down;
u8 fib_notify_on_flag_change;
u8 icmpv6_error_anycast_as_unicast;
u8 icmpv6_errors_extension_mask;
};
struct netns_ipv6 {
/* Keep ip6_dst_ops at the beginning of netns_sysctl_ipv6 */
struct dst_ops ip6_dst_ops;
struct netns_sysctl_ipv6 sysctl;
struct ipv6_devconf *devconf_all;
struct ipv6_devconf *devconf_dflt;
struct inet_peer_base *peers;
struct fqdir *fqdir;
struct fib6_info *fib6_null_entry;
struct rt6_info *ip6_null_entry;
struct rt6_statistics *rt6_stats;
struct timer_list ip6_fib_timer;
struct hlist_head *fib_table_hash;
spinlock_t fib_table_hash_lock;
struct fib6_table *fib6_main_tbl;
struct list_head fib6_walkers;
rwlock_t fib6_walker_lock;
spinlock_t fib6_gc_lock;
atomic_t ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc;
unsigned char flowlabel_has_excl;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
bool fib6_has_custom_rules;
unsigned int fib6_rules_require_fldissect;
#ifdef CONFIG_IPV6_SUBTREES
unsigned int fib6_routes_require_src;
#endif
struct rt6_info *ip6_prohibit_entry;
struct rt6_info *ip6_blk_hole_entry;
struct fib6_table *fib6_local_tbl;
struct fib_rules_ops *fib6_rules_ops;
#endif
struct sock *ndisc_sk;
struct sock *tcp_sk;
struct sock *igmp_sk;
struct sock *mc_autojoin_sk;
struct hlist_head *inet6_addr_lst;
spinlock_t addrconf_hash_lock;
struct delayed_work addr_chk_work;
#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
struct mr_table *mrt6;
#else
struct list_head mr6_tables;
struct fib_rules_ops *mr6_rules_ops;
#endif
#endif
atomic_t dev_addr_genid;
atomic_t fib6_sernum;
struct seg6_pernet_data *seg6_data;
struct fib_notifier_ops *notifier_ops;
struct fib_notifier_ops *ip6mr_notifier_ops;
atomic_t ipmr_seq;
int flowlabel_count;
struct {
struct hlist_head head;
spinlock_t lock;
u32 seq;
} ip6addrlbl_table;
struct ioam6_pernet_data *ioam6_data;
};
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct netns_nf_frag {
struct fqdir *fqdir;
};
#endif
#endif