Merge branch 'follow-up-on-deduplicate-cookie-logic'

Willem de Bruijn says:

====================
follow-up on deduplicate cookie logic

1/3: I came across a leftover from cookie deduplication, due to UDP
having two code paths: lockless fast path and locked cork path.

3/3: Even though the leftover was in the fast path, this prompted me
to complete coverage to the cork path.

2/3: That uncovered a subtle API inconsistency in how dontfrag is
configured. It should not be possible to switch DF mid datagram.
====================

Link: https://patch.msgid.link/20250307033620.411611-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-03-10 13:13:06 -07:00
4 changed files with 32 additions and 15 deletions

View File

@@ -207,6 +207,7 @@ struct inet6_cork {
struct ipv6_txoptions *opt;
u8 hop_limit;
u8 tclass;
u8 dontfrag:1;
};
/* struct ipv6_pinfo - ipv6 private area */

View File

@@ -1386,6 +1386,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
}
v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass;
v6_cork->dontfrag = ipc6->dontfrag;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
@@ -1421,7 +1422,7 @@ static int __ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, size_t length, int transhdrlen,
unsigned int flags, struct ipcm6_cookie *ipc6)
unsigned int flags)
{
struct sk_buff *skb, *skb_prev = NULL;
struct inet_cork *cork = &cork_full->base;
@@ -1475,7 +1476,7 @@ static int __ip6_append_data(struct sock *sk,
if (headersize + transhdrlen > mtu)
goto emsgsize;
if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
if (cork->length + length > mtu - headersize && v6_cork->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
sk->sk_protocol == IPPROTO_ICMPV6 ||
sk->sk_protocol == IPPROTO_RAW)) {
@@ -1855,7 +1856,7 @@ int ip6_append_data(struct sock *sk,
return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
&np->cork, sk_page_frag(sk), getfrag,
from, length, transhdrlen, flags, ipc6);
from, length, transhdrlen, flags);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -2054,13 +2055,11 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
ip6_cork_release(cork, &v6_cork);
return ERR_PTR(err);
}
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk);
err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
flags, ipc6);
flags);
if (err) {
__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
return ERR_PTR(err);

View File

@@ -50,8 +50,9 @@ check_result() {
# IPV6_DONTFRAG
for ovr in setsock cmsg both diff; do
for df in 0 1; do
for p in u i r; do
for p in u U i r; do
[ $p == "u" ] && prot=UDP
[ $p == "U" ] && prot=UDP
[ $p == "i" ] && prot=ICMP
[ $p == "r" ] && prot=RAW
@@ -81,8 +82,9 @@ test_dscp() {
ip $IPVER -netns $NS route add table 300 prohibit any
for ovr in setsock cmsg both diff; do
for p in u i r; do
for p in u U i r; do
[ $p == "u" ] && prot=UDP
[ $p == "U" ] && prot=UDP
[ $p == "i" ] && prot=ICMP
[ $p == "r" ] && prot=RAW
@@ -134,8 +136,9 @@ test_ttl_hoplimit() {
local -r LIM=4
for ovr in setsock cmsg both diff; do
for p in u i r; do
for p in u U i r; do
[ $p == "u" ] && prot=UDP
[ $p == "U" ] && prot=UDP
[ $p == "i" ] && prot=ICMP
[ $p == "r" ] && prot=RAW
@@ -166,7 +169,7 @@ test_ttl_hoplimit -4 $TGT4 ttl
test_ttl_hoplimit -6 $TGT6 hlim
# IPV6 exthdr
for p in u i r; do
for p in u U i r; do
# Very basic "does it crash" test
for h in h d r; do
$NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234

View File

@@ -33,6 +33,7 @@ enum {
ERN_RECVERR,
ERN_CMSG_RD,
ERN_CMSG_RCV,
ERN_SEND_MORE,
};
struct option_cmsg_u32 {
@@ -46,6 +47,7 @@ struct options {
const char *service;
unsigned int size;
unsigned int num_pkt;
bool msg_more;
struct {
unsigned int mark;
unsigned int dontfrag;
@@ -94,7 +96,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
"\t\t-S send() size\n"
"\t\t-4/-6 Force IPv4 / IPv6 only\n"
"\t\t-p prot Socket protocol\n"
"\t\t (u = UDP (default); i = ICMP; r = RAW)\n"
"\t\t (u = UDP (default); i = ICMP; r = RAW;\n"
"\t\t U = UDP with MSG_MORE)\n"
"\n"
"\t\t-m val Set SO_MARK with given value\n"
"\t\t-M val Set SO_MARK via setsockopt\n"
@@ -109,8 +112,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
"\t\t-l val Set TTL/HOPLIMIT via cmsg\n"
"\t\t-L val Set TTL/HOPLIMIT via setsockopt\n"
"\t\t-H type Add an IPv6 header option\n"
"\t\t (h = HOP; d = DST; r = RTDST)"
"");
"\t\t (h = HOP; d = DST; r = RTDST)\n"
"\n");
exit(ERN_HELP);
}
@@ -133,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[])
opt.sock.family = AF_INET6;
break;
case 'p':
if (*optarg == 'u' || *optarg == 'U') {
if (*optarg == 'u') {
opt.sock.proto = IPPROTO_UDP;
} else if (*optarg == 'U') {
opt.sock.proto = IPPROTO_UDP;
opt.msg_more = true;
} else if (*optarg == 'i' || *optarg == 'I') {
opt.sock.proto = IPPROTO_ICMP;
} else if (*optarg == 'r') {
@@ -531,7 +537,7 @@ int main(int argc, char *argv[])
cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
for (i = 0; i < opt.num_pkt; i++) {
err = sendmsg(fd, &msg, 0);
err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0);
if (err < 0) {
if (!opt.silent_send)
fprintf(stderr, "send failed: %s\n", strerror(errno));
@@ -542,6 +548,14 @@ int main(int argc, char *argv[])
err = ERN_SEND_SHORT;
goto err_out;
}
if (opt.msg_more) {
err = write(fd, NULL, 0);
if (err < 0) {
fprintf(stderr, "send more: %s\n", strerror(errno));
err = ERN_SEND_MORE;
goto err_out;
}
}
}
err = ERN_SUCCESS;