From d26796ae589401d209f63f462d5aee3746f3c51e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:04:55 +0800 Subject: [PATCH 01/16] udp: check udp sock encap_type in __udp_lib_err There is a chance that __udp4/6_lib_lookup() returns a udp encap sock in __udp_lib_err(), like the udp encap listening sock may use the same port as remote encap port, in which case it should go to __udp4/6_lib_err_encap() for more validation before processing the icmp packet. This patch is to check encap_type in __udp_lib_err() for the further validation for a encap sock. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 09f0a23d1a01..ca04a8a35e52 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -702,7 +702,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); - if (!sk) { + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udp_encap_needed_key)) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 29d9691359b9..cde9b8874d4b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -560,7 +560,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); - if (!sk) { + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udpv6_encap_needed_key)) { From 3c7d4415db6a2fa9312e3b5a3e7ca08ed09c9f57 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:04:56 +0800 Subject: [PATCH 02/16] udp6: move the mss check after udp gso tunnel processing For some protocol's gso, like SCTP, it's using GSO_BY_FRAGS for gso_size. When using UDP to encapsulate its packet, it will return error in udp6_ufo_fragment() as skb->len < gso_size, and it will never go to the gso tunnel processing. So we should move this check after udp gso tunnel processing, the same as udp4_ufo_fragment() does. v1->v2: - no change. v2->v3: - not do any cleanup. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/ipv6/udp_offload.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 584157a07759..aa602af8e49d 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -28,10 +28,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int tnl_hlen; int err; - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features, true); @@ -48,6 +44,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) return __udp_gso_segment(skb, features); + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot * do checksum of UDP packets sent as multiple IP fragments. */ From 527beb8ef9c02c11f8ca0d59fc46f7d081db1c33 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:04:57 +0800 Subject: [PATCH 03/16] udp: support sctp over udp in skb_udp_tunnel_segment For the gso of sctp over udp packets, sctp_gso_segment() will be called in skb_udp_tunnel_segment(), we need to set transport_header to sctp header. As all the current HWs can't handle both crc checksum and udp checksum at the same time, the crc checksum has to be done in sctp_gso_segment() by removing the NETIF_F_SCTP_CRC flag from the features. Meanwhile, if the HW can't do udp checksum, csum and csum_start has to be set correctly, and udp checksum will be done in __skb_udp_tunnel_segment() by calling gso_make_checksum(). Thanks to Paolo, Marcelo and Guillaume for helping with this one. v1->v2: - no change. v2->v3: - remove the he NETIF_F_SCTP_CRC flag from the features. - set csum and csum_start in sctp_gso_make_checksum(). Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 3 +++ net/sctp/offload.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index e67a66fbf27b..b8b1fdeb5af9 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -49,6 +49,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb_set_transport_header(skb, skb_inner_transport_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); skb->protocol = new_protocol; @@ -67,6 +68,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); features &= skb->dev->hw_enc_features; + /* CRC checksum can't be handled by HW when it's a UDP tunneling packet. */ + features &= ~NETIF_F_SCTP_CRC; /* The only checksum offload we care about from here on out is the * outer one so strip the existing checksum feature flags and diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 74847d613835..ce281a9a2875 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -27,7 +27,11 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; skb->csum_not_inet = 0; - gso_reset_checksum(skb, ~0); + /* csum and csum_start in GSO CB may be needed to do the UDP + * checksum when it's a UDP tunneling packet. + */ + SKB_GSO_CB(skb)->csum = (__force __wsum)~0; + SKB_GSO_CB(skb)->csum_start = skb_headroom(skb) + skb->len; return sctp_compute_cksum(skb, skb_transport_offset(skb)); } From 965ae44412f8c0c19945b3f62bc945ad0b15a8aa Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:04:58 +0800 Subject: [PATCH 04/16] sctp: create udp4 sock and add its encap_rcv This patch is to add the functions to create/release udp4 sock, and set the sock's encap_rcv to process the incoming udp encap sctp packets. In sctp_udp_rcv(), as we can see, all we need to do is fix the transport header for sctp_rcv(), then it would implement the part of rfc6951#section-5.4: "When an encapsulated packet is received, the UDP header is removed. Then, the generic lookup is performed, as done by an SCTP stack whenever a packet is received, to find the association for the received SCTP packet" Note that these functions will be called in the last patch of this patchset when enabling this feature. v1->v2: - Add pr_err() when fails to create udp v4 sock. v2->v3: - Add 'select NET_UDP_TUNNEL' in sctp Kconfig. v3->v4: - No change. v4->v5: - Change to set udp_port to 0 by default. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- include/net/netns/sctp.h | 5 +++++ include/net/sctp/constants.h | 2 ++ include/net/sctp/sctp.h | 2 ++ net/sctp/Kconfig | 1 + net/sctp/protocol.c | 43 ++++++++++++++++++++++++++++++++++++ 5 files changed, 53 insertions(+) diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index d8d02e4188d1..8cc9aff24329 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -22,6 +22,11 @@ struct netns_sctp { */ struct sock *ctl_sock; + /* UDP tunneling listening sock. */ + struct sock *udp4_sock; + /* UDP tunneling listening port. */ + int udp_port; + /* This is the global local address list. * We actively maintain this complete list of addresses on * the system by catching address add/delete events. diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 122d9e2d8dfd..14a0d22c9113 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -286,6 +286,8 @@ enum { SCTP_MAX_GABS = 16 }; * functions simpler to write. */ +#define SCTP_DEFAULT_UDP_PORT 9899 /* default UDP tunneling port */ + /* These are the values for pf exposure, UNUSED is to keep compatible with old * applications by default. */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 4fc747b778eb..bfd87a0aa2f1 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -84,6 +84,8 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr, struct sctp_pf *sctp_get_pf_specific(sa_family_t family); int sctp_register_pf(struct sctp_pf *, sa_family_t); void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); +int sctp_udp_sock_start(struct net *net); +void sctp_udp_sock_stop(struct net *net); /* * sctp/socket.c diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 39d7fa9569f8..5da599ff84a9 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -11,6 +11,7 @@ menuconfig IP_SCTP select CRYPTO_HMAC select CRYPTO_SHA1 select LIBCRC32C + select NET_UDP_TUNNEL help Stream Control Transmission Protocol diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 25833238fe93..0f7933422d8e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -44,6 +44,7 @@ #include #include #include +#include #define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024) @@ -840,6 +841,45 @@ static int sctp_ctl_sock_init(struct net *net) return 0; } +static int sctp_udp_rcv(struct sock *sk, struct sk_buff *skb) +{ + skb_set_transport_header(skb, sizeof(struct udphdr)); + sctp_rcv(skb); + return 0; +} + +int sctp_udp_sock_start(struct net *net) +{ + struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct udp_port_cfg udp_conf = {0}; + struct socket *sock; + int err; + + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.local_udp_port = htons(net->sctp.udp_port); + err = udp_sock_create(net, &udp_conf, &sock); + if (err) { + pr_err("Failed to create the SCTP UDP tunneling v4 sock\n"); + return err; + } + + tuncfg.encap_type = 1; + tuncfg.encap_rcv = sctp_udp_rcv; + setup_udp_tunnel_sock(net, sock, &tuncfg); + net->sctp.udp4_sock = sock->sk; + + return 0; +} + +void sctp_udp_sock_stop(struct net *net) +{ + if (net->sctp.udp4_sock) { + udp_tunnel_sock_release(net->sctp.udp4_sock->sk_socket); + net->sctp.udp4_sock = NULL; + } +} + /* Register address family specific functions. */ int sctp_register_af(struct sctp_af *af) { @@ -1271,6 +1311,9 @@ static int __net_init sctp_defaults_init(struct net *net) /* Enable ECN by default. */ net->sctp.ecn_enable = 1; + /* Set UDP tunneling listening port to 0 by default */ + net->sctp.udp_port = 0; + /* Set SCOPE policy to enabled */ net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE; From 9d6ba260a0734d5e56243929a69d57ebbf0cb245 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:04:59 +0800 Subject: [PATCH 05/16] sctp: create udp6 sock and set its encap_rcv This patch is to add the udp6 sock part in sctp_udp_sock_start/stop(). udp_conf.use_udp6_rx_checksums is set to true, as: "The SCTP checksum MUST be computed for IPv4 and IPv6, and the UDP checksum SHOULD be computed for IPv4 and IPv6" says in rfc6951#section-5.3. v1->v2: - Add pr_err() when fails to create udp v6 sock. - Add #if IS_ENABLED(CONFIG_IPV6) not to create v6 sock when ipv6 is disabled. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- include/net/netns/sctp.h | 1 + net/sctp/protocol.c | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 8cc9aff24329..247b401a0f52 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -24,6 +24,7 @@ struct netns_sctp { /* UDP tunneling listening sock. */ struct sock *udp4_sock; + struct sock *udp6_sock; /* UDP tunneling listening port. */ int udp_port; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 0f7933422d8e..8410c9a3f145 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -869,6 +869,28 @@ int sctp_udp_sock_start(struct net *net) setup_udp_tunnel_sock(net, sock, &tuncfg); net->sctp.udp4_sock = sock->sk; +#if IS_ENABLED(CONFIG_IPV6) + memset(&udp_conf, 0, sizeof(udp_conf)); + + udp_conf.family = AF_INET6; + udp_conf.local_ip6 = in6addr_any; + udp_conf.local_udp_port = htons(net->sctp.udp_port); + udp_conf.use_udp6_rx_checksums = true; + udp_conf.ipv6_v6only = true; + err = udp_sock_create(net, &udp_conf, &sock); + if (err) { + pr_err("Failed to create the SCTP UDP tunneling v6 sock\n"); + udp_tunnel_sock_release(net->sctp.udp4_sock->sk_socket); + net->sctp.udp4_sock = NULL; + return err; + } + + tuncfg.encap_type = 1; + tuncfg.encap_rcv = sctp_udp_rcv; + setup_udp_tunnel_sock(net, sock, &tuncfg); + net->sctp.udp6_sock = sock->sk; +#endif + return 0; } @@ -878,6 +900,10 @@ void sctp_udp_sock_stop(struct net *net) udp_tunnel_sock_release(net->sctp.udp4_sock->sk_socket); net->sctp.udp4_sock = NULL; } + if (net->sctp.udp6_sock) { + udp_tunnel_sock_release(net->sctp.udp6_sock->sk_socket); + net->sctp.udp6_sock = NULL; + } } /* Register address family specific functions. */ From 89ba49171fb22c130be4786d5ada9dda33dd7801 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:00 +0800 Subject: [PATCH 06/16] sctp: add encap_err_lookup for udp encap socks As it says in rfc6951#section-5.5: "When receiving ICMP or ICMPv6 response packets, there might not be enough bytes in the payload to identify the SCTP association that the SCTP packet triggering the ICMP or ICMPv6 packet belongs to. If a received ICMP or ICMPv6 packet cannot be related to a specific SCTP association or the verification tag cannot be verified, it MUST be discarded silently. In particular, this means that the SCTP stack MUST NOT rely on receiving ICMP or ICMPv6 messages. Implementation constraints could prevent processing received ICMP or ICMPv6 messages." ICMP or ICMPv6 packets need to be handled, and this is implemented by udp encap sock .encap_err_lookup function. The .encap_err_lookup function is called in __udp(6)_lib_err_encap() to confirm this path does need to be updated. For sctp, what we can do here is check if the corresponding asoc and transport exist. Note that icmp packet process for sctp over udp is done by udp sock .encap_err_lookup(), and it means for now we can't do as much as sctp_v4/6_err() does. Also we can't do the two mappings mentioned in rfc6951#section-5.5. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/protocol.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 8410c9a3f145..4d12a0c869b8 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -848,6 +848,23 @@ static int sctp_udp_rcv(struct sock *sk, struct sk_buff *skb) return 0; } +static int sctp_udp_err_lookup(struct sock *sk, struct sk_buff *skb) +{ + struct sctp_association *asoc; + struct sctp_transport *t; + int family; + + skb->transport_header += sizeof(struct udphdr); + family = (ip_hdr(skb)->version == 4) ? AF_INET : AF_INET6; + sk = sctp_err_lookup(dev_net(skb->dev), family, skb, sctp_hdr(skb), + &asoc, &t); + if (!sk) + return -ENOENT; + + sctp_err_finish(sk, t); + return 0; +} + int sctp_udp_sock_start(struct net *net) { struct udp_tunnel_sock_cfg tuncfg = {NULL}; @@ -866,6 +883,7 @@ int sctp_udp_sock_start(struct net *net) tuncfg.encap_type = 1; tuncfg.encap_rcv = sctp_udp_rcv; + tuncfg.encap_err_lookup = sctp_udp_err_lookup; setup_udp_tunnel_sock(net, sock, &tuncfg); net->sctp.udp4_sock = sock->sk; @@ -887,6 +905,7 @@ int sctp_udp_sock_start(struct net *net) tuncfg.encap_type = 1; tuncfg.encap_rcv = sctp_udp_rcv; + tuncfg.encap_err_lookup = sctp_udp_err_lookup; setup_udp_tunnel_sock(net, sock, &tuncfg); net->sctp.udp6_sock = sock->sk; #endif From e8a3001c2120c760017da81a307308572a3cdbbc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:01 +0800 Subject: [PATCH 07/16] sctp: add encap_port for netns sock asoc and transport encap_port is added as per netns/sock/assoc/transport, and the latter one's encap_port inherits the former one's by default. The transport's encap_port value would mostly decide if one packet should go out with udp encapsulated or not. This patch also allows users to set netns' encap_port by sysctl. v1->v2: - Change to define encap_port as __be16 for sctp_sock, asoc and transport. v2->v3: - No change. v3->v4: - Add 'encap_port' entry in ip-sysctl.rst. v4->v5: - Improve the description of encap_port in ip-sysctl.rst. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 16 ++++++++++++++++ include/net/netns/sctp.h | 2 ++ include/net/sctp/structs.h | 6 ++++++ net/sctp/associola.c | 4 ++++ net/sctp/protocol.c | 3 +++ net/sctp/socket.c | 1 + net/sctp/sysctl.c | 10 ++++++++++ 7 files changed, 42 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 25e6673a085a..dad3ba952989 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2642,6 +2642,22 @@ addr_scope_policy - INTEGER Default: 1 +encap_port - INTEGER + The default remote UDP encapsulation port. + + This value is used to set the dest port of the UDP header for the + outgoing UDP-encapsulated SCTP packets by default. Users can also + change the value for each sock/asoc/transport by using setsockopt. + For further information, please refer to RFC6951. + + Note that when connecting to a remote server, the client should set + this to the port that the UDP tunneling sock on the peer server is + listening to and the local UDP tunneling sock on the client also + must be started. On the server, it would get the encap_port from + the incoming packet's source port. + + Default: 0 + ``/proc/sys/net/core/*`` ======================== diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 247b401a0f52..a0f315effa94 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -27,6 +27,8 @@ struct netns_sctp { struct sock *udp6_sock; /* UDP tunneling listening port. */ int udp_port; + /* UDP tunneling remote encap port. */ + int encap_port; /* This is the global local address list. * We actively maintain this complete list of addresses on diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0bdff38eb4bb..aa98e7e659ac 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -178,6 +178,8 @@ struct sctp_sock { */ __u32 hbinterval; + __be16 encap_port; + /* This is the max_retrans value for new associations. */ __u16 pathmaxrxt; @@ -877,6 +879,8 @@ struct sctp_transport { */ unsigned long last_time_ecne_reduced; + __be16 encap_port; + /* This is the max_retrans value for the transport and will * be initialized from the assocs value. This can be changed * using the SCTP_SET_PEER_ADDR_PARAMS socket option. @@ -1790,6 +1794,8 @@ struct sctp_association { */ unsigned long hbinterval; + __be16 encap_port; + /* This is the max_retrans value for new transports in the * association. */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index fdb69d46276d..336df4b36655 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -99,6 +99,8 @@ static struct sctp_association *sctp_association_init( */ asoc->hbinterval = msecs_to_jiffies(sp->hbinterval); + asoc->encap_port = sp->encap_port; + /* Initialize path max retrans value. */ asoc->pathmaxrxt = sp->pathmaxrxt; @@ -624,6 +626,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, */ peer->hbinterval = asoc->hbinterval; + peer->encap_port = asoc->encap_port; + /* Set the path max_retrans. */ peer->pathmaxrxt = asoc->pathmaxrxt; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 4d12a0c869b8..89dfd313e113 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1359,6 +1359,9 @@ static int __net_init sctp_defaults_init(struct net *net) /* Set UDP tunneling listening port to 0 by default */ net->sctp.udp_port = 0; + /* Set remote encap port to 0 by default */ + net->sctp.encap_port = 0; + /* Set SCOPE policy to enabled */ net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 53d0a4161df3..09b94cd7ca37 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4876,6 +4876,7 @@ static int sctp_init_sock(struct sock *sk) * be modified via SCTP_PEER_ADDR_PARAMS */ sp->hbinterval = net->sctp.hb_interval; + sp->encap_port = htons(net->sctp.encap_port); sp->pathmaxrxt = net->sctp.max_retrans_path; sp->pf_retrans = net->sctp.pf_retrans; sp->ps_retrans = net->sctp.ps_retrans; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index c16c80963e55..ecc1b5e4c297 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -36,6 +36,7 @@ static int rto_alpha_max = 1000; static int rto_beta_max = 1000; static int pf_expose_max = SCTP_PF_EXPOSE_MAX; static int ps_retrans_max = SCTP_PS_RETRANS_MAX; +static int udp_port_max = 65535; static unsigned long max_autoclose_min = 0; static unsigned long max_autoclose_max = @@ -290,6 +291,15 @@ static struct ctl_table sctp_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "encap_port", + .data = &init_net.sctp.encap_port, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = SYSCTL_ZERO, + .extra2 = &udp_port_max, + }, { .procname = "addr_scope_policy", .data = &init_net.sctp.scope_policy, From 8dba29603b5c8bfca2bf90aeb83d05a236df967b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:02 +0800 Subject: [PATCH 08/16] sctp: add SCTP_REMOTE_UDP_ENCAPS_PORT sockopt This patch is to implement: rfc6951#section-6.1: Get or Set the Remote UDP Encapsulation Port Number with the param of the struct: struct sctp_udpencaps { sctp_assoc_t sue_assoc_id; struct sockaddr_storage sue_address; uint16_t sue_port; }; the encap_port of sock, assoc or transport can be changed by users, which also means it allows the different transports of the same asoc to have different encap_port value. v1->v2: - no change. v2->v3: - fix the endian warning when setting values between encap_port and sue_port. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- include/uapi/linux/sctp.h | 7 +++ net/sctp/socket.c | 114 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 28ad40d9acba..cb78e7a739da 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -140,6 +140,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_ECN_SUPPORTED 130 #define SCTP_EXPOSE_POTENTIALLY_FAILED_STATE 131 #define SCTP_EXPOSE_PF_STATE SCTP_EXPOSE_POTENTIALLY_FAILED_STATE +#define SCTP_REMOTE_UDP_ENCAPS_PORT 132 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -1197,6 +1198,12 @@ struct sctp_event { uint8_t se_on; }; +struct sctp_udpencaps { + sctp_assoc_t sue_assoc_id; + struct sockaddr_storage sue_address; + uint16_t sue_port; +}; + /* SCTP Stream schedulers */ enum sctp_sched_type { SCTP_SS_FCFS, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 09b94cd7ca37..2a9ee9b3e46c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4417,6 +4417,55 @@ static int sctp_setsockopt_pf_expose(struct sock *sk, return retval; } +static int sctp_setsockopt_encap_port(struct sock *sk, + struct sctp_udpencaps *encap, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_transport *t; + __be16 encap_port; + + if (optlen != sizeof(*encap)) + return -EINVAL; + + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. + */ + encap_port = (__force __be16)encap->sue_port; + if (!sctp_is_any(sk, (union sctp_addr *)&encap->sue_address)) { + t = sctp_addr_id2transport(sk, &encap->sue_address, + encap->sue_assoc_id); + if (!t) + return -EINVAL; + + t->encap_port = encap_port; + return 0; + } + + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. + */ + asoc = sctp_id2assoc(sk, encap->sue_assoc_id); + if (!asoc && encap->sue_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + /* If changes are for association, also apply encap_port to + * each transport. + */ + if (asoc) { + list_for_each_entry(t, &asoc->peer.transport_addr_list, + transports) + t->encap_port = encap_port; + + return 0; + } + + sctp_sk(sk)->encap_port = encap_port; + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4636,6 +4685,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE: retval = sctp_setsockopt_pf_expose(sk, kopt, optlen); break; + case SCTP_REMOTE_UDP_ENCAPS_PORT: + retval = sctp_setsockopt_encap_port(sk, kopt, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -7791,6 +7843,65 @@ static int sctp_getsockopt_pf_expose(struct sock *sk, int len, return retval; } +static int sctp_getsockopt_encap_port(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_association *asoc; + struct sctp_udpencaps encap; + struct sctp_transport *t; + __be16 encap_port; + + if (len < sizeof(encap)) + return -EINVAL; + + len = sizeof(encap); + if (copy_from_user(&encap, optval, len)) + return -EFAULT; + + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. + */ + if (!sctp_is_any(sk, (union sctp_addr *)&encap.sue_address)) { + t = sctp_addr_id2transport(sk, &encap.sue_address, + encap.sue_assoc_id); + if (!t) { + pr_debug("%s: failed no transport\n", __func__); + return -EINVAL; + } + + encap_port = t->encap_port; + goto out; + } + + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. + */ + asoc = sctp_id2assoc(sk, encap.sue_assoc_id); + if (!asoc && encap.sue_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + pr_debug("%s: failed no association\n", __func__); + return -EINVAL; + } + + if (asoc) { + encap_port = asoc->encap_port; + goto out; + } + + encap_port = sctp_sk(sk)->encap_port; + +out: + encap.sue_port = (__force uint16_t)encap_port; + if (copy_to_user(optval, &encap, len)) + return -EFAULT; + + if (put_user(len, optlen)) + return -EFAULT; + + return 0; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -8011,6 +8122,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE: retval = sctp_getsockopt_pf_expose(sk, len, optval, optlen); break; + case SCTP_REMOTE_UDP_ENCAPS_PORT: + retval = sctp_getsockopt_encap_port(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; From a1dd2cf2f1aedabc2ca9bb4f90231a521c52d8eb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:03 +0800 Subject: [PATCH 09/16] sctp: allow changing transport encap_port by peer packets As rfc6951#section-5.4 says: "After finding the SCTP association (which includes checking the verification tag), the UDP source port MUST be stored as the encapsulation port for the destination address the SCTP packet is received from (see Section 5.1). When a non-encapsulated SCTP packet is received by the SCTP stack, the encapsulation of outgoing packets belonging to the same association and the corresponding destination address MUST be disabled." transport encap_port should be updated by a validated incoming packet's udp src port. We save the udp src port in sctp_input_cb->encap_port, and then update the transport in two places: 1. right after vtag is verified, which is required by RFC, and this allows the existent transports to be updated by the chunks that can only be processed on an asoc. 2. right before processing the 'init' where the transports are added, and this allows building a sctp over udp connection by client with the server not knowing the remote encap port. 3. when processing ootb_pkt and creating the temporary transport for the reply pkt. Note that sctp_input_cb->header is removed, as it's not used any more in sctp. v1->v2: - Change encap_port as __be16 for sctp_input_cb. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- include/net/sctp/sm.h | 1 + include/net/sctp/structs.h | 7 +------ net/sctp/ipv6.c | 1 + net/sctp/protocol.c | 11 ++++++++++- net/sctp/sm_make_chunk.c | 1 + net/sctp/sm_statefuns.c | 2 ++ 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 5c491a3bc27e..a499341472ad 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -380,6 +380,7 @@ sctp_vtag_verify(const struct sctp_chunk *chunk, if (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag) return 1; + chunk->transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port; return 0; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index aa98e7e659ac..81464ae2b137 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1120,14 +1120,9 @@ static inline void sctp_outq_cork(struct sctp_outq *q) * sctp_input_cb is currently used on rx and sock rx queue */ struct sctp_input_cb { - union { - struct inet_skb_parm h4; -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_skb_parm h6; -#endif - } header; struct sctp_chunk *chunk; struct sctp_af *af; + __be16 encap_port; }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 8a58f42d6d19..a064bf252b17 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1053,6 +1053,7 @@ static struct inet_protosw sctpv6_stream_protosw = { static int sctp6_rcv(struct sk_buff *skb) { + memset(skb->cb, 0, sizeof(skb->cb)); return sctp_rcv(skb) ? -1 : 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 89dfd313e113..f3de8c03a15e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -843,6 +843,9 @@ static int sctp_ctl_sock_init(struct net *net) static int sctp_udp_rcv(struct sock *sk, struct sk_buff *skb) { + memset(skb->cb, 0, sizeof(skb->cb)); + SCTP_INPUT_CB(skb)->encap_port = udp_hdr(skb)->source; + skb_set_transport_header(skb, sizeof(struct udphdr)); sctp_rcv(skb); return 0; @@ -1139,9 +1142,15 @@ static struct inet_protosw sctp_stream_protosw = { .flags = SCTP_PROTOSW_FLAG }; +static int sctp4_rcv(struct sk_buff *skb) +{ + memset(skb->cb, 0, sizeof(skb->cb)); + return sctp_rcv(skb); +} + /* Register with IP layer. */ static const struct net_protocol sctp_protocol = { - .handler = sctp_rcv, + .handler = sctp4_rcv, .err_handler = sctp_v4_err, .no_policy = 1, .netns_ok = 1, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9a56ae2f3651..21d0ff1c6ab9 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2321,6 +2321,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * added as the primary transport. The source address seems to * be a better choice than any of the embedded addresses. */ + asoc->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port; if (!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) goto nomem; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index c669f8bd1eab..8edab1533057 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6268,6 +6268,8 @@ static struct sctp_packet *sctp_ootb_pkt_new( if (!transport) goto nomem; + transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port; + /* Cache a route for the transport with the chunk's destination as * the source address. */ From f1bfe8b5415171b5e70c2a47d399c91bd7c2752e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:04 +0800 Subject: [PATCH 10/16] sctp: add udphdr to overhead when udp_port is set sctp_mtu_payload() is for calculating the frag size before making chunks from a msg. So we should only add udphdr size to overhead when udp socks are listening, as only then sctp can handle the incoming sctp over udp packets and outgoing sctp over udp packets will be possible. Note that we can't do this according to transport->encap_port, as different transports may be set to different values, while the chunks were made before choosing the transport, we could not be able to meet all rfc6951#section-5.6 recommends. v1->v2: - Add udp_port for sctp_sock to avoid a potential race issue, it will be used in xmit path in the next patch. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- include/net/sctp/sctp.h | 7 +++++-- include/net/sctp/structs.h | 1 + net/sctp/socket.c | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index bfd87a0aa2f1..86f74f2fe6de 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -578,10 +578,13 @@ static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp, { __u32 overhead = sizeof(struct sctphdr) + extra; - if (sp) + if (sp) { overhead += sp->pf->af->net_header_len; - else + if (sp->udp_port) + overhead += sizeof(struct udphdr); + } else { overhead += sizeof(struct ipv6hdr); + } if (WARN_ON_ONCE(mtu && mtu <= overhead)) mtu = overhead; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 81464ae2b137..80f71499b543 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -178,6 +178,7 @@ struct sctp_sock { */ __u32 hbinterval; + __be16 udp_port; __be16 encap_port; /* This is the max_retrans value for new associations. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2a9ee9b3e46c..a710917c5ac7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4928,6 +4928,7 @@ static int sctp_init_sock(struct sock *sk) * be modified via SCTP_PEER_ADDR_PARAMS */ sp->hbinterval = net->sctp.hb_interval; + sp->udp_port = htons(net->sctp.udp_port); sp->encap_port = htons(net->sctp.encap_port); sp->pathmaxrxt = net->sctp.max_retrans_path; sp->pf_retrans = net->sctp.pf_retrans; From bcd623d8e9fa5f82bbd8cd464dc418d24139157b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:05 +0800 Subject: [PATCH 11/16] sctp: call sk_setup_caps in sctp_packet_transmit instead sk_setup_caps() was originally called in Commit 90017accff61 ("sctp: Add GSO support"), as: "We have to refresh this in case we are xmiting to more than one transport at a time" This actually happens in the loop of sctp_outq_flush_transports(), and it shouldn't be tied to gso, so move it out of gso part and before sctp_packet_pack(). Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/output.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 1441eaf460bb..fb1650095326 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -508,12 +508,6 @@ static int sctp_packet_pack(struct sctp_packet *packet, sizeof(struct inet6_skb_parm))); skb_shinfo(head)->gso_segs = pkt_count; skb_shinfo(head)->gso_size = GSO_BY_FRAGS; - rcu_read_lock(); - if (skb_dst(head) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } - rcu_read_unlock(); goto chksum; } @@ -593,6 +587,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) } skb_dst_set(head, dst); + rcu_read_lock(); + if (__sk_dst_get(sk) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); + } + rcu_read_unlock(); + /* pack up chunks */ pkt_count = sctp_packet_pack(packet, head, gso, gfp); if (!pkt_count) { From 600af7fd809ad2a307b6c53b2a3e45453a75cdb6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:06 +0800 Subject: [PATCH 12/16] sctp: support for sending packet over udp4 sock This patch does what the rfc6951#section-5.3 says for ipv4: "Within the UDP header, the source port MUST be the local UDP encapsulation port number of the SCTP stack, and the destination port MUST be the remote UDP encapsulation port number maintained for the association and the destination address to which the packet is sent (see Section 5.1). Because the SCTP packet is the UDP payload, the length of the UDP packet MUST be the length of the SCTP packet plus the size of the UDP header. The SCTP checksum MUST be computed for IPv4 and IPv6, and the UDP checksum SHOULD be computed for IPv4 and IPv6." Some places need to be adjusted in sctp_packet_transmit(): 1. For non-gso packets, when transport's encap_port is set, sctp checksum has to be done in sctp_packet_pack(), as the outer udp will use ip_summed = CHECKSUM_PARTIAL to do the offload setting for checksum. 2. Delay calling dst_clone() and skb_dst_set() for non-udp packets until sctp_v4_xmit(), as for udp packets, skb_dst_set() is not needed before calling udp_tunnel_xmit_skb(). then in sctp_v4_xmit(): 1. Go to udp_tunnel_xmit_skb() only when transport->encap_port and net->sctp.udp_port both are set, as these are one for dst port and another for src port. 2. For gso packet, SKB_GSO_UDP_TUNNEL_CSUM is set for gso_type, and with this udp checksum can be done in __skb_udp_tunnel_segment() for each segments after the sctp gso. 3. inner_mac_header and inner_transport_header are set, as these will be needed in __skb_udp_tunnel_segment() to find the right headers. 4. df and ttl are calculated, as these are the required params by udp_tunnel_xmit_skb(). 5. nocheck param has to be false, as "the UDP checksum SHOULD be computed for IPv4 and IPv6", says in rfc6951#section-5.3. v1->v2: - Use sp->udp_port instead in sctp_v4_xmit(), which is more safe. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/output.c | 9 +++------ net/sctp/protocol.c | 41 ++++++++++++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index fb1650095326..6614c9fdc51e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -514,8 +514,8 @@ static int sctp_packet_pack(struct sctp_packet *packet, if (sctp_checksum_disable) return 1; - if (!(skb_dst(head)->dev->features & NETIF_F_SCTP_CRC) || - dst_xfrm(skb_dst(head)) || packet->ipfragok) { + if (!(tp->dst->dev->features & NETIF_F_SCTP_CRC) || + dst_xfrm(tp->dst) || packet->ipfragok || tp->encap_port) { struct sctphdr *sh = (struct sctphdr *)skb_transport_header(head); @@ -542,7 +542,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) struct sctp_association *asoc = tp->asoc; struct sctp_chunk *chunk, *tmp; int pkt_count, gso = 0; - struct dst_entry *dst; struct sk_buff *head; struct sctphdr *sh; struct sock *sk; @@ -579,13 +578,11 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sh->checksum = 0; /* drop packet if no dst */ - dst = dst_clone(tp->dst); - if (!dst) { + if (!tp->dst) { IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(head); goto out; } - skb_dst_set(head, dst); rcu_read_lock(); if (__sk_dst_get(sk) != tp->dst) { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f3de8c03a15e..41f287a13b54 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1059,25 +1059,44 @@ static int sctp_inet_supported_addrs(const struct sctp_sock *opt, } /* Wrapper routine that calls the ip transmit routine. */ -static inline int sctp_v4_xmit(struct sk_buff *skb, - struct sctp_transport *transport) +static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t) { - struct inet_sock *inet = inet_sk(skb->sk); + struct dst_entry *dst = dst_clone(t->dst); + struct flowi4 *fl4 = &t->fl.u.ip4; + struct sock *sk = skb->sk; + struct inet_sock *inet = inet_sk(sk); __u8 dscp = inet->tos; + __be16 df = 0; pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb, - skb->len, &transport->fl.u.ip4.saddr, - &transport->fl.u.ip4.daddr); + skb->len, &fl4->saddr, &fl4->daddr); - if (transport->dscp & SCTP_DSCP_SET_MASK) - dscp = transport->dscp & SCTP_DSCP_VAL_MASK; + if (t->dscp & SCTP_DSCP_SET_MASK) + dscp = t->dscp & SCTP_DSCP_VAL_MASK; - inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? - IP_PMTUDISC_DO : IP_PMTUDISC_DONT; + inet->pmtudisc = t->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO + : IP_PMTUDISC_DONT; + SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); - SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); + if (!t->encap_port || !sctp_sk(sk)->udp_port) { + skb_dst_set(skb, dst); + return __ip_queue_xmit(sk, skb, &t->fl, dscp); + } - return __ip_queue_xmit(&inet->sk, skb, &transport->fl, dscp); + if (skb_is_gso(skb)) + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + + if (ip_dont_fragment(sk, dst) && !skb->ignore_df) + df = htons(IP_DF); + + skb->encapsulation = 1; + skb_reset_inner_mac_header(skb); + skb_reset_inner_transport_header(skb); + skb_set_inner_ipproto(skb, IPPROTO_SCTP); + udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr, + fl4->daddr, dscp, ip4_dst_hoplimit(dst), df, + sctp_sk(sk)->udp_port, t->encap_port, false, false); + return 0; } static struct sctp_af sctp_af_inet; From 259db53ba5933a2a507238c47ffa52c906c27df0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:07 +0800 Subject: [PATCH 13/16] sctp: support for sending packet over udp6 sock This one basically does the similar things in sctp_v6_xmit as does for udp4 sock in the last patch, just note that: 1. label needs to be calculated, as it's the param of udp_tunnel6_xmit_skb(). 2. The 'nocheck' param of udp_tunnel6_xmit_skb() is false, as required by RFC. v1->v2: - Use sp->udp_port instead in sctp_v6_xmit(), which is more safe. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/ipv6.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index a064bf252b17..814754d7cab5 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -55,6 +55,7 @@ #include #include #include +#include #include @@ -191,33 +192,53 @@ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return ret; } -static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) +static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) { + struct dst_entry *dst = dst_clone(t->dst); + struct flowi6 *fl6 = &t->fl.u.ip6; struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi6 *fl6 = &transport->fl.u.ip6; __u8 tclass = np->tclass; - int res; + __be32 label; pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, &fl6->saddr, &fl6->daddr); - if (transport->dscp & SCTP_DSCP_SET_MASK) - tclass = transport->dscp & SCTP_DSCP_VAL_MASK; + if (t->dscp & SCTP_DSCP_SET_MASK) + tclass = t->dscp & SCTP_DSCP_VAL_MASK; if (INET_ECN_is_capable(tclass)) IP6_ECN_flow_xmit(sk, fl6->flowlabel); - if (!(transport->param_flags & SPP_PMTUD_ENABLE)) + if (!(t->param_flags & SPP_PMTUD_ENABLE)) skb->ignore_df = 1; SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); - rcu_read_lock(); - res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), - tclass, sk->sk_priority); - rcu_read_unlock(); - return res; + if (!t->encap_port || !sctp_sk(sk)->udp_port) { + int res; + + skb_dst_set(skb, dst); + rcu_read_lock(); + res = ip6_xmit(sk, skb, fl6, sk->sk_mark, + rcu_dereference(np->opt), + tclass, sk->sk_priority); + rcu_read_unlock(); + return res; + } + + if (skb_is_gso(skb)) + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + + skb->encapsulation = 1; + skb_reset_inner_mac_header(skb); + skb_reset_inner_transport_header(skb); + skb_set_inner_ipproto(skb, IPPROTO_SCTP); + label = ip6_make_flowlabel(sock_net(sk), skb, fl6->flowlabel, true, fl6); + + return udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, + &fl6->daddr, tclass, ip6_dst_hoplimit(dst), + label, sctp_sk(sk)->udp_port, t->encap_port, false); } /* Returns the dst cache entry for the given source and destination ip From e38d86b354f96e3ed6f5e6c7dbb442d7107fc0bd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:08 +0800 Subject: [PATCH 14/16] sctp: add the error cause for new encapsulation port restart This patch is to add the function to make the abort chunk with the error cause for new encapsulation port restart, defined on Section 4.4 in draft-tuexen-tsvwg-sctp-udp-encaps-cons-03. v1->v2: - no change. v2->v3: - no need to call htons() when setting nep.cur_port/new_port. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- include/linux/sctp.h | 20 ++++++++++++++++++++ include/net/sctp/sm.h | 3 +++ net/sctp/sm_make_chunk.c | 20 ++++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 76731230bbc5..bb1926589693 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -482,11 +482,13 @@ enum sctp_error { * 11 Restart of an association with new addresses * 12 User Initiated Abort * 13 Protocol Violation + * 14 Restart of an Association with New Encapsulation Port */ SCTP_ERROR_RESTART = cpu_to_be16(0x0b), SCTP_ERROR_USER_ABORT = cpu_to_be16(0x0c), SCTP_ERROR_PROTO_VIOLATION = cpu_to_be16(0x0d), + SCTP_ERROR_NEW_ENCAP_PORT = cpu_to_be16(0x0e), /* ADDIP Section 3.3 New Error Causes * @@ -793,4 +795,22 @@ enum { SCTP_FLOWLABEL_VAL_MASK = 0xfffff }; +/* UDP Encapsulation + * draft-tuexen-tsvwg-sctp-udp-encaps-cons-03.html#section-4-4 + * + * The error cause indicating an "Restart of an Association with + * New Encapsulation Port" + * + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cause Code = 14 | Cause Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Current Encapsulation Port | New Encapsulation Port | + * +-------------------------------+-------------------------------+ + */ +struct sctp_new_encap_port_hdr { + __be16 cur_port; + __be16 new_port; +}; + #endif /* __LINUX_SCTP_H__ */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index a499341472ad..fd223c94589a 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -221,6 +221,9 @@ struct sctp_chunk *sctp_make_violation_paramlen( struct sctp_chunk *sctp_make_violation_max_retrans( const struct sctp_association *asoc, const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_new_encap_port( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport); struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 21d0ff1c6ab9..f77484df097b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1142,6 +1142,26 @@ struct sctp_chunk *sctp_make_violation_max_retrans( return retval; } +struct sctp_chunk *sctp_make_new_encap_port(const struct sctp_association *asoc, + const struct sctp_chunk *chunk) +{ + struct sctp_new_encap_port_hdr nep; + struct sctp_chunk *retval; + + retval = sctp_make_abort(asoc, chunk, + sizeof(struct sctp_errhdr) + sizeof(nep)); + if (!retval) + goto nodata; + + sctp_init_cause(retval, SCTP_ERROR_NEW_ENCAP_PORT, sizeof(nep)); + nep.cur_port = SCTP_INPUT_CB(chunk->skb)->encap_port; + nep.new_port = chunk->transport->encap_port; + sctp_addto_chunk(retval, sizeof(nep), &nep); + +nodata: + return retval; +} + /* Make a HEARTBEAT chunk. */ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport) From 1c16a1862595e66d22d34799a9f328defc884369 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:09 +0800 Subject: [PATCH 15/16] sctp: handle the init chunk matching an existing asoc This is from Section 4 of draft-tuexen-tsvwg-sctp-udp-encaps-cons-03, and it requires responding with an abort chunk with an error cause when the udp source port of the received init chunk doesn't match the encap port of the transport. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8edab1533057..af2b7041fa4e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -87,6 +87,13 @@ static enum sctp_disposition sctp_sf_tabort_8_4_8( const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); +static enum sctp_disposition sctp_sf_new_encap_port( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands); static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk); static enum sctp_disposition sctp_stop_t1_and_abort( @@ -1493,6 +1500,10 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + + if (SCTP_INPUT_CB(chunk->skb)->encap_port != chunk->transport->encap_port) + return sctp_sf_new_encap_port(net, ep, asoc, type, arg, commands); + /* Grab the INIT header. */ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; @@ -3392,6 +3403,45 @@ static enum sctp_disposition sctp_sf_tabort_8_4_8( sctp_packet_append_chunk(packet, abort); + sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); + + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); + + sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return SCTP_DISPOSITION_CONSUME; +} + +/* Handling of SCTP Packets Containing an INIT Chunk Matching an + * Existing Associations when the UDP encap port is incorrect. + * + * From Section 4 at draft-tuexen-tsvwg-sctp-udp-encaps-cons-03. + */ +static enum sctp_disposition sctp_sf_new_encap_port( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) +{ + struct sctp_packet *packet = NULL; + struct sctp_chunk *chunk = arg; + struct sctp_chunk *abort; + + packet = sctp_ootb_pkt_new(net, asoc, chunk); + if (!packet) + return SCTP_DISPOSITION_NOMEM; + + abort = sctp_make_new_encap_port(asoc, chunk); + if (!abort) { + sctp_ootb_pkt_free(packet); + return SCTP_DISPOSITION_NOMEM; + } + + abort->skb->sk = ep->base.sk; + + sctp_packet_append_chunk(packet, abort); + sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); From 046c052b475e7119b6a30e3483e2888fc606a2f8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:10 +0800 Subject: [PATCH 16/16] sctp: enable udp tunneling socks This patch is to enable udp tunneling socks by calling sctp_udp_sock_start() in sctp_ctrlsock_init(), and sctp_udp_sock_stop() in sctp_ctrlsock_exit(). Also add sysctl udp_port to allow changing the listening sock's port by users. Wit this patch, the whole sctp over udp feature can be enabled and used. v1->v2: - Also update ctl_sock udp_port in proc_sctp_do_udp_port() where netns udp_port gets changed. v2->v3: - Call htons() when setting sk udp_port from netns udp_port. v3->v4: - Not call sctp_udp_sock_start() when new_value is 0. - Add udp_port entry in ip-sysctl.rst. v4->v5: - Not call sctp_udp_sock_start/stop() in sctp_ctrlsock_init/exit(). - Improve the description of udp_port in ip-sysctl.rst. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 15 ++++++++ net/sctp/sysctl.c | 52 ++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index dad3ba952989..2aaf40b2d2cd 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2642,6 +2642,21 @@ addr_scope_policy - INTEGER Default: 1 +udp_port - INTEGER + The listening port for the local UDP tunneling sock. Normally it's + using the IANA-assigned UDP port number 9899 (sctp-tunneling). + + This UDP sock is used for processing the incoming UDP-encapsulated + SCTP packets (from RFC6951), and shared by all applications in the + same net namespace. This UDP sock will be closed when the value is + set to 0. + + The value will also be used to set the src port of the UDP header + for the outgoing UDP-encapsulated SCTP packets. For the dest port, + please refer to 'encap_port' below. + + Default: 0 + encap_port - INTEGER The default remote UDP encapsulation port. diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ecc1b5e4c297..e92df779af73 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -49,6 +49,8 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write, void *buffer, + size_t *lenp, loff_t *ppos); static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_auth(struct ctl_table *ctl, int write, @@ -291,6 +293,15 @@ static struct ctl_table sctp_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "udp_port", + .data = &init_net.sctp.udp_port, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_sctp_do_udp_port, + .extra1 = SYSCTL_ZERO, + .extra2 = &udp_port_max, + }, { .procname = "encap_port", .data = &init_net.sctp.encap_port, @@ -487,6 +498,47 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write, return ret; } +static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + unsigned int min = *(unsigned int *)ctl->extra1; + unsigned int max = *(unsigned int *)ctl->extra2; + struct ctl_table tbl; + int ret, new_value; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.udp_port; + + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) { + struct sock *sk = net->sctp.ctl_sock; + + if (new_value > max || new_value < min) + return -EINVAL; + + net->sctp.udp_port = new_value; + sctp_udp_sock_stop(net); + if (new_value) { + ret = sctp_udp_sock_start(net); + if (ret) + net->sctp.udp_port = 0; + } + + /* Update the value in the control socket */ + lock_sock(sk); + sctp_sk(sk)->udp_port = htons(net->sctp.udp_port); + release_sock(sk); + } + + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table;