mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-03 21:45:08 -04:00
Merge branch 'vsock-virtio' into main
Luigi Leonardi says: ==================== vsock: avoid queuing on intermediate queue if possible This series introduces an optimization for vsock/virtio to reduce latency and increase the throughput: When the guest sends a packet to the host, and the intermediate queue (send_pkt_queue) is empty, if there is enough space, the packet is put directly in the virtqueue. v3->v4 While running experiments on fio with 64B payload, I realized that there was a mistake in my fio configuration, so I re-ran all the experiments and now the latency numbers are indeed lower with the patch applied. I also noticed that I was kicking the host without the lock. - Fixed a configuration mistake on fio and re-ran all experiments. - Fio latency measurement using 64B payload. - virtio_transport_send_skb_fast_path sends kick with the tx_lock acquired - Addressed all minor style changes requested by maintainer. - Rebased on latest net-next - Link to v3: https://lore.kernel.org/r/20240711-pinna-v3-0-697d4164fe80@outlook.com v2->v3 - Performed more experiments using iperf3 using multiple streams - Handling of reply packets removed from virtio_transport_send_skb, as is needed just by the worker. - Removed atomic_inc/atomic_sub when queuing directly to the vq. - Introduced virtio_transport_send_skb_fast_path that handles the steps for sending on the vq. - Fixed a missing mutex_unlock in error path. - Changed authorship of the second commit - Rebased on latest net-next v1->v2 In this v2 I replaced a mutex_lock with a mutex_trylock because it was insidea RCU critical section. I also added a check on tx_run, so if the module is being removed the packet is not queued. I'd like to thank Stefano for reporting the tx_run issue. Applied all Stefano's suggestions: - Minor code style changes - Minor commit text rewrite Performed more experiments: - Check if all the packets go directly to the vq (Matias' suggestion) - Used iperf3 to see if there is any improvement in overall throughput from guest to host - Pinned the vhost process to a pCPU. - Run fio using 512B payload Rebased on latest net-next ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
@@ -244,7 +244,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
|
||||
restart_tx = true;
|
||||
}
|
||||
|
||||
consume_skb(skb);
|
||||
virtio_transport_consume_skb_sent(skb, true);
|
||||
}
|
||||
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
|
||||
if (added)
|
||||
@@ -451,6 +451,8 @@ static struct virtio_transport vhost_transport = {
|
||||
.notify_buffer_size = virtio_transport_notify_buffer_size,
|
||||
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
|
||||
|
||||
.unsent_bytes = virtio_transport_unsent_bytes,
|
||||
|
||||
.read_skb = virtio_transport_read_skb,
|
||||
},
|
||||
|
||||
|
||||
@@ -133,6 +133,7 @@ struct virtio_vsock_sock {
|
||||
u32 tx_cnt;
|
||||
u32 peer_fwd_cnt;
|
||||
u32 peer_buf_alloc;
|
||||
size_t bytes_unsent;
|
||||
|
||||
/* Protected by rx_lock */
|
||||
u32 fwd_cnt;
|
||||
@@ -193,6 +194,11 @@ s64 virtio_transport_stream_has_data(struct vsock_sock *vsk);
|
||||
s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
|
||||
u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk);
|
||||
|
||||
ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk);
|
||||
|
||||
void virtio_transport_consume_skb_sent(struct sk_buff *skb,
|
||||
bool consume);
|
||||
|
||||
int virtio_transport_do_socket_init(struct vsock_sock *vsk,
|
||||
struct vsock_sock *psk);
|
||||
int
|
||||
|
||||
@@ -169,6 +169,9 @@ struct vsock_transport {
|
||||
void (*notify_buffer_size)(struct vsock_sock *, u64 *);
|
||||
int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val);
|
||||
|
||||
/* SIOCOUTQ ioctl */
|
||||
ssize_t (*unsent_bytes)(struct vsock_sock *vsk);
|
||||
|
||||
/* Shutdown. */
|
||||
int (*shutdown)(struct vsock_sock *, int);
|
||||
|
||||
|
||||
@@ -112,6 +112,7 @@
|
||||
#include <net/sock.h>
|
||||
#include <net/af_vsock.h>
|
||||
#include <uapi/linux/vm_sockets.h>
|
||||
#include <uapi/asm-generic/ioctls.h>
|
||||
|
||||
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
|
||||
static void vsock_sk_destruct(struct sock *sk);
|
||||
@@ -1292,6 +1293,57 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
|
||||
|
||||
static int vsock_do_ioctl(struct socket *sock, unsigned int cmd,
|
||||
int __user *arg)
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
struct vsock_sock *vsk;
|
||||
int ret;
|
||||
|
||||
vsk = vsock_sk(sk);
|
||||
|
||||
switch (cmd) {
|
||||
case SIOCOUTQ: {
|
||||
ssize_t n_bytes;
|
||||
|
||||
if (!vsk->transport || !vsk->transport->unsent_bytes) {
|
||||
ret = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (sock_type_connectible(sk->sk_type) && sk->sk_state == TCP_LISTEN) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
n_bytes = vsk->transport->unsent_bytes(vsk);
|
||||
if (n_bytes < 0) {
|
||||
ret = n_bytes;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = put_user(n_bytes, arg);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ret = -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vsock_ioctl(struct socket *sock, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
int ret;
|
||||
|
||||
lock_sock(sock->sk);
|
||||
ret = vsock_do_ioctl(sock, cmd, (int __user *)arg);
|
||||
release_sock(sock->sk);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct proto_ops vsock_dgram_ops = {
|
||||
.family = PF_VSOCK,
|
||||
.owner = THIS_MODULE,
|
||||
@@ -1302,7 +1354,7 @@ static const struct proto_ops vsock_dgram_ops = {
|
||||
.accept = sock_no_accept,
|
||||
.getname = vsock_getname,
|
||||
.poll = vsock_poll,
|
||||
.ioctl = sock_no_ioctl,
|
||||
.ioctl = vsock_ioctl,
|
||||
.listen = sock_no_listen,
|
||||
.shutdown = vsock_shutdown,
|
||||
.sendmsg = vsock_dgram_sendmsg,
|
||||
@@ -2286,7 +2338,7 @@ static const struct proto_ops vsock_stream_ops = {
|
||||
.accept = vsock_accept,
|
||||
.getname = vsock_getname,
|
||||
.poll = vsock_poll,
|
||||
.ioctl = sock_no_ioctl,
|
||||
.ioctl = vsock_ioctl,
|
||||
.listen = vsock_listen,
|
||||
.shutdown = vsock_shutdown,
|
||||
.setsockopt = vsock_connectible_setsockopt,
|
||||
@@ -2308,7 +2360,7 @@ static const struct proto_ops vsock_seqpacket_ops = {
|
||||
.accept = vsock_accept,
|
||||
.getname = vsock_getname,
|
||||
.poll = vsock_poll,
|
||||
.ioctl = sock_no_ioctl,
|
||||
.ioctl = vsock_ioctl,
|
||||
.listen = vsock_listen,
|
||||
.shutdown = vsock_shutdown,
|
||||
.setsockopt = vsock_connectible_setsockopt,
|
||||
|
||||
@@ -311,7 +311,7 @@ static void virtio_transport_tx_work(struct work_struct *work)
|
||||
|
||||
virtqueue_disable_cb(vq);
|
||||
while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
|
||||
consume_skb(skb);
|
||||
virtio_transport_consume_skb_sent(skb, true);
|
||||
added = true;
|
||||
}
|
||||
} while (!virtqueue_enable_cb(vq));
|
||||
@@ -540,6 +540,8 @@ static struct virtio_transport virtio_transport = {
|
||||
.notify_buffer_size = virtio_transport_notify_buffer_size,
|
||||
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
|
||||
|
||||
.unsent_bytes = virtio_transport_unsent_bytes,
|
||||
|
||||
.read_skb = virtio_transport_read_skb,
|
||||
},
|
||||
|
||||
|
||||
@@ -463,6 +463,26 @@ void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
|
||||
|
||||
void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume)
|
||||
{
|
||||
struct sock *s = skb->sk;
|
||||
|
||||
if (s && skb->len) {
|
||||
struct vsock_sock *vs = vsock_sk(s);
|
||||
struct virtio_vsock_sock *vvs;
|
||||
|
||||
vvs = vs->trans;
|
||||
|
||||
spin_lock_bh(&vvs->tx_lock);
|
||||
vvs->bytes_unsent -= skb->len;
|
||||
spin_unlock_bh(&vvs->tx_lock);
|
||||
}
|
||||
|
||||
if (consume)
|
||||
consume_skb(skb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(virtio_transport_consume_skb_sent);
|
||||
|
||||
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
|
||||
{
|
||||
u32 ret;
|
||||
@@ -475,6 +495,7 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
|
||||
if (ret > credit)
|
||||
ret = credit;
|
||||
vvs->tx_cnt += ret;
|
||||
vvs->bytes_unsent += ret;
|
||||
spin_unlock_bh(&vvs->tx_lock);
|
||||
|
||||
return ret;
|
||||
@@ -488,6 +509,7 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
|
||||
|
||||
spin_lock_bh(&vvs->tx_lock);
|
||||
vvs->tx_cnt -= credit;
|
||||
vvs->bytes_unsent -= credit;
|
||||
spin_unlock_bh(&vvs->tx_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
|
||||
@@ -1090,6 +1112,19 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(virtio_transport_destruct);
|
||||
|
||||
ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
|
||||
{
|
||||
struct virtio_vsock_sock *vvs = vsk->trans;
|
||||
size_t ret;
|
||||
|
||||
spin_lock_bh(&vvs->tx_lock);
|
||||
ret = vvs->bytes_unsent;
|
||||
spin_unlock_bh(&vvs->tx_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);
|
||||
|
||||
static int virtio_transport_reset(struct vsock_sock *vsk,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
|
||||
@@ -98,6 +98,8 @@ static struct virtio_transport loopback_transport = {
|
||||
.notify_buffer_size = virtio_transport_notify_buffer_size,
|
||||
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
|
||||
|
||||
.unsent_bytes = virtio_transport_unsent_bytes,
|
||||
|
||||
.read_skb = virtio_transport_read_skb,
|
||||
},
|
||||
|
||||
@@ -123,6 +125,10 @@ static void vsock_loopback_work(struct work_struct *work)
|
||||
spin_unlock_bh(&vsock->pkt_queue.lock);
|
||||
|
||||
while ((skb = __skb_dequeue(&pkts))) {
|
||||
/* Decrement the bytes_unsent counter without deallocating skb
|
||||
* It is freed by the receiver.
|
||||
*/
|
||||
virtio_transport_consume_skb_sent(skb, false);
|
||||
virtio_transport_deliver_tap_pkt(skb);
|
||||
virtio_transport_recv_pkt(&loopback_transport, skb);
|
||||
}
|
||||
|
||||
@@ -139,7 +139,7 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po
|
||||
}
|
||||
|
||||
/* Connect to <cid, port> and return the file descriptor. */
|
||||
static int vsock_connect(unsigned int cid, unsigned int port, int type)
|
||||
int vsock_connect(unsigned int cid, unsigned int port, int type)
|
||||
{
|
||||
union {
|
||||
struct sockaddr sa;
|
||||
@@ -226,8 +226,8 @@ static int vsock_listen(unsigned int cid, unsigned int port, int type)
|
||||
/* Listen on <cid, port> and return the first incoming connection. The remote
|
||||
* address is stored to clientaddrp. clientaddrp may be NULL.
|
||||
*/
|
||||
static int vsock_accept(unsigned int cid, unsigned int port,
|
||||
struct sockaddr_vm *clientaddrp, int type)
|
||||
int vsock_accept(unsigned int cid, unsigned int port,
|
||||
struct sockaddr_vm *clientaddrp, int type)
|
||||
{
|
||||
union {
|
||||
struct sockaddr sa;
|
||||
|
||||
@@ -39,6 +39,9 @@ struct test_case {
|
||||
void init_signals(void);
|
||||
unsigned int parse_cid(const char *str);
|
||||
unsigned int parse_port(const char *str);
|
||||
int vsock_connect(unsigned int cid, unsigned int port, int type);
|
||||
int vsock_accept(unsigned int cid, unsigned int port,
|
||||
struct sockaddr_vm *clientaddrp, int type);
|
||||
int vsock_stream_connect(unsigned int cid, unsigned int port);
|
||||
int vsock_bind_connect(unsigned int cid, unsigned int port,
|
||||
unsigned int bind_port, int type);
|
||||
|
||||
@@ -20,6 +20,8 @@
|
||||
#include <sys/mman.h>
|
||||
#include <poll.h>
|
||||
#include <signal.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/sockios.h>
|
||||
|
||||
#include "vsock_test_zerocopy.h"
|
||||
#include "timeout.h"
|
||||
@@ -1238,6 +1240,79 @@ static void test_double_bind_connect_client(const struct test_opts *opts)
|
||||
}
|
||||
}
|
||||
|
||||
#define MSG_BUF_IOCTL_LEN 64
|
||||
static void test_unsent_bytes_server(const struct test_opts *opts, int type)
|
||||
{
|
||||
unsigned char buf[MSG_BUF_IOCTL_LEN];
|
||||
int client_fd;
|
||||
|
||||
client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type);
|
||||
if (client_fd < 0) {
|
||||
perror("accept");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
recv_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf));
|
||||
control_writeln("RECEIVED");
|
||||
|
||||
close(client_fd);
|
||||
}
|
||||
|
||||
static void test_unsent_bytes_client(const struct test_opts *opts, int type)
|
||||
{
|
||||
unsigned char buf[MSG_BUF_IOCTL_LEN];
|
||||
int ret, fd, sock_bytes_unsent;
|
||||
|
||||
fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
|
||||
if (fd < 0) {
|
||||
perror("connect");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
for (int i = 0; i < sizeof(buf); i++)
|
||||
buf[i] = rand() & 0xFF;
|
||||
|
||||
send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
|
||||
control_expectln("RECEIVED");
|
||||
|
||||
ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
|
||||
if (ret < 0) {
|
||||
if (errno == EOPNOTSUPP) {
|
||||
fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
|
||||
} else {
|
||||
perror("ioctl");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
} else if (ret == 0 && sock_bytes_unsent != 0) {
|
||||
fprintf(stderr,
|
||||
"Unexpected 'SIOCOUTQ' value, expected 0, got %i\n",
|
||||
sock_bytes_unsent);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
close(fd);
|
||||
}
|
||||
|
||||
static void test_stream_unsent_bytes_client(const struct test_opts *opts)
|
||||
{
|
||||
test_unsent_bytes_client(opts, SOCK_STREAM);
|
||||
}
|
||||
|
||||
static void test_stream_unsent_bytes_server(const struct test_opts *opts)
|
||||
{
|
||||
test_unsent_bytes_server(opts, SOCK_STREAM);
|
||||
}
|
||||
|
||||
static void test_seqpacket_unsent_bytes_client(const struct test_opts *opts)
|
||||
{
|
||||
test_unsent_bytes_client(opts, SOCK_SEQPACKET);
|
||||
}
|
||||
|
||||
static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts)
|
||||
{
|
||||
test_unsent_bytes_server(opts, SOCK_SEQPACKET);
|
||||
}
|
||||
|
||||
#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128)
|
||||
/* This define is the same as in 'include/linux/virtio_vsock.h':
|
||||
* it is used to decide when to send credit update message during
|
||||
@@ -1523,6 +1598,16 @@ static struct test_case test_cases[] = {
|
||||
.run_client = test_stream_rcvlowat_def_cred_upd_client,
|
||||
.run_server = test_stream_cred_upd_on_low_rx_bytes,
|
||||
},
|
||||
{
|
||||
.name = "SOCK_STREAM ioctl(SIOCOUTQ) 0 unsent bytes",
|
||||
.run_client = test_stream_unsent_bytes_client,
|
||||
.run_server = test_stream_unsent_bytes_server,
|
||||
},
|
||||
{
|
||||
.name = "SOCK_SEQPACKET ioctl(SIOCOUTQ) 0 unsent bytes",
|
||||
.run_client = test_seqpacket_unsent_bytes_client,
|
||||
.run_server = test_seqpacket_unsent_bytes_server,
|
||||
},
|
||||
{},
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user