[PAL/vm-common] vsock: fix host-side vsock buf management
The host uses the `fwd_cnt` and `buf_alloc` values obtained from the guest's
vsock packets for its buffer management (deciding whether to send the next
packet to the guest or to back off). We previously implemented this
incorrectly, making the two variables global across all virtual sockets. In
reality, these variables are per-connection (per-socket). This mismatch
between what the host expects and what the Gramine guest implements led to
hangs: on new connections, Gramine reported too many "bytes received" in the
`fwd_cnt` counter, leading the host to believe that too many packets were in
flight on the new connection and to back off constantly.
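
For context, the credit-based flow control described by the virtio spec (section 5.10.6.3) boils down to the check below on the sending side. This is only an illustrative sketch with made-up names, not the host's actual code:

```c
/* Illustrative sketch of the sender-side credit check from the virtio-vsock
 * spec (section 5.10.6.3); names are made up and this is not the host's
 * actual implementation. */
#include <stdbool.h>
#include <stdint.h>

struct peer_credit {
    uint32_t buf_alloc; /* receiver's advertised buffer space (from its packet headers) */
    uint32_t fwd_cnt;   /* receiver's free-running count of bytes it has consumed */
    uint32_t tx_cnt;    /* sender's free-running count of bytes it has transmitted */
};

static bool can_send(const struct peer_credit* c, uint32_t bytes) {
    uint32_t in_flight  = c->tx_cnt - c->fwd_cnt;    /* sent but not yet consumed */
    uint32_t free_space = c->buf_alloc - in_flight;  /* room left at the receiver */
    return free_space >= bytes;
}
```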

Fix this by moving the variables to fields of `struct virtio_vsock_connection`.
An additional benefit is that we already have proper locking for
per-connection fields, so there is no need for atomics or special
synchronization.

Interestingly, Linux prior to v6.7 did not expose this Gramine bug because it
had an integer wrap-around bug of its own; that bug was fixed by commit
torvalds/linux@60316d7f10b17a, which landed in v6.7.
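
To see how plain `uint32_t` arithmetic can mask the stale counter, consider the hypothetical numbers below (this is not the Linux code, only an illustration of the wrap-around):

```c
/* Hypothetical numbers; not the Linux host code, only an illustration of how
 * uint32_t wrap-around can hide an inflated fwd_cnt. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint32_t buf_alloc = 64 * 1024;         /* guest's advertised per-connection buffer */
    uint32_t tx_cnt    = 4096;              /* bytes the host sent on this new connection */
    uint32_t fwd_cnt   = 10u * 1024 * 1024; /* inflated: old global counter from other connections */

    uint32_t in_flight  = tx_cnt - fwd_cnt;      /* wraps around to a huge value */
    uint32_t free_space = buf_alloc - in_flight; /* wraps again to fwd_cnt - tx_cnt + buf_alloc here */

    /* Taken at face value, free_space looks enormous, which is why older hosts
     * kept sending despite the bogus fwd_cnt; a host that guards against the
     * wrapped in_flight instead backs off, as described in the commit message. */
    printf("in_flight=%" PRIu32 " free_space=%" PRIu32 "\n", in_flight, free_space);
    return 0;
}
```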

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
dimakuv committed May 2, 2024
1 parent 2d81da2 commit 8598f92
Showing 3 changed files with 22 additions and 27 deletions.
pal/src/host/vm-common/kernel_virtio.h (13 changes: 0 additions & 13 deletions)
@@ -313,11 +313,6 @@ struct virtio_vsock_config {
* - tq_notify_addr is set at init and used in copy_into_tq(), sync via transmit-side lock
* - host_cid is set at init, no sync required
* - guest_cid is set at init, no sync required
- * - peer_fwd_cnt and peer_buffer_alloc are used by CPU0-tied bottomhalves thread during RX,
- *   sync via receive-side lock
- * - fwd_cnt and msg_cnt are used during RX and TX, must be accessed via atomic ops
- * - buf_alloc is set at init, no sync required
- * - tx_cnt is used in copy_into_tq(), sync via transmit-side lock
* - conns_size, conns, conns_by_host_port used in many places, sync via connections lock
* - pending_tq_control_packets and co. used during TX, sync via transmit-side lock
* - shared_rq_buf is set at init and used during RX, sync via receive-side lock
@@ -337,14 +332,6 @@ struct virtio_vsock {
uint64_t host_cid;
uint64_t guest_cid;

- uint32_t peer_fwd_cnt; /* total bytes received by host on tq */
- uint32_t peer_buf_alloc; /* total buffer space on host */
- uint32_t fwd_cnt; /* total bytes received by guest on rq */
- uint32_t buf_alloc; /* total buffer space on guest */
-
- uint32_t tx_cnt; /* total bytes sent by guest on tq */
- uint32_t msg_cnt; /* total number of received msgs */
-
uint32_t conns_size; /* size of dynamic array */
struct virtio_vsock_connection** conns; /* dynamic array: fd -> connection */
struct virtio_vsock_connection* conns_by_host_port; /* hash table: host port -> connection */
pal/src/host/vm-common/kernel_virtio_vsock.c (27 changes: 13 additions & 14 deletions)
@@ -7,7 +7,8 @@
* Reference: https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.pdf
*
* TODO:
- * - Buffer space management via buf_alloc, fwd_cnt, tx_cnt (see section 5.10.6.3 in spec)
+ * - Implement guest-side buffer space management via peer_buf_alloc, peer_fwd_cnt, tx_cnt
+ *   (see section 5.10.6.3 in spec)
*
* Diagram with flows:
*
@@ -250,8 +251,6 @@ static void copy_into_tq_internal(struct virtio_vsock_packet* packet, uint64_t p
vm_shared_writew(&g_vsock->tq->avail->ring[avail_idx % g_vsock->tq->queue_size], desc_idx);
vm_shared_writew(&g_vsock->tq->avail->idx, g_vsock->tq->cached_avail_idx);

- g_vsock->tx_cnt += packet->header.size;
-
uint16_t host_device_used_flags = vm_shared_readw(&g_vsock->tq->used->flags);
if (!(host_device_used_flags & VIRTQ_USED_F_NO_NOTIFY))
vm_mmio_writew(g_vsock->tq_notify_addr, /*queue_sel=*/1);
@@ -575,6 +574,9 @@ static struct virtio_vsock_connection* create_connection(uint64_t host_port, uin
conn->host_port = host_port;
conn->guest_port = guest_port;

+ conn->fwd_cnt = 0;
+ conn->buf_alloc = VSOCK_MAX_PACKETS * VSOCK_MAX_PAYLOAD_SIZE;
+
if (attach_connection(conn) < 0) {
free(conn);
return NULL;
@@ -616,8 +618,8 @@ static struct virtio_vsock_packet* generate_packet(struct virtio_vsock_connectio
packet->header.op = op;
packet->header.flags = flags;

- packet->header.buf_alloc = g_vsock->buf_alloc;
- packet->header.fwd_cnt = __atomic_load_n(&g_vsock->fwd_cnt, __ATOMIC_ACQUIRE);
+ packet->header.buf_alloc = conn->buf_alloc;
+ packet->header.fwd_cnt = conn->fwd_cnt;

packet->header.size = payload_size;
memcpy(packet->payload, payload, payload_size);
@@ -649,8 +651,9 @@ static int neglect_packet(struct virtio_vsock_packet* in) {
packet->header.op = VIRTIO_VSOCK_OP_RST;
packet->header.flags = 0;

- packet->header.buf_alloc = g_vsock->buf_alloc;
- packet->header.fwd_cnt = __atomic_load_n(&g_vsock->fwd_cnt, __ATOMIC_ACQUIRE);
+ /* buffer-management fields are useless in RST packets, so zero out */
+ packet->header.buf_alloc = 0;
+ packet->header.fwd_cnt = 0;

packet->header.size = 0;

@@ -759,8 +762,7 @@ static int recv_rw_packet(struct virtio_vsock_connection* conn,
conn->packets_for_user[idx] = packet; /* packet is now owned by conn */
conn->prepared_for_user++;

- __atomic_add_fetch(&g_vsock->msg_cnt, 1, __ATOMIC_ACQ_REL);
- __atomic_add_fetch(&g_vsock->fwd_cnt, packet->header.size, __ATOMIC_ACQ_REL);
+ conn->fwd_cnt += packet->header.size;
return 0;
}

@@ -832,8 +834,8 @@ static int process_packet(struct virtio_vsock_packet* packet) {
goto out;
}

- g_vsock->peer_fwd_cnt = packet->header.fwd_cnt;
- g_vsock->peer_buf_alloc = packet->header.buf_alloc;
+ conn->peer_fwd_cnt = packet->header.fwd_cnt;
+ conn->peer_buf_alloc = packet->header.buf_alloc;

switch (conn->state) {
case VIRTIO_VSOCK_LISTEN:
@@ -1160,9 +1162,6 @@ int virtio_vsock_init(struct virtio_pci_regs* pci_regs, struct virtio_vsock_conf
}

vsock->host_cid = VSOCK_HOST_CID;
- vsock->tx_cnt = 0;
- vsock->fwd_cnt = 0;
- vsock->buf_alloc = VSOCK_MAX_PACKETS * sizeof(struct virtio_vsock_packet);

vsock->conns_size = VIRTIO_VSOCK_CONNS_INIT_SIZE;
vsock->conns = conns;
pal/src/host/vm-common/kernel_virtio_vsock.h (9 changes: 9 additions & 0 deletions)
@@ -135,6 +135,15 @@ struct virtio_vsock_connection {
uint32_t prepared_for_user;
uint32_t consumed_by_user;

+ /* per-connection (per-socket) buffer space management: guest side, not used currently */
+ uint32_t tx_cnt; /* free-running counter: bytes transmitted to host */
+ uint32_t peer_fwd_cnt; /* free-running counter: bytes received by host */
+ uint32_t peer_buf_alloc; /* buffer space for this connection on host */
+
+ /* per-connection (per-socket) buffer space management: host side, must inform host */
+ uint32_t fwd_cnt; /* free-running counter: bytes received by this guest */
+ uint32_t buf_alloc; /* buffer space for this connection on this guest */
+
/* receive/send is disallowed on this connection (depends on received SHUTDOWN requests) */
bool recv_disallowed;
bool send_disallowed;
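
The guest-side fields added above (`tx_cnt`, `peer_fwd_cnt`, `peer_buf_alloc`) are, per the updated TODO in kernel_virtio_vsock.c, not consulted anywhere yet. A rough sketch of how they could eventually drive guest-side flow control, using a hypothetical helper that is not part of this commit:

```c
#include <stdbool.h>
#include <stdint.h>

#include "kernel_virtio_vsock.h" /* for struct virtio_vsock_connection */

/* Hypothetical helper, not part of this commit: guest-side credit check over
 * the new per-connection fields, per spec section 5.10.6.3. The caller must
 * hold the lock that protects `conn`. */
static bool guest_can_send(struct virtio_vsock_connection* conn, uint32_t bytes) {
    uint32_t in_flight  = conn->tx_cnt - conn->peer_fwd_cnt;  /* sent but not yet received by host */
    uint32_t free_space = conn->peer_buf_alloc - in_flight;   /* room left in host's buffer */
    return free_space >= bytes;
}
```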
