Skip to content

Commit

Permalink
[PAL/vm-common] vsock: add guest-side vsock buf management
Browse files Browse the repository at this point in the history
The guest (our Gramine) should use `tx_cnt`, `peer_fwd_cnt` and
`peer_buf_alloc` to calculate the number of bytes currently available in
the host buffer, and thus currently possible for Gramine to send on a
particular connection (particular virtual socket). Previously, we didn't
implement this guest-side buffer space management which could lead to
app's select/poll/epoll incorrectly returning POLLOUT events.

This commit fixes this by implementing guest-side buffer space
management as outlined by the virtio spec.

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
  • Loading branch information
dimakuv committed May 2, 2024
1 parent 8598f92 commit 139c9ee
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 14 deletions.
2 changes: 1 addition & 1 deletion pal/src/host/vm-common/kernel_virtio.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,14 +362,14 @@ int virtio_vsock_connect(int sockfd, const void* addr, size_t addrlen, uint64_t
int virtio_vsock_shutdown(int sockfd, enum virtio_vsock_shutdown shutdown);
int virtio_vsock_close(int sockfd, uint64_t timeout_us);
long virtio_vsock_peek(int sockfd);
bool virtio_vsock_can_write(int sockfd);
long virtio_vsock_read(int sockfd, void* buf, size_t count);
long virtio_vsock_write(int sockfd, const void* buf, size_t count);
int virtio_vsock_getsockname(int sockfd, const void* addr, size_t* addrlen);
int virtio_vsock_set_socket_options(int sockfd, bool ipv6_v6only, bool reuseport);

int virtio_vsock_isr(void);
int virtio_vsock_bottomhalf(void);
bool virtio_vsock_can_write(void);
int virtio_vsock_init(struct virtio_pci_regs* pci_regs, struct virtio_vsock_config* pci_config,
uint64_t notify_off_addr, uint32_t notify_off_multiplier,
uint32_t* interrupt_status_reg);
Expand Down
61 changes: 49 additions & 12 deletions pal/src/host/vm-common/kernel_virtio_vsock.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
*
* Reference: https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.pdf
*
* TODO:
* - Implement guest-side buffer space management via peer_buf_alloc, peer_fwd_cnt, tx_cnt
* (see section 5.10.6.3 in spec)
*
* Diagram with flows:
*
* Bottomhalves thread (CPU0) + App threads (CPU0-CPUn)
Expand Down Expand Up @@ -418,13 +414,6 @@ static int send_pending_tq_control_packets(void) {
return ret;
}

bool virtio_vsock_can_write(void) {
spinlock_lock(&g_vsock_transmit_lock);
bool can_write = (g_vsock && g_vsock->tq->free_desc != g_vsock->tq->queue_size);
spinlock_unlock(&g_vsock_transmit_lock);
return can_write;
}

/* called from the bottomhalf thread in normal context (not interrupt context) */
int virtio_vsock_bottomhalf(void) {
int handle_rq_ret = handle_rq_with_disabled_notifications();
Expand Down Expand Up @@ -574,6 +563,10 @@ static struct virtio_vsock_connection* create_connection(uint64_t host_port, uin
conn->host_port = host_port;
conn->guest_port = guest_port;

conn->tx_cnt = 0;
conn->peer_fwd_cnt = 0;
conn->peer_buf_alloc = 0;

conn->fwd_cnt = 0;
conn->buf_alloc = VSOCK_MAX_PACKETS * VSOCK_MAX_PAYLOAD_SIZE;

Expand Down Expand Up @@ -742,7 +735,12 @@ static int send_rw_packet(struct virtio_vsock_connection* conn, const char* payl
if (!packet)
return -PAL_ERROR_NOMEM;

return copy_into_tq_and_free(packet);
int ret = copy_into_tq_and_free(packet);
if (!ret) {
/* successfully queued the packet to be sent, only then update "bytes transmitted" */
conn->tx_cnt += payload_size;
}
return ret;
}

/* takes ownership of the packet */
Expand Down Expand Up @@ -834,6 +832,15 @@ static int process_packet(struct virtio_vsock_packet* packet) {
goto out;
}

/*
* Even if packets have malicious fwd_cnt and buf_alloc values, this is benign because it only
* affects buffer space management: (1) Gramine may conclude that the host side can receive
* bytes even though it can't, or (2) Gramine may conclude that the host side can't receive
* bytes even if it can. Former case results in a packet dropped by the host, latter case
* results in Gramine app not sending the packet. Both cases are Denial of Service.
*
* See also virtio_vsock_can_write().
*/
conn->peer_fwd_cnt = packet->header.fwd_cnt;
conn->peer_buf_alloc = packet->header.buf_alloc;

Expand Down Expand Up @@ -1561,6 +1568,36 @@ long virtio_vsock_peek(int sockfd) {
return ret;
}

bool virtio_vsock_can_write(int sockfd) {
spinlock_lock(&g_vsock_transmit_lock);
bool can_write = (g_vsock && g_vsock->tq->free_desc != g_vsock->tq->queue_size);
spinlock_unlock(&g_vsock_transmit_lock);

if (can_write) {
/* we can send; additionally check that host can receive (buffer space management) */
spinlock_lock(&g_vsock_connections_lock);

struct virtio_vsock_connection* conn = get_connection(sockfd);
if (conn) {
uint32_t bytes_in_flight = conn->tx_cnt - conn->peer_fwd_cnt;
int64_t bytes_avail_in_peer_buf = (int64_t)conn->peer_buf_alloc - bytes_in_flight;
if (bytes_avail_in_peer_buf < 0) {
/* play it safe (peer_fwd_cnt and peer_buf_alloc are potentially malicious) */
bytes_avail_in_peer_buf = 0;
}

can_write = (bytes_avail_in_peer_buf > 0);
} else {
/* maybe connection was shutdown, just return that we can't write in this case */
can_write = false;
}

spinlock_unlock(&g_vsock_connections_lock);
}

return can_write;
}

long virtio_vsock_read(int sockfd, void* buf, size_t count) {
long ret;

Expand Down
2 changes: 1 addition & 1 deletion pal/src/host/vm-common/pal_common_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ static int check_socket_handle(struct pal_handle* handle, pal_wait_flags_t event
if ((events & PAL_WAIT_READ) && peeked)
revents |= PAL_WAIT_READ;

if ((events & PAL_WAIT_WRITE) && virtio_vsock_can_write())
if ((events & PAL_WAIT_WRITE) && virtio_vsock_can_write(handle->sock.fd))
revents |= PAL_WAIT_WRITE;

*out_events = revents;
Expand Down

0 comments on commit 139c9ee

Please sign in to comment.