Skip to content

Commit

Permalink
[LibOS,PAL] Refactor LibOS's /dev/tty and PAL's corresponding `dev:…
Browse files Browse the repository at this point in the history
…tty`

Gramine's LibOS initialization routine can be compared to Linux's
`init` process. Therefore, similar to what's done by the `init` process
(or more specifically, by one of its utilities `getty`), Gramine
initializes the stdin, stdout and stderr streams to file descriptors
0, 1 and 2, opened on the `/dev/tty` device (controlling terminal):
- `stdin = open("/dev/tty", O_RDONLY)`
- `stdout = stderr = open("/dev/tty", O_WRONLY | O_APPEND)`

Note that this initialization routine happens only in the first (aka
main, aka master) Gramine process. Child processes will not open this
`/dev/tty` device; instead they will inherit all the FDs from the parent
process.

On the LibOS side, the previous implementation was confusing: it mounted
`/dev/tty` device file in a special way: not via Gramine's devfs, but
instead via chrootfs. On the PAL side, the `/dev/tty` file was
confusingly mapped to PAL's `dev:tty`, which was implemented via
`PAL_TYPE_DEV` but in a special way: all device APIs in `pal_devices.c`
had a special code path for `dev:tty` (which was also erroneously
checked by comparing the FD to 0 or 1, even though these host FDs could
be different in the child).

This commit refactors this mess:
- In LibOS, `/dev/tty` becomes a pseudo-file mounted via devfs.
- In PAL, `/dev/tty` maps to `console:`, which is a new type of PAL
  handle `PAL_TYPE_CONSOLE`. The tty-specific code paths are removed from
  `pal_devices.c`, and the new type is implemented in a new `pal_console.c`.

New LibOS test `console` is added to test the tty/console.

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
  • Loading branch information
Dmitrii Kuvaiskii committed Sep 28, 2023
1 parent 2ad54dd commit c11f4c6
Show file tree
Hide file tree
Showing 33 changed files with 629 additions and 281 deletions.
2 changes: 2 additions & 0 deletions common/include/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -372,13 +372,15 @@ int buf_flush(struct print_buf* buf);
/* URI type names; each one is combined with URI_PREFIX_SEPARATOR below to form the corresponding
 * URI prefix */
#define URI_TYPE_DIR "dir"
#define URI_TYPE_PIPE "pipe"
#define URI_TYPE_PIPE_SRV "pipe.srv"
#define URI_TYPE_CONSOLE "console"
#define URI_TYPE_DEV "dev"
#define URI_TYPE_EVENTFD "eventfd"
#define URI_TYPE_FILE "file"

/* URI prefixes: the type name immediately followed by URI_PREFIX_SEPARATOR (relies on adjacent
 * string-literal concatenation) */
#define URI_PREFIX_DIR URI_TYPE_DIR URI_PREFIX_SEPARATOR
#define URI_PREFIX_PIPE URI_TYPE_PIPE URI_PREFIX_SEPARATOR
#define URI_PREFIX_PIPE_SRV URI_TYPE_PIPE_SRV URI_PREFIX_SEPARATOR
#define URI_PREFIX_CONSOLE URI_TYPE_CONSOLE URI_PREFIX_SEPARATOR
#define URI_PREFIX_DEV URI_TYPE_DEV URI_PREFIX_SEPARATOR
#define URI_PREFIX_EVENTFD URI_TYPE_EVENTFD URI_PREFIX_SEPARATOR
#define URI_PREFIX_FILE URI_TYPE_FILE URI_PREFIX_SEPARATOR
Expand Down
1 change: 1 addition & 0 deletions libos/include/libos_fs_pseudo.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ struct libos_dev_ops {
int (*flush)(struct libos_handle* hdl);
int64_t (*seek)(struct libos_handle* hdl, int64_t offset, int whence);
int (*truncate)(struct libos_handle* hdl, uint64_t len);
int (*poll)(struct libos_handle* hdl, int in_events, int* out_events);
};

#define PSEUDO_PERM_DIR PERM_r_xr_xr_x /* default for directories */
Expand Down
13 changes: 6 additions & 7 deletions libos/src/bookkeep/libos_handle.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ static MEM_MGR handle_mgr = NULL;

#define INIT_HANDLE_MAP_SIZE 32

/* Opens "/dev/tty" on `hdl`: write-only in append mode when `write` is true, read-only otherwise.
 * Returns whatever open_namei() returns. */
static int init_tty_handle(struct libos_handle* hdl, bool write) {
    int open_flags = O_RDONLY;
    if (write)
        open_flags = O_WRONLY | O_APPEND;

    return open_namei(hdl, /*start=*/NULL, "/dev/tty", open_flags, LOOKUP_FOLLOW,
                      /*found=*/NULL);
}

int open_executable(struct libos_handle* hdl, const char* path) {
struct libos_dentry* dent = NULL;

Expand Down Expand Up @@ -186,7 +181,9 @@ int init_std_handles(void) {
return -ENOMEM;
}

if ((ret = init_tty_handle(stdin_hdl, /*write=*/false)) < 0) {
ret = open_namei(stdin_hdl, /*start=*/NULL, "/dev/tty", O_RDONLY, LOOKUP_FOLLOW,
/*found=*/NULL);
if (ret < 0) {
rwlock_write_unlock(&handle_map->lock);
put_handle(stdin_hdl);
return ret;
Expand All @@ -204,7 +201,9 @@ int init_std_handles(void) {
return -ENOMEM;
}

if ((ret = init_tty_handle(stdout_hdl, /*write=*/true)) < 0) {
ret = open_namei(stdout_hdl, /*start=*/NULL, "/dev/tty", O_WRONLY | O_APPEND, LOOKUP_FOLLOW,
/*found=*/NULL);
if (ret < 0) {
rwlock_write_unlock(&handle_map->lock);
put_handle(stdout_hdl);
return ret;
Expand Down
10 changes: 1 addition & 9 deletions libos/src/fs/chroot/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,17 +135,9 @@ static int chroot_lookup(struct libos_dentry* dent) {
* We don't know the file type yet, so we can't construct a PAL URI with the right prefix. In
* most cases, a "file:" prefix is good enough: `PalStreamAttributesQuery` will access the file
* and report the right file type.
*
* The only exception is when this is the root dentry of a "dev:" mount, i.e. a directly mounted
* device. This is because PAL recognizes a special "dev:tty" device, which needs to be referred
* to by this exact URI (and "file:tty" will not work).
*/
char* uri = NULL;
mode_t tmp_type = S_IFREG;
if (!dent->parent && strstartswith(dent->mount->uri, URI_PREFIX_DEV))
tmp_type = S_IFCHR;

ret = chroot_dentry_uri(dent, tmp_type, &uri);
ret = chroot_dentry_uri(dent, S_IFREG, &uri);
if (ret < 0)
goto out;

Expand Down
67 changes: 67 additions & 0 deletions libos/src/fs/dev/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* This file contains the implementation of `/dev` pseudo-filesystem.
*/

#include "libos_flags_conv.h"
#include "libos_fs_pseudo.h"
#include "pal.h"

Expand Down Expand Up @@ -55,6 +56,61 @@ static ssize_t dev_random_read(struct libos_handle* hdl, void* buf, size_t count
return count;
}

/* Open callback of the /dev/tty pseudo-device: opens the PAL "console:" stream with the access mode
 * derived from `flags` and stores both the (heap-allocated) URI and the PAL handle in `hdl`.
 * Returns 0 on success, -ENOMEM if the URI cannot be duplicated, or the PAL error converted via
 * pal_to_unix_errno(). `dent` is not used. */
static int dev_tty_open(struct libos_handle* hdl, struct libos_dentry* dent, int flags) {
    __UNUSED(dent);

    char* console_uri = strdup(URI_PREFIX_CONSOLE);
    if (console_uri == NULL)
        return -ENOMEM;

    PAL_HANDLE console_hdl;
    int pal_ret = PalStreamOpen(console_uri, LINUX_OPEN_FLAGS_TO_PAL_ACCESS(flags),
                                PSEUDO_PERM_FILE_RW, PAL_CREATE_NEVER, /*options=*/0,
                                &console_hdl);
    if (pal_ret >= 0) {
        /* success: the URI string is now referenced by the LibOS handle */
        assert(hdl);
        hdl->uri = console_uri;
        hdl->pal_handle = console_hdl;
        return 0;
    }

    /* failure: the URI was never attached to the handle, so release it here */
    free(console_uri);
    return pal_to_unix_errno(pal_ret);
}

/* Read callback of /dev/tty: forwards the request to PalStreamRead() on the handle's PAL handle
 * (offset is always 0). Returns the number of bytes reported by PAL, or the PAL error converted via
 * pal_to_unix_errno(). */
static ssize_t dev_tty_read(struct libos_handle* hdl, void* buf, size_t count) {
    size_t bytes_read = count;

    int pal_ret = PalStreamRead(hdl->pal_handle, /*offset=*/0, &bytes_read, buf);
    if (pal_ret >= 0) {
        assert(bytes_read <= count);
        return bytes_read;
    }

    return pal_to_unix_errno(pal_ret);
}

/* Write callback of /dev/tty: forwards the request to PalStreamWrite() on the handle's PAL handle
 * (offset is always 0). Returns the number of bytes reported by PAL, or the PAL error converted via
 * pal_to_unix_errno(). */
static ssize_t dev_tty_write(struct libos_handle* hdl, const void* buf, size_t count) {
    size_t bytes_written = count;

    /* PalStreamWrite() takes a non-const buffer pointer, hence the cast */
    int pal_ret = PalStreamWrite(hdl->pal_handle, /*offset=*/0, &bytes_written, (void*)buf);
    if (pal_ret >= 0) {
        assert(bytes_written <= count);
        return bytes_written;
    }

    return pal_to_unix_errno(pal_ret);
}

/* Flush callback of /dev/tty: flushes the underlying PAL stream and returns the PAL result
 * converted via pal_to_unix_errno(). */
static int dev_tty_flush(struct libos_handle* hdl) {
    return pal_to_unix_errno(PalStreamFlush(hdl->pal_handle));
}

/* Deliberate stub: we actually want to poll on the host tty/console, so this callback exists only
 * to override the default behavior of pseudo_poll(). It ignores all arguments and always returns
 * -ENOSYS; see also libos_poll.c */
static int dev_tty_poll(struct libos_handle* hdl, int in_events, int* out_events) {
    __UNUSED(out_events);
    __UNUSED(in_events);
    __UNUSED(hdl);

    return -ENOSYS;
}

int init_devfs(void) {
struct pseudo_node* root = pseudo_add_root_dir("dev");

Expand Down Expand Up @@ -95,6 +151,17 @@ int init_devfs(void) {
/* /dev/urandom is implemented the same as /dev/random, so it has the same operations */
urandom->dev.dev_ops = random->dev.dev_ops;

/* see `man 4 tty` for more info, including major/minor numbers */
struct pseudo_node* tty = pseudo_add_dev(root, "tty");
tty->perm = PSEUDO_PERM_FILE_RW;
tty->dev.major = 5;
tty->dev.minor = 0;
tty->dev.dev_ops.open = &dev_tty_open;
tty->dev.dev_ops.read = &dev_tty_read;
tty->dev.dev_ops.write = &dev_tty_write;
tty->dev.dev_ops.flush = &dev_tty_flush;
tty->dev.dev_ops.poll = &dev_tty_poll;

struct pseudo_node* stdin = pseudo_add_link(root, "stdin", NULL);
stdin->link.target = "/proc/self/fd/0";
struct pseudo_node* stdout = pseudo_add_link(root, "stdout", NULL);
Expand Down
8 changes: 0 additions & 8 deletions libos/src/fs/libos_fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,6 @@ static int mount_sys(void) {
if (ret < 0)
return ret;

ret = mount_fs(&(struct libos_mount_params){
.type = "chroot",
.path = "/dev/tty",
.uri = URI_PREFIX_DEV "tty",
});
if (ret < 0)
return ret;

ret = mount_fs(&(struct libos_mount_params){
.type = "pseudo",
.path = "/sys",
Expand Down
6 changes: 5 additions & 1 deletion libos/src/fs/libos_fs_pseudo.c
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ static int pseudo_truncate(struct libos_handle* hdl, file_off_t size) {

case PSEUDO_DEV:
if (!node->dev.dev_ops.truncate)
return -EACCES;
return -EINVAL;
return node->dev.dev_ops.truncate(hdl, size);

default:
Expand Down Expand Up @@ -491,6 +491,10 @@ static int pseudo_poll(struct libos_handle* hdl, int events, int* out_events) {
}

case PSEUDO_DEV: {
if (node->dev.dev_ops.poll)
return node->dev.dev_ops.poll(hdl, events, out_events);

/* if no handle-specific poll, then use a generic one */
*out_events = 0;
if (node->dev.dev_ops.read)
*out_events |= events & (POLLIN | POLLRDNORM);
Expand Down
4 changes: 1 addition & 3 deletions libos/src/fs/libos_namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -407,9 +407,7 @@ int dentry_open(struct libos_handle* hdl, struct libos_dentry* dent, int flags)

/* truncate regular writable file if O_TRUNC is given */
if ((flags & O_TRUNC) && ((flags & O_RDWR) | (flags & O_WRONLY))
&& (dent->inode->type != S_IFDIR)
&& (dent->inode->type != S_IFLNK)) {

&& (dent->inode->type == S_IFREG)) {
if (!(fs->fs_ops && fs->fs_ops->truncate))
return -EINVAL;

Expand Down
2 changes: 1 addition & 1 deletion libos/src/sys/libos_ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ long libos_syscall_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) {

switch (cmd) {
case TIOCGPGRP:
if (!hdl->uri || strcmp(hdl->uri, "dev:tty") != 0) {
if (!hdl->uri || strcmp(hdl->uri, URI_PREFIX_CONSOLE)) {
ret = -ENOTTY;
break;
}
Expand Down
39 changes: 24 additions & 15 deletions libos/src/sys/libos_poll.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,33 +111,42 @@ static long do_poll(struct pollfd* fds, size_t fds_len, uint64_t* timeout_us) {
events &= ~(POLLOUT | POLLWRNORM);
}

/*
* Some handles (e.g. pipes) do not implement a poll callback at all. In such case we let
* PAL do the actual polling.
*
* Some handles (e.g. files) do have their own, handle-specific poll callback. In such case
* we do not add these handles for PAL polling, but instead populate `revents` of a
* handle-corresponding FD with the result of the poll callback.
*
* Finally, some handles (e.g. tty/console) implement a dummy poll callback that returns
* "Function not implemented" (-ENOSYS) error. We have this special case because it is
* impossible to *not* implement a callback: such handles have two layers of poll
* indirection (e.g. tty belongs to the "pseudo" FS which has a generic "pseudo" poll
* callback, which calls the actual tty-handle callback).
*/
bool handle_specific_poll_invoked = false;
if (handle->fs && handle->fs->fs_ops && handle->fs->fs_ops->poll) {
ret = handle->fs->fs_ops->poll(handle, events, &events);
/*
* FIXME: remove this hack.
* Initial 0,1,2 fds in Gramine are represented by "/dev/tty" (whatever that means)
* and have `generic_inode_poll` set as poll callback, which returns `-EAGAIN` on
* non-regular-file handles. In such case we let PAL do the actual polling.
*/
if (ret == -EAGAIN && handle->uri && !strcmp(handle->uri, "dev:tty")) {
goto dev_tty_hack;
}

if (ret < 0) {
if (ret < 0 && ret != -ENOSYS) {
/* ENOSYS implies that no handle-specific poll was found; other errors imply that
* there was a handle-specific poll, but its invocation failed for other reasons */
rwlock_read_unlock(&map->lock);
goto out;
}
if (ret != -ENOSYS)
handle_specific_poll_invoked = true;
}

if (handle_specific_poll_invoked) {
fds[i].revents = events;
if (events) {
ret_events_count++;
}

continue;

dev_tty_hack:;
continue; /* for loop over FDs to poll */
}

/* add the handle for PAL polling */
PAL_HANDLE pal_handle;
if (handle->type == TYPE_SOCK) {
pal_handle = __atomic_load_n(&handle->info.sock.pal_handle, __ATOMIC_ACQUIRE);
Expand Down
113 changes: 113 additions & 0 deletions libos/test/regression/console.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/* Copyright (C) 2023 Intel Corporation */

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

#include "common.h"

#define FIRST_HELLO_STDOUT "First hello on stdout!\n"
#define FIRST_HELLO_STDERR "First hello on stderr!\n"
#define SECOND_HELLO_STDOUT "Second hello on stdout!\n"
#define SECOND_HELLO_STDERR "Second hello on stderr!\n"
#define IGNORED_HELLO_STDOUT "Ignored hello on stdout!\n"
#define IGNORED_HELLO_STDERR "Ignored hello on stderr!\n"

/* Notes:
* - stdout and stderr are periodically closed or redirected to /dev/null in this test, so
* diagnostic error messages will not appear on the terminal (like errx output). Use strace and
* Gramine logs to analyze and debug this test instead.
* - Under Gramine, app stdout and stderr streams are both redirected to host stdout. The host
* stderr only prints Gramine logs. So, don't be surprised that `console > /dev/null` and
* `gramine-direct console > /dev/null` show different results.
*/

/*
 * Regression test for the tty/console: exercises writes to stdout/stderr while these FDs are
 * closed, restored (via dup2 of saved copies) and redirected to /dev/null, both in this process
 * and in forked children. Prints "TEST OK" and returns 0 on success; any failed step terminates
 * the process via CHECK()/errx().
 */
int main(void) {
    /* initialization -- write some messages and save stdout/stderr (for further restore) */
    ssize_t x = CHECK(write(STDOUT_FILENO, FIRST_HELLO_STDOUT, strlen(FIRST_HELLO_STDOUT)));
    if ((size_t)x != strlen(FIRST_HELLO_STDOUT))
        CHECK(-1);
    x = CHECK(write(STDERR_FILENO, FIRST_HELLO_STDERR, strlen(FIRST_HELLO_STDERR)));
    if ((size_t)x != strlen(FIRST_HELLO_STDERR))
        CHECK(-1);

    int saved_stdout = CHECK(dup(STDOUT_FILENO));
    int saved_stderr = CHECK(dup(STDERR_FILENO));

    /* test 1 -- close stdout/stderr, spawn a child, the child should *not* print anything */
    CHECK(close(STDOUT_FILENO));
    CHECK(close(STDERR_FILENO));

    pid_t p = CHECK(fork());
    if (p == 0) {
        /* the child inherits the closed FDs, so both writes must fail with EBADF; no CHECK() here
         * because failure is the expected outcome */
        x = write(STDOUT_FILENO, IGNORED_HELLO_STDOUT, strlen(IGNORED_HELLO_STDOUT));
        if (x != -1 || errno != EBADF)
            errx(1, "write(stdout) didn't fail with EBADF (returned: %zd, errno: %d)", x, errno);
        x = write(STDERR_FILENO, IGNORED_HELLO_STDERR, strlen(IGNORED_HELLO_STDERR));
        if (x != -1 || errno != EBADF)
            errx(1, "write(stderr) didn't fail with EBADF (returned: %zd, errno: %d)", x, errno);
        return 0;
    }

    int status = 0;
    CHECK(wait(&status));
    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
        errx(1, "child died with status: %#x", status);

    /* test 2 -- restore stdout/stderr and print one more message */
    CHECK(dup2(saved_stdout, STDOUT_FILENO));
    CHECK(dup2(saved_stderr, STDERR_FILENO));

    x = CHECK(write(STDOUT_FILENO, SECOND_HELLO_STDOUT, strlen(SECOND_HELLO_STDOUT)));
    if ((size_t)x != strlen(SECOND_HELLO_STDOUT))
        CHECK(-1);
    x = CHECK(write(STDERR_FILENO, SECOND_HELLO_STDERR, strlen(SECOND_HELLO_STDERR)));
    if ((size_t)x != strlen(SECOND_HELLO_STDERR))
        CHECK(-1);

    /* test 3 -- redirect stdout/stderr to null, the process should *not* print anything */
    /* the open() result must be checked: otherwise a failure here would only surface later as a
     * misleading EBADF from dup2() */
    int dev_null_fd = CHECK(open("/dev/null", O_WRONLY, 0666));
    CHECK(close(STDOUT_FILENO));
    CHECK(close(STDERR_FILENO));
    CHECK(dup2(dev_null_fd, STDOUT_FILENO));
    CHECK(dup2(dev_null_fd, STDERR_FILENO));
    CHECK(close(dev_null_fd)); /* not needed anymore */

    x = CHECK(write(STDOUT_FILENO, IGNORED_HELLO_STDOUT, strlen(IGNORED_HELLO_STDOUT)));
    if ((size_t)x != strlen(IGNORED_HELLO_STDOUT))
        CHECK(-1);
    x = CHECK(write(STDERR_FILENO, IGNORED_HELLO_STDERR, strlen(IGNORED_HELLO_STDERR)));
    if ((size_t)x != strlen(IGNORED_HELLO_STDERR))
        CHECK(-1);

    /* test 4 -- spawn a child, the child should *not* print anything */
    p = CHECK(fork());
    if (p == 0) {
        /* the child inherits the /dev/null redirection, so the writes succeed but print nothing */
        x = CHECK(write(STDOUT_FILENO, IGNORED_HELLO_STDOUT, strlen(IGNORED_HELLO_STDOUT)));
        if ((size_t)x != strlen(IGNORED_HELLO_STDOUT))
            CHECK(-1);
        x = CHECK(write(STDERR_FILENO, IGNORED_HELLO_STDERR, strlen(IGNORED_HELLO_STDERR)));
        if ((size_t)x != strlen(IGNORED_HELLO_STDERR))
            CHECK(-1);
        return 0;
    }

    status = 0;
    CHECK(wait(&status));
    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
        errx(1, "child died with status: %#x", status);

    /* finalization -- restore stdout/stderr and write some messages */
    CHECK(close(STDOUT_FILENO));
    CHECK(close(STDERR_FILENO));
    CHECK(dup2(saved_stdout, STDOUT_FILENO));
    CHECK(dup2(saved_stderr, STDERR_FILENO));

    puts("TEST OK");
    return 0;
}
Loading

0 comments on commit c11f4c6

Please sign in to comment.