Skip to content

Commit

Permalink
std.process.Child: use clone3 on x86 and x86_64
Browse files Browse the repository at this point in the history
  • Loading branch information
ruihe774 committed Dec 31, 2024
1 parent 367283e commit 0713379
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 10 deletions.
34 changes: 34 additions & 0 deletions lib/std/os/linux.zig
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,40 @@ pub fn clone(
) callconv(.C) usize, @ptrCast(&syscall_bits.clone))(func, stack, flags, arg, ptid, tp, ctid);
}

/// Argument block handed by pointer to `clone3`.
///
/// Declared `extern` so the fields keep C layout and declaration order; every
/// field is a `u64`. The field names presumably mirror the kernel's
/// `struct clone_args` — NOTE(review): confirm names/order against the Linux
/// uapi header and see clone3(2) for the authoritative per-field semantics.
///
/// Callers are expected to zero the whole struct and then set only the fields
/// they need (this is how `std.process.Child` uses it).
pub const clone_args = extern struct {
/// Bit set of `CLONE.*` flags.
flags: u64,
pidfd: u64,
child_tid: u64,
parent_tid: u64,
/// Signal number delivered to the parent when the child exits
/// (`std.process.Child` passes `SIG.CHLD` here rather than OR-ing it into `flags`).
exit_signal: u64,
/// Address of the child stack region. `std.process.Child` passes the base
/// (lowest) address of its allocation, not the top as it does for `clone`.
stack: u64,
/// Length in bytes of the region that starts at `stack`.
stack_size: u64,
tls: u64,
set_tid: u64,
set_tid_size: u64,
cgroup: u64,
};

/// Invokes the architecture-specific `clone3` trampoline, if one exists.
///
/// `cl_args` points at the argument block and `size` is its size in bytes
/// (callers pass `@sizeOf(clone_args)`). `func` and `arg` are forwarded to the
/// trampoline, which runs `func(arg)` in the child and uses its return value
/// as the child's exit status.
///
/// Returns the raw syscall result as seen by the parent; decode it with
/// `E.init`/`posix.errno`. When the current architecture provides no
/// `clone3` implementation in `syscall_bits`, no syscall is made and the
/// encoded `-ENOSYS` value is returned so callers can fall back to `clone`.
pub fn clone3(
    cl_args: *const clone_args,
    size: usize,
    func: *const fn (arg: usize) callconv(.C) u8,
    arg: usize,
) usize {
    // TODO: write asm for other architectures.
    if (!@hasDecl(syscall_bits, "clone3")) {
        const enosys: isize = @intFromEnum(E.NOSYS);
        return @bitCast(-enosys);
    } else {
        // A naked function cannot be called directly, so view it through a
        // C-calling-convention function pointer with the real signature.
        const Trampoline = *const fn (
            cl_args: *const clone_args,
            size: usize,
            func: *const fn (arg: usize) callconv(.C) u8,
            arg: usize,
        ) callconv(.C) usize;
        const trampoline: Trampoline = @ptrCast(&syscall_bits.clone3);
        return trampoline(cl_args, size, func, arg);
    }
}

pub const ARCH = arch_bits.ARCH;
pub const Elf_Symndx = arch_bits.Elf_Symndx;
pub const F = arch_bits.F;
Expand Down
31 changes: 31 additions & 0 deletions lib/std/os/linux/x86.zig
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,37 @@ pub fn clone() callconv(.Naked) usize {
);
}

/// Naked `clone3` trampoline for 32-bit x86.
///
/// Must only be reached through a C-calling-convention function pointer of
/// type `fn (cl_args, size, func, arg) usize`; the arguments are read straight
/// from the stack: `4(%esp)` = cl_args, `8(%esp)` = size, `12(%esp)` = func,
/// `16(%esp)` = arg.
///
/// Sequence:
///  * `cl_args` and `func` are loaded into ecx/edx before anything is pushed.
///  * ebx and esi are saved on the stack, which shifts every argument offset
///    by 8; hence `24(%esp)` is `arg` and `16(%esp)` is `size` afterwards.
///  * The syscall is issued with ebx = cl_args, ecx = size, eax = 435.
///  * Parent (eax != 0): restore esi/ebx and return the raw result in eax.
///  * Child (eax == 0): running on the new stack. The frame pointer is
///    cleared, esp is aligned down to 16 bytes, and 12 bytes of padding plus
///    the 4-byte pushed `arg` keep it 16-byte aligned at the call. `func(arg)`
///    is called through edx and its return value is passed to `SYS_exit`.
///    The child never returns to the caller.
pub fn clone3() callconv(.Naked) usize {
    asm volatile (
        \\ movl 4(%%esp),%%ecx
        \\ movl 12(%%esp),%%edx
        \\ pushl %%ebx
        \\ pushl %%esi
        \\ movl 24(%%esp),%%esi
        \\ movl %%ecx,%%ebx
        \\ movl 16(%%esp),%%ecx
        \\ movl $435,%%eax // SYS_clone3
        \\ int $128
        \\ testl %%eax,%%eax
        \\ jz 1f
        \\ popl %%esi
        \\ popl %%ebx
        \\ retl
        \\
        \\1:
        \\ .cfi_undefined %%eip
        \\ xorl %%ebp,%%ebp
        \\
        \\ andl $-16,%%esp
        \\ subl $12,%%esp
        \\ pushl %%esi
        \\ calll *%%edx
        \\ movl %%eax,%%ebx
        \\ movl $1,%%eax // SYS_exit
        \\ int $128
    );
}

pub fn restore() callconv(.Naked) noreturn {
switch (@import("builtin").zig_backend) {
.stage2_c => asm volatile (
Expand Down
21 changes: 21 additions & 0 deletions lib/std/os/linux/x86_64.zig
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,27 @@ pub fn clone() callconv(.Naked) usize {
);
}

/// Naked `clone3` trampoline for x86_64.
///
/// Must only be reached through a C-calling-convention function pointer of
/// type `fn (cl_args, size, func, arg) usize`, so on entry rdi = cl_args,
/// rsi = size, rdx = func, rcx = arg. rdi and rsi are already where the
/// syscall expects its two arguments, so they are left untouched.
///
/// Sequence:
///  * `arg` is moved from rcx to r8 because the `syscall` instruction
///    overwrites rcx.
///  * The syscall is issued with eax = 435.
///  * Parent (rax != 0): return the raw result in rax.
///  * Child (rax == 0): running on the new stack. The frame pointer is
///    cleared and rsp is aligned down to 16 bytes so `func` is entered with
///    the stack alignment the C ABI expects even if the caller-supplied stack
///    top was not 16-byte aligned. `func(arg)` is then called through rdx and
///    its return value is passed to `SYS_exit`. The child never returns to
///    the caller.
pub fn clone3() callconv(.Naked) usize {
    asm volatile (
        \\      movl $435,%%eax // SYS_clone3
        \\      movq %%rcx,%%r8
        \\      syscall
        \\      testq %%rax,%%rax
        \\      jz 1f
        \\      retq
        \\
        \\1:    .cfi_undefined %%rip
        \\      xorl %%ebp,%%ebp
        \\
        \\      andq $-16,%%rsp
        \\      movq %%r8,%%rdi
        \\      callq *%%rdx
        \\      movl %%eax,%%edi
        \\      movl $60,%%eax // SYS_exit
        \\      syscall
        \\
    );
}

pub const restore = restore_rt;

pub fn restore_rt() callconv(.Naked) noreturn {
Expand Down
37 changes: 27 additions & 10 deletions lib/std/process/Child.zig
Original file line number Diff line number Diff line change
Expand Up @@ -724,10 +724,6 @@ fn spawnPosix(self: *ChildProcess) SpawnError!void {
immediateExit(spawnPosixChildHelper(@intFromPtr(&child_arg)));
}
} else {
var old_mask: posix.sigset_t = undefined;
posix.sigprocmask(posix.SIG.SETMASK, &linux.all_mask, &old_mask);
defer posix.sigprocmask(posix.SIG.SETMASK, &old_mask, null);
child_arg.sigmask = &old_mask;
child_arg.ret_err = null;
// Although the stack is fixed sized, we alloc it here,
// because stack-smashing protection may have higher overhead than allocation.
Expand All @@ -737,13 +733,34 @@ fn spawnPosix(self: *ChildProcess) SpawnError!void {
// For simplicity, we just align it to page boundary here.
const stack = try self.allocator.alignedAlloc(u8, mem.page_size, stack_size);
defer self.allocator.free(stack);
const rc = linux.clone(spawnPosixChildHelper, @intFromPtr(stack.ptr) + stack_size, linux.CLONE.VM | linux.CLONE.VFORK | linux.SIG.CHLD, @intFromPtr(&child_arg), null, 0, null);
pid_result = switch (posix.errno(rc)) {
.SUCCESS => @intCast(rc),
.AGAIN => return error.SystemResources,
.NOMEM => return error.SystemResources,

var clone_args = mem.zeroes(linux.clone_args);
clone_args.flags = linux.CLONE.VM | linux.CLONE.VFORK | linux.CLONE.CLEAR_SIGHAND;
clone_args.exit_signal = linux.SIG.CHLD;
clone_args.stack = @intFromPtr(stack.ptr);
clone_args.stack_size = stack_size;
var rc = linux.clone3(&clone_args, @sizeOf(linux.clone_args), spawnPosixChildHelper, @intFromPtr(&child_arg));
switch (posix.errno(rc)) {
.SUCCESS => {},
.AGAIN, .NOMEM => return error.SystemResources,
.INVAL, .NOSYS => {
// Fall back to clone().
// We need to block signals here because we share VM with child before exec.
// Signal handlers may mess up our memory.
var old_mask: posix.sigset_t = undefined;
posix.sigprocmask(posix.SIG.SETMASK, &linux.all_mask, &old_mask);
defer posix.sigprocmask(posix.SIG.SETMASK, &old_mask, null);
child_arg.sigmask = &old_mask;
rc = linux.clone(spawnPosixChildHelper, @intFromPtr(stack.ptr) + stack_size, linux.CLONE.VM | linux.CLONE.VFORK | linux.SIG.CHLD, @intFromPtr(&child_arg), null, 0, null);
switch (posix.errno(rc)) {
.SUCCESS => {},
.AGAIN, .NOMEM => return error.SystemResources,
else => |err| return posix.unexpectedErrno(err),
}
},
else => |err| return posix.unexpectedErrno(err),
};
}
pid_result = @intCast(rc);
if (child_arg.ret_err) |err| {
return err;
}
Expand Down

0 comments on commit 0713379

Please sign in to comment.