diff --git a/arch/um/Makefile b/arch/um/Makefile index 00b63bac5eff..31e367e8ab4d 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ -Din6addr_loopback=kernel_in6addr_loopback \ - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ + -D__close_range=kernel__close_range KBUILD_RUSTFLAGS += -Crelocation-model=pie diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h index 2b6b44759dfa..3fbdda727373 100644 --- a/arch/um/include/shared/skas/stub-data.h +++ b/arch/um/include/shared/skas/stub-data.h @@ -12,6 +12,17 @@ #include #include +struct stub_init_data { + unsigned long stub_start; + + int stub_code_fd; + unsigned long stub_code_offset; + int stub_data_fd; + unsigned long stub_data_offset; + + unsigned long segv_handler; +}; + #define STUB_NEXT_SYSCALL(s) \ ((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len)) diff --git a/arch/um/kernel/skas/.gitignore b/arch/um/kernel/skas/.gitignore new file mode 100644 index 000000000000..c3409ced0f38 --- /dev/null +++ b/arch/um/kernel/skas/.gitignore @@ -0,0 +1,2 @@ +stub_exe +stub_exe.dbg diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index 6f86d53e3d69..fbb61968055f 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -3,14 +3,43 @@ # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) # -obj-y := stub.o mmu.o process.o syscall.o uaccess.o +obj-y := stub.o mmu.o process.o syscall.o uaccess.o \ + stub_exe_embed.o + +# Stub executable + +stub_exe_objs-y := stub_exe.o + +stub_exe_objs := $(foreach F,$(stub_exe_objs-y),$(obj)/$F) + +# Object file containing the ELF executable +$(obj)/stub_exe_embed.o: $(src)/stub_exe_embed.S $(obj)/stub_exe + +$(obj)/stub_exe.dbg: $(stub_exe_objs) FORCE + $(call if_changed,stub_exe) + +$(obj)/stub_exe: OBJCOPYFLAGS := -S +$(obj)/stub_exe: $(obj)/stub_exe.dbg FORCE + $(call if_changed,objcopy) + +quiet_cmd_stub_exe = STUB_EXE $@ + cmd_stub_exe = $(CC) -nostdlib -o $@ \ + $(KBUILD_CFLAGS) $(STUB_EXE_LDFLAGS) \ + $(filter %.o,$^) + +STUB_EXE_LDFLAGS = -n -static + +targets += stub_exe.dbg stub_exe $(stub_exe_objs-y) + +# end # stub.o is in the stub, so it can't be built with profiling # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work -> # disable it CFLAGS_stub.o := $(CFLAGS_NO_HARDENING) -UNPROFILE_OBJS := stub.o +CFLAGS_stub_exe.o := $(CFLAGS_NO_HARDENING) +UNPROFILE_OBJS := stub.o stub_exe.o KCOV_INSTRUMENT := n include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c new file mode 100644 index 000000000000..bc6ba2e4d805 --- /dev/null +++ b/arch/um/kernel/skas/stub_exe.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include + +void _start(void); + +noinline static void real_init(void) +{ + struct stub_init_data init_data; + unsigned long res; + struct { + void *ss_sp; + int ss_flags; + size_t ss_size; + } stack = { + .ss_size = STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, + }; + struct { + void *sa_handler_; + unsigned long sa_flags; + void *sa_restorer; + unsigned long long sa_mask; + } sa = { + /* Need to set SA_RESTORER (but the handler never returns) */ + .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000, + /* no need to mask any signals */ + .sa_mask = 0, + }; + + /* set a nice name */ + stub_syscall2(__NR_prctl, PR_SET_NAME, (unsigned long)"uml-userspace"); + + /* read information from STDIN and close it */ + res = stub_syscall3(__NR_read, 0, + (unsigned long)&init_data, sizeof(init_data)); + if (res != sizeof(init_data)) + stub_syscall1(__NR_exit, 10); + + stub_syscall1(__NR_close, 0); + + /* map stub code + data */ + res = stub_syscall6(STUB_MMAP_NR, + init_data.stub_start, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_EXEC, MAP_FIXED | MAP_SHARED, + init_data.stub_code_fd, init_data.stub_code_offset); + if (res != init_data.stub_start) + stub_syscall1(__NR_exit, 11); + + res = stub_syscall6(STUB_MMAP_NR, + init_data.stub_start + UM_KERN_PAGE_SIZE, + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, + init_data.stub_data_fd, init_data.stub_data_offset); + if (res != init_data.stub_start + UM_KERN_PAGE_SIZE) + stub_syscall1(__NR_exit, 12); + + /* setup signal stack inside stub data */ + stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE; + stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0); + + /* register SIGSEGV handler */ + sa.sa_handler_ = (void *) init_data.segv_handler; + res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0, + sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 13); + + stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + + stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); + + stub_syscall1(__NR_exit, 14); + + __builtin_unreachable(); +} + +void _start(void) +{ + char *alloc; + + /* Make enough space for the stub (including space for alignment) */ + alloc = __builtin_alloca((1 + 2 * STUB_DATA_PAGES - 1) * UM_KERN_PAGE_SIZE); + asm volatile("" : "+r,m"(alloc) : : "memory"); + + real_init(); +} diff --git a/arch/um/kernel/skas/stub_exe_embed.S b/arch/um/kernel/skas/stub_exe_embed.S new file mode 100644 index 000000000000..6d8914fbe8f1 --- /dev/null +++ b/arch/um/kernel/skas/stub_exe_embed.S @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include + +__INITDATA + +SYM_DATA_START(stub_exe_start) + .incbin "arch/um/kernel/skas/stub_exe" +SYM_DATA_END_LABEL(stub_exe_start, SYM_L_GLOBAL, stub_exe_end) + +__FINIT diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index cf44d386f23c..857e3deab293 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -42,7 +42,7 @@ void kasan_map_memory(void *start, size_t len) } /* Set by make_tempfile() during early boot. */ -static char *tempdir = NULL; +char *tempdir = NULL; /* Check if dir is on tmpfs. Return 0 if yes, -1 if no or error. */ static int __init check_tmpfs(const char *dir) diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index be666e02c44f..2286486bb0f3 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -10,8 +10,11 @@ #include #include #include +#include +#include #include #include +#include #include #include #include @@ -176,69 +179,131 @@ static void handle_trap(int pid, struct uml_pt_regs *regs) extern char __syscall_stub_start[]; -/** - * userspace_tramp() - userspace trampoline - * @stack: pointer to the new userspace stack page - * - * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed. - * This function will run on a temporary stack page. - * It ptrace()'es itself, then - * Two pages are mapped into the userspace address space: - * - STUB_CODE (with EXEC), which contains the skas stub code - * - STUB_DATA (with R/W), which contains a data page that is used to transfer certain data between the UML userspace process and the UML kernel. - * Also for the userspace process a SIGSEGV handler is installed to catch pagefaults in the userspace process. - * And last the process stops itself to give control to the UML kernel for this userspace process. - * - * Return: Always zero, otherwise the current userspace process is ended with non null exit() call - */ +static int stub_exe_fd; + static int userspace_tramp(void *stack) { - struct sigaction sa; - void *addr; - int fd; + char *const argv[] = { "uml-userspace", NULL }; + int pipe_fds[2]; unsigned long long offset; - unsigned long segv_handler = STUB_CODE + - (unsigned long) stub_segv_handler - - (unsigned long) __syscall_stub_start; + struct stub_init_data init_data = { + .stub_start = STUB_START, + .segv_handler = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) __syscall_stub_start, + }; + struct iomem_region *iomem; + int ret; - ptrace(PTRACE_TRACEME, 0, 0, 0); + init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start), + &offset); + init_data.stub_code_offset = MMAP_OFFSET(offset); - signal(SIGTERM, SIG_DFL); - signal(SIGWINCH, SIG_IGN); + init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset); + init_data.stub_data_offset = MMAP_OFFSET(offset); - fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); - addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, - PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); - if (addr == MAP_FAILED) { - os_info("mapping mmap stub at 0x%lx failed, errno = %d\n", - STUB_CODE, errno); - exit(1); + /* Set CLOEXEC on all FDs and then unset on all memory related FDs */ + close_range(0, ~0U, CLOSE_RANGE_CLOEXEC); + + fcntl(init_data.stub_data_fd, F_SETFD, 0); + for (iomem = iomem_regions; iomem; iomem = iomem->next) + fcntl(iomem->fd, F_SETFD, 0); + + /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */ + if (pipe(pipe_fds)) + exit(2); + + if (dup2(pipe_fds[0], 0) < 0) + exit(3); + close(pipe_fds[0]); + + /* Write init_data and close write side */ + ret = write(pipe_fds[1], &init_data, sizeof(init_data)); + close(pipe_fds[1]); + + if (ret != sizeof(init_data)) + exit(4); + + execveat(stub_exe_fd, "", argv, NULL, AT_EMPTY_PATH); + + exit(5); +} + +extern char stub_exe_start[]; +extern char stub_exe_end[]; + +extern char *tempdir; + +#define STUB_EXE_NAME_TEMPLATE "/uml-userspace-XXXXXX" + +#ifndef MFD_EXEC +#define MFD_EXEC 0x0010U +#endif + +static int __init init_stub_exe_fd(void) +{ + size_t written = 0; + char *tmpfile = NULL; + + stub_exe_fd = memfd_create("uml-userspace", + MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING); + + if (stub_exe_fd < 0) { + printk(UM_KERN_INFO "Could not create executable memfd, using temporary file!"); + + tmpfile = malloc(strlen(tempdir) + + strlen(STUB_EXE_NAME_TEMPLATE) + 1); + if (tmpfile == NULL) + panic("Failed to allocate memory for stub binary name"); + + strcpy(tmpfile, tempdir); + strcat(tmpfile, STUB_EXE_NAME_TEMPLATE); + + stub_exe_fd = mkstemp(tmpfile); + if (stub_exe_fd < 0) + panic("Could not create temporary file for stub binary: %d", + -errno); } - fd = phys_mapping(uml_to_phys(stack), &offset); - addr = mmap((void *) STUB_DATA, - STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_SHARED, fd, offset); - if (addr == MAP_FAILED) { - os_info("mapping segfault stack at 0x%lx failed, errno = %d\n", - STUB_DATA, errno); - exit(1); + while (written < stub_exe_end - stub_exe_start) { + ssize_t res = write(stub_exe_fd, stub_exe_start + written, + stub_exe_end - stub_exe_start - written); + if (res < 0) { + if (errno == EINTR) + continue; + + if (tmpfile) + unlink(tmpfile); + panic("Failed write stub binary: %d", -errno); + } + + written += res; } - set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; - sa.sa_sigaction = (void *) segv_handler; - sa.sa_restorer = NULL; - if (sigaction(SIGSEGV, &sa, NULL) < 0) { - os_info("%s - setting SIGSEGV handler failed - errno = %d\n", - __func__, errno); - exit(1); + if (!tmpfile) { + fcntl(stub_exe_fd, F_ADD_SEALS, + F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL); + } else { + if (fchmod(stub_exe_fd, 00500) < 0) { + unlink(tmpfile); + panic("Could not make stub binary executable: %d", + -errno); + } + + close(stub_exe_fd); + stub_exe_fd = open(tmpfile, O_RDONLY | O_CLOEXEC | O_NOFOLLOW); + if (stub_exe_fd < 0) { + unlink(tmpfile); + panic("Could not reopen stub binary: %d", -errno); + } + + unlink(tmpfile); + free(tmpfile); } - kill(os_getpid(), SIGSTOP); return 0; } +__initcall(init_stub_exe_fd); int userspace_pid[NR_CPUS]; @@ -257,7 +322,7 @@ int start_userspace(unsigned long stub_stack) { void *stack; unsigned long sp; - int pid, status, n, flags, err; + int pid, status, n, err; /* setup a temporary stack page */ stack = mmap(NULL, UM_KERN_PAGE_SIZE, @@ -273,10 +338,10 @@ int start_userspace(unsigned long stub_stack) /* set stack pointer to the end of the stack page, so it can grow downwards */ sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; - flags = CLONE_FILES | SIGCHLD; - /* clone into new userspace process */ - pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); + pid = clone(userspace_tramp, (void *) sp, + CLONE_VFORK | CLONE_VM | SIGCHLD, + (void *)stub_stack); if (pid < 0) { err = -errno; printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",