Merge x86 bugfixes from Linux 6.9-rc3

Pull fix for SEV-SNP late disable bugs.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini
2024-04-19 08:57:14 -04:00
773 changed files with 9783 additions and 4491 deletions

View File

@@ -4,7 +4,7 @@ ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y)
# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o

View File

@@ -38,7 +38,7 @@
/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
#define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8)
#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ)
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
struct bpf_arena {
struct bpf_map map;
@@ -110,7 +110,7 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
return ERR_PTR(-EINVAL);
vm_range = (u64)attr->max_entries * PAGE_SIZE;
if (vm_range > (1ull << 32))
if (vm_range > SZ_4G)
return ERR_PTR(-E2BIG);
if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32))
@@ -301,7 +301,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad
if (pgoff)
return -EINVAL;
if (len > (1ull << 32))
if (len > SZ_4G)
return -E2BIG;
/* if user_vm_start was specified at arena creation time */
@@ -322,7 +322,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad
if (WARN_ON_ONCE(arena->user_vm_start))
/* checks at map creation time should prevent this */
return -EFAULT;
return round_up(ret, 1ull << 32);
return round_up(ret, SZ_4G);
}
static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
@@ -346,7 +346,7 @@ static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
return -EBUSY;
/* Earlier checks should prevent this */
if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff))
if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > SZ_4G || vma->vm_pgoff))
return -EFAULT;
if (remember_vma(arena, vma))
@@ -420,7 +420,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
if (uaddr & ~PAGE_MASK)
return 0;
pgoff = compute_pgoff(arena, uaddr);
if (pgoff + page_cnt > page_cnt_max)
if (pgoff > page_cnt_max - page_cnt)
/* requested address will be outside of user VMA */
return 0;
}
@@ -447,7 +447,13 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
goto out;
uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
/* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */
/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
* will not overflow 32-bit. Lower 32-bit need to represent
* contiguous user address range.
* Map these pages at kern_vm_start base.
* kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
* lower 32-bit and it's ok.
*/
ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32,
kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages);
if (ret) {
@@ -510,6 +516,11 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
if (!page)
continue;
if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */
/* Optimization for the common case of page_cnt==1:
* If page wasn't mapped into some user vma there
* is no need to call zap_pages which is slow. When
* page_cnt is big it's faster to do the batched zap.
*/
zap_pages(arena, full_uaddr, 1);
vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE);
__free_page(page);

View File

@@ -80,6 +80,18 @@ static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key
return -EOPNOTSUPP;
}
/* Called from syscall */
static int bloom_map_alloc_check(union bpf_attr *attr)
{
if (attr->value_size > KMALLOC_MAX_SIZE)
/* if value_size is bigger, the user space won't be able to
* access the elements.
*/
return -E2BIG;
return 0;
}
static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
{
u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
@@ -191,6 +203,7 @@ static u64 bloom_map_mem_usage(const struct bpf_map *map)
BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter)
const struct bpf_map_ops bloom_filter_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bloom_map_alloc_check,
.map_alloc = bloom_map_alloc,
.map_free = bloom_map_free,
.map_get_next_key = bloom_map_get_next_key,

View File

@@ -2548,7 +2548,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
#ifdef CONFIG_KEXEC_CORE
#ifdef CONFIG_CRASH_DUMP
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)

View File

@@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link)
atomic64_inc(&link->refcnt);
}
static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
{
struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
/* free bpf_link and its containing memory */
link->ops->dealloc_deferred(link);
}
static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
{
if (rcu_trace_implies_rcu_gp())
bpf_link_defer_dealloc_rcu_gp(rcu);
else
call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
}
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
bool sleepable = false;
bpf_link_free_id(link->id);
if (link->prog) {
sleepable = link->prog->sleepable;
/* detach BPF program, clean up used resources */
link->ops->release(link);
bpf_prog_put(link->prog);
}
/* free bpf_link and its containing memory */
link->ops->dealloc(link);
if (link->ops->dealloc_deferred) {
/* schedule BPF link deallocation; if underlying BPF program
* is sleepable, we need to first wait for RCU tasks trace
* sync, then go through "classic" RCU grace period
*/
if (sleepable)
call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
else
call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
}
if (link->ops->dealloc)
link->ops->dealloc(link);
}
static void bpf_link_put_deferred(struct work_struct *work)
@@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_raw_tp_link_lops = {
.release = bpf_raw_tp_link_release,
.dealloc = bpf_raw_tp_link_dealloc,
.dealloc_deferred = bpf_raw_tp_link_dealloc,
.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
.fill_link_info = bpf_raw_tp_link_fill_link_info,
};

View File

@@ -5682,6 +5682,13 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_FLOW_KEYS;
}
static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
{
const struct bpf_reg_state *reg = reg_state(env, regno);
return reg->type == PTR_TO_ARENA;
}
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
@@ -6694,6 +6701,11 @@ static int check_stack_access_within_bounds(
err = check_stack_slot_within_bounds(env, min_off, state, type);
if (!err && max_off > 0)
err = -EINVAL; /* out of stack access into non-negative offsets */
if (!err && access_size < 0)
/* access_size should not be negative (or overflow an int); others checks
* along the way should have prevented such an access.
*/
err = -EFAULT; /* invalid negative access size; integer overflow? */
if (err) {
if (tnum_is_const(reg->var_off)) {
@@ -7019,7 +7031,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
if (is_ctx_reg(env, insn->dst_reg) ||
is_pkt_reg(env, insn->dst_reg) ||
is_flow_key_reg(env, insn->dst_reg) ||
is_sk_reg(env, insn->dst_reg)) {
is_sk_reg(env, insn->dst_reg) ||
is_arena_reg(env, insn->dst_reg)) {
verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str(env, reg_state(env, insn->dst_reg)->type));
@@ -14014,6 +14027,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
return -EINVAL;
}
if (!env->prog->aux->arena) {
verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
return -EINVAL;
}
} else {
if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
insn->off != 32) || insn->imm) {
@@ -14046,8 +14063,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (insn->imm) {
/* off == BPF_ADDR_SPACE_CAST */
mark_reg_unknown(env, regs, insn->dst_reg);
if (insn->imm == 1) /* cast from as(1) to as(0) */
if (insn->imm == 1) { /* cast from as(1) to as(0) */
dst_reg->type = PTR_TO_ARENA;
/* PTR_TO_ARENA is 32-bit */
dst_reg->subreg_def = env->insn_idx + 1;
}
} else if (insn->off == 0) {
/* case: R1 = R2
* copy register state to dest reg
@@ -18359,15 +18379,18 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
}
if (!env->prog->jit_requested) {
verbose(env, "JIT is required to use arena\n");
fdput(f);
return -EOPNOTSUPP;
}
if (!bpf_jit_supports_arena()) {
verbose(env, "JIT doesn't support arena\n");
fdput(f);
return -EOPNOTSUPP;
}
env->prog->aux->arena = (void *)map;
if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
verbose(env, "arena's user address must be set via map_extra or mmap()\n");
fdput(f);
return -EINVAL;
}
}
@@ -19601,8 +19624,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
(((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
/* convert to 32-bit mov that clears upper 32-bit */
insn->code = BPF_ALU | BPF_MOV | BPF_X;
/* clear off, so it's a normal 'wX = wY' from JIT pov */
/* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
insn->off = 0;
insn->imm = 0;
} /* cast from as(0) to as(1) should be handled by JIT */
goto next_insn;
}

View File

@@ -366,7 +366,9 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
insert_resource(&iomem_resource, &crashk_low_res);
#endif
#endif
return 0;
}
@@ -448,8 +450,12 @@ void __init reserve_crashkernel_generic(char *cmdline,
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
insert_resource(&iomem_resource, &crashk_res);
#endif
}
#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
static __init int insert_crashkernel_resources(void)
{
if (crashk_res.start < crashk_res.end)
@@ -462,3 +468,4 @@ static __init int insert_crashkernel_resources(void)
}
early_initcall(insert_crashkernel_resources);
#endif
#endif

View File

@@ -1643,8 +1643,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
if (!((old->flags & new->flags) & IRQF_SHARED) ||
(oldtype != (new->flags & IRQF_TRIGGER_MASK)) ||
((old->flags ^ new->flags) & IRQF_ONESHOT))
(oldtype != (new->flags & IRQF_TRIGGER_MASK)))
goto mismatch;
if ((old->flags & IRQF_ONESHOT) &&
(new->flags & IRQF_COND_ONESHOT))
new->flags |= IRQF_ONESHOT;
else if ((old->flags ^ new->flags) & IRQF_ONESHOT)
goto mismatch;
/* All handlers must agree on per-cpuness */

View File

@@ -236,6 +236,10 @@ choice
possible to load a signed module containing the algorithm to check
the signature on that module.
config MODULE_SIG_SHA1
bool "Sign modules with SHA-1"
select CRYPTO_SHA1
config MODULE_SIG_SHA256
bool "Sign modules with SHA-256"
select CRYPTO_SHA256
@@ -265,6 +269,7 @@ endchoice
config MODULE_SIG_HASH
string
depends on MODULE_SIG || IMA_APPRAISE_MODSIG
default "sha1" if MODULE_SIG_SHA1
default "sha256" if MODULE_SIG_SHA256
default "sha384" if MODULE_SIG_SHA384
default "sha512" if MODULE_SIG_SHA512

View File

@@ -2009,6 +2009,12 @@ static int console_trylock_spinning(void)
*/
mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
/*
* Update @console_may_schedule for trylock because the previous
* owner may have been schedulable.
*/
console_may_schedule = 0;
return 1;
}

View File

@@ -2408,8 +2408,11 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
return -EINVAL;
/* PARISC cannot allow mdwe as it needs writable stacks */
if (IS_ENABLED(CONFIG_PARISC))
/*
* EOPNOTSUPP might be more appropriate here in principle, but
* existing userspace depends on EINVAL specifically.
*/
if (!arch_memory_deny_write_exec_supported())
return -EINVAL;
current_bits = get_current_mdwe();

View File

@@ -129,15 +129,17 @@ static int posix_clock_open(struct inode *inode, struct file *fp)
goto out;
}
pccontext->clk = clk;
fp->private_data = pccontext;
if (clk->ops.open)
if (clk->ops.open) {
err = clk->ops.open(pccontext, fp->f_mode);
else
err = 0;
if (!err) {
get_device(clk->dev);
if (err) {
kfree(pccontext);
goto out;
}
}
fp->private_data = pccontext;
get_device(clk->dev);
err = 0;
out:
up_read(&clk->rwsem);
return err;

View File

@@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
.dealloc = bpf_kprobe_multi_link_dealloc,
.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
};
@@ -3157,6 +3157,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link)
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
if (umulti_link->task)
put_task_struct(umulti_link->task);
path_put(&umulti_link->path);
}
static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
@@ -3164,9 +3167,6 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
struct bpf_uprobe_multi_link *umulti_link;
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
if (umulti_link->task)
put_task_struct(umulti_link->task);
path_put(&umulti_link->path);
kvfree(umulti_link->uprobes);
kfree(umulti_link);
}
@@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
.release = bpf_uprobe_multi_link_release,
.dealloc = bpf_uprobe_multi_link_dealloc,
.dealloc_deferred = bpf_uprobe_multi_link_dealloc,
.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
};

View File

@@ -839,7 +839,7 @@ int traceprobe_get_entry_data_size(struct trace_probe *tp)
void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs)
{
struct probe_entry_arg *earg = tp->entry_arg;
unsigned long val;
unsigned long val = 0;
int i;
if (!earg)