mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 13:30:45 -05:00
While restricting access,
a7e1f67ed2 ("x86/msr: Filter MSR writes")
also added warning and started tainting the kernel.
But the warning message never mentioned tainting. Moreover, this uses the
"CPU_OUT_OF_SPEC" flag which is not clearly related to MSRs: that flag is
overloaded by several, fairly different situations, including some much
scarier ones.
So, without an expert around (thank you Dave Hansen), it would have been
practically impossible to root cause the tainting from just the log file at
hand. So it would be prudent to explicitly mention in the logs when the
tainting happens so that debugging crashes can be made easier.
Fix this by simply appending the CPU_OUT_OF_SPEC flag to the warning message.
This readability issue happened when staring at logs involving the Intel
Memory Latency Checker (among many other things going on in that log). The MLC
disables hardware prefetch.
[ bp: Massage and extend commit message. ]
Signed-off-by: Marc Herbert <marc.herbert@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20251101-tainted-msr-v1-1-e00658ba04d4@linux.intel.com
335 lines
7.2 KiB
C
335 lines
7.2 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/* ----------------------------------------------------------------------- *
|
|
*
|
|
* Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
|
|
* Copyright 2009 Intel Corporation; author: H. Peter Anvin
|
|
*
|
|
* ----------------------------------------------------------------------- */
|
|
|
|
/*
|
|
* x86 MSR access device
|
|
*
|
|
* This device is accessed by lseek() to the appropriate register number
|
|
* and then read/write in chunks of 8 bytes. A larger size means multiple
|
|
* reads or writes of the same register.
|
|
*
|
|
* This driver uses /dev/cpu/%d/msr where %d is the minor number, and on
|
|
* an SMP box will direct the access to CPU %d.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/fcntl.h>
|
|
#include <linux/init.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/major.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/device.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/security.h>
|
|
|
|
#include <asm/cpufeature.h>
|
|
#include <asm/msr.h>
|
|
|
|
static enum cpuhp_state cpuhp_msr_state;
|
|
|
|
enum allow_write_msrs {
|
|
MSR_WRITES_ON,
|
|
MSR_WRITES_OFF,
|
|
MSR_WRITES_DEFAULT,
|
|
};
|
|
|
|
static enum allow_write_msrs allow_writes = MSR_WRITES_DEFAULT;
|
|
|
|
static ssize_t msr_read(struct file *file, char __user *buf,
|
|
size_t count, loff_t *ppos)
|
|
{
|
|
u32 __user *tmp = (u32 __user *) buf;
|
|
u32 data[2];
|
|
u32 reg = *ppos;
|
|
int cpu = iminor(file_inode(file));
|
|
int err = 0;
|
|
ssize_t bytes = 0;
|
|
|
|
if (count % 8)
|
|
return -EINVAL; /* Invalid chunk size */
|
|
|
|
for (; count; count -= 8) {
|
|
err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]);
|
|
if (err)
|
|
break;
|
|
if (copy_to_user(tmp, &data, 8)) {
|
|
err = -EFAULT;
|
|
break;
|
|
}
|
|
tmp += 2;
|
|
bytes += 8;
|
|
}
|
|
|
|
return bytes ? bytes : err;
|
|
}
|
|
|
|
static int filter_write(u32 reg)
|
|
{
|
|
/*
|
|
* MSRs writes usually happen all at once, and can easily saturate kmsg.
|
|
* Only allow one message every 30 seconds.
|
|
*
|
|
* It's possible to be smarter here and do it (for example) per-MSR, but
|
|
* it would certainly be more complex, and this is enough at least to
|
|
* avoid saturating the ring buffer.
|
|
*/
|
|
static DEFINE_RATELIMIT_STATE(fw_rs, 30 * HZ, 1);
|
|
|
|
switch (allow_writes) {
|
|
case MSR_WRITES_ON: return 0;
|
|
case MSR_WRITES_OFF: return -EPERM;
|
|
default: break;
|
|
}
|
|
|
|
if (!__ratelimit(&fw_rs))
|
|
return 0;
|
|
|
|
pr_warn("Write to unrecognized MSR 0x%x by %s (pid: %d), tainting CPU_OUT_OF_SPEC.\n",
|
|
reg, current->comm, current->pid);
|
|
pr_warn("See https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/about for details.\n");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static ssize_t msr_write(struct file *file, const char __user *buf,
|
|
size_t count, loff_t *ppos)
|
|
{
|
|
const u32 __user *tmp = (const u32 __user *)buf;
|
|
u32 data[2];
|
|
u32 reg = *ppos;
|
|
int cpu = iminor(file_inode(file));
|
|
int err = 0;
|
|
ssize_t bytes = 0;
|
|
|
|
err = security_locked_down(LOCKDOWN_MSR);
|
|
if (err)
|
|
return err;
|
|
|
|
err = filter_write(reg);
|
|
if (err)
|
|
return err;
|
|
|
|
if (count % 8)
|
|
return -EINVAL; /* Invalid chunk size */
|
|
|
|
for (; count; count -= 8) {
|
|
if (copy_from_user(&data, tmp, 8)) {
|
|
err = -EFAULT;
|
|
break;
|
|
}
|
|
|
|
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
|
|
|
|
err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]);
|
|
if (err)
|
|
break;
|
|
|
|
tmp += 2;
|
|
bytes += 8;
|
|
}
|
|
|
|
return bytes ? bytes : err;
|
|
}
|
|
|
|
static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
|
|
{
|
|
u32 __user *uregs = (u32 __user *)arg;
|
|
u32 regs[8];
|
|
int cpu = iminor(file_inode(file));
|
|
int err;
|
|
|
|
switch (ioc) {
|
|
case X86_IOC_RDMSR_REGS:
|
|
if (!(file->f_mode & FMODE_READ)) {
|
|
err = -EBADF;
|
|
break;
|
|
}
|
|
if (copy_from_user(®s, uregs, sizeof(regs))) {
|
|
err = -EFAULT;
|
|
break;
|
|
}
|
|
err = rdmsr_safe_regs_on_cpu(cpu, regs);
|
|
if (err)
|
|
break;
|
|
if (copy_to_user(uregs, ®s, sizeof(regs)))
|
|
err = -EFAULT;
|
|
break;
|
|
|
|
case X86_IOC_WRMSR_REGS:
|
|
if (!(file->f_mode & FMODE_WRITE)) {
|
|
err = -EBADF;
|
|
break;
|
|
}
|
|
if (copy_from_user(®s, uregs, sizeof(regs))) {
|
|
err = -EFAULT;
|
|
break;
|
|
}
|
|
err = security_locked_down(LOCKDOWN_MSR);
|
|
if (err)
|
|
break;
|
|
|
|
err = filter_write(regs[1]);
|
|
if (err)
|
|
return err;
|
|
|
|
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
|
|
|
|
err = wrmsr_safe_regs_on_cpu(cpu, regs);
|
|
if (err)
|
|
break;
|
|
if (copy_to_user(uregs, ®s, sizeof(regs)))
|
|
err = -EFAULT;
|
|
break;
|
|
|
|
default:
|
|
err = -ENOTTY;
|
|
break;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static int msr_open(struct inode *inode, struct file *file)
|
|
{
|
|
unsigned int cpu = iminor(file_inode(file));
|
|
struct cpuinfo_x86 *c;
|
|
|
|
if (!capable(CAP_SYS_RAWIO))
|
|
return -EPERM;
|
|
|
|
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
|
|
return -ENXIO; /* No such CPU */
|
|
|
|
c = &cpu_data(cpu);
|
|
if (!cpu_has(c, X86_FEATURE_MSR))
|
|
return -EIO; /* MSR not supported */
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* File operations we support
|
|
*/
|
|
static const struct file_operations msr_fops = {
|
|
.owner = THIS_MODULE,
|
|
.llseek = no_seek_end_llseek,
|
|
.read = msr_read,
|
|
.write = msr_write,
|
|
.open = msr_open,
|
|
.unlocked_ioctl = msr_ioctl,
|
|
.compat_ioctl = msr_ioctl,
|
|
};
|
|
|
|
static char *msr_devnode(const struct device *dev, umode_t *mode)
|
|
{
|
|
return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
|
|
}
|
|
|
|
static const struct class msr_class = {
|
|
.name = "msr",
|
|
.devnode = msr_devnode,
|
|
};
|
|
|
|
static int msr_device_create(unsigned int cpu)
|
|
{
|
|
struct device *dev;
|
|
|
|
dev = device_create(&msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL,
|
|
"msr%d", cpu);
|
|
return PTR_ERR_OR_ZERO(dev);
|
|
}
|
|
|
|
static int msr_device_destroy(unsigned int cpu)
|
|
{
|
|
device_destroy(&msr_class, MKDEV(MSR_MAJOR, cpu));
|
|
return 0;
|
|
}
|
|
|
|
static int __init msr_init(void)
|
|
{
|
|
int err;
|
|
|
|
if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) {
|
|
pr_err("unable to get major %d for msr\n", MSR_MAJOR);
|
|
return -EBUSY;
|
|
}
|
|
err = class_register(&msr_class);
|
|
if (err)
|
|
goto out_chrdev;
|
|
|
|
err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/msr:online",
|
|
msr_device_create, msr_device_destroy);
|
|
if (err < 0)
|
|
goto out_class;
|
|
cpuhp_msr_state = err;
|
|
return 0;
|
|
|
|
out_class:
|
|
class_unregister(&msr_class);
|
|
out_chrdev:
|
|
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
|
|
return err;
|
|
}
|
|
module_init(msr_init);
|
|
|
|
static void __exit msr_exit(void)
|
|
{
|
|
cpuhp_remove_state(cpuhp_msr_state);
|
|
class_unregister(&msr_class);
|
|
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
|
|
}
|
|
module_exit(msr_exit)
|
|
|
|
static int set_allow_writes(const char *val, const struct kernel_param *cp)
|
|
{
|
|
/* val is NUL-terminated, see kernfs_fop_write() */
|
|
char *s = strstrip((char *)val);
|
|
|
|
if (!strcmp(s, "on"))
|
|
allow_writes = MSR_WRITES_ON;
|
|
else if (!strcmp(s, "off"))
|
|
allow_writes = MSR_WRITES_OFF;
|
|
else
|
|
allow_writes = MSR_WRITES_DEFAULT;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int get_allow_writes(char *buf, const struct kernel_param *kp)
|
|
{
|
|
const char *res;
|
|
|
|
switch (allow_writes) {
|
|
case MSR_WRITES_ON: res = "on"; break;
|
|
case MSR_WRITES_OFF: res = "off"; break;
|
|
default: res = "default"; break;
|
|
}
|
|
|
|
return sprintf(buf, "%s\n", res);
|
|
}
|
|
|
|
static const struct kernel_param_ops allow_writes_ops = {
|
|
.set = set_allow_writes,
|
|
.get = get_allow_writes
|
|
};
|
|
|
|
module_param_cb(allow_writes, &allow_writes_ops, NULL, 0600);
|
|
|
|
MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
|
|
MODULE_DESCRIPTION("x86 generic MSR driver");
|
|
MODULE_LICENSE("GPL");
|