From 6553c68bc73dccfb3c8a9971ee40ed378adc0df4 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 6 Oct 2025 15:10:27 +0000 Subject: [PATCH 01/18] RAS/AMD/ATL: Return error codes from helper functions Pass up error codes from helper functions rather than discarding them. Suggested-by: Mario Limonciello Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) --- drivers/ras/amd/atl/core.c | 7 +++++-- drivers/ras/amd/atl/system.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c index 4197e10993ac..0f7cd6dab0b0 100644 --- a/drivers/ras/amd/atl/core.c +++ b/drivers/ras/amd/atl/core.c @@ -194,6 +194,8 @@ MODULE_DEVICE_TABLE(x86cpu, amd_atl_cpuids); static int __init amd_atl_init(void) { + int ret; + if (!x86_match_cpu(amd_atl_cpuids)) return -ENODEV; @@ -202,8 +204,9 @@ static int __init amd_atl_init(void) check_for_legacy_df_access(); - if (get_df_system_info()) - return -ENODEV; + ret = get_df_system_info(); + if (ret) + return ret; /* Increment this module's recount so that it can't be easily unloaded. */ __module_get(THIS_MODULE); diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index e18d916d5e8b..099841433cf9 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -288,10 +288,13 @@ static void dump_df_cfg(void) int get_df_system_info(void) { - if (determine_df_rev()) { + int ret; + + ret = determine_df_rev(); + if (ret) { pr_warn("Failed to determine DF Revision"); df_cfg.rev = UNKNOWN; - return -EINVAL; + return ret; } apply_node_id_shift(); From 83be4bee57f0374ff751aaff3fef4af0af66ec81 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 17 Oct 2025 13:26:28 +0000 Subject: [PATCH 02/18] ACPI: PRM: Add acpi_prm_handler_available() Add a helper function to check if a PRM handler/module is present. This can be used during init time by code that depends on a particular handler. If the handler is not present, then the code does not need to be loaded. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: "Mario Limonciello (AMD)" Acked-by: "Rafael J. Wysocki (Intel)" Link: https://patch.msgid.link/all/20251017-wip-atl-prm-v2-1-7ab1df4a5fbc@amd.com --- drivers/acpi/prmt.c | 6 ++++++ include/linux/prmt.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c index 6792d4385eee..7b8b5d2015ec 100644 --- a/drivers/acpi/prmt.c +++ b/drivers/acpi/prmt.c @@ -244,6 +244,12 @@ static struct prm_handler_info *find_prm_handler(const guid_t *guid) return (struct prm_handler_info *) find_guid_info(guid, GET_HANDLER); } +bool acpi_prm_handler_available(const guid_t *guid) +{ + return find_prm_handler(guid) && find_prm_module(guid); +} +EXPORT_SYMBOL_GPL(acpi_prm_handler_available); + /* In-coming PRM commands */ #define PRM_CMD_RUN_SERVICE 0 diff --git a/include/linux/prmt.h b/include/linux/prmt.h index c53ab287e932..8cdc987de963 100644 --- a/include/linux/prmt.h +++ b/include/linux/prmt.h @@ -4,9 +4,11 @@ #ifdef CONFIG_ACPI_PRMT void init_prmt(void); +bool acpi_prm_handler_available(const guid_t *handler_guid); int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer); #else static inline void init_prmt(void) { } +static inline bool acpi_prm_handler_available(const guid_t *handler_guid) { return false; } static inline int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer) { return -EOPNOTSUPP; From 187d1b27a1e436822c31e43aa58505f6dd8987e2 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 17 Oct 2025 13:26:29 +0000 Subject: [PATCH 03/18] RAS/AMD/ATL: Require PRM support for future systems Currently, the AMD Address Translation Library will fail to load for new, unrecognized systems (based on Data Fabric revision). The intention is to prevent the code from executing on new systems and returning incorrect results. Recent AMD systems, however, may provide UEFI PRM handlers for address translation. This is code provided by the platform through BIOS tables. These are the preferred method for translation, and the Linux native code can be used as a fallback. Future AMD systems are expected to provide PRM handlers by default. And Linux native code will not be used. Adjust the ATL init code so that new, unrecognized systems will default to using PRM handlers only. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: "Mario Limonciello (AMD)" Link: https://patch.msgid.link/all/20251017-wip-atl-prm-v2-2-7ab1df4a5fbc@amd.com --- drivers/ras/amd/atl/internal.h | 6 +++++- drivers/ras/amd/atl/prm.c | 4 ---- drivers/ras/amd/atl/system.c | 23 +++++++++++++++++------ drivers/ras/amd/atl/umc.c | 2 +- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 2b6279d32774..82a56d9c2be1 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -138,7 +138,8 @@ struct df_flags { __u8 legacy_ficaa : 1, socket_id_shift_quirk : 1, heterogeneous : 1, - __reserved_0 : 5; + prm_only : 1, + __reserved_0 : 4; }; struct df_config { @@ -283,6 +284,9 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err); u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr); u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr); +/* GUIDs for PRM handlers */ +extern const guid_t norm_to_sys_guid; + #ifdef CONFIG_AMD_ATL_PRM unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id, unsigned long addr); #else diff --git a/drivers/ras/amd/atl/prm.c b/drivers/ras/amd/atl/prm.c index 0931a20d213b..0f9bfa96e16a 100644 --- a/drivers/ras/amd/atl/prm.c +++ b/drivers/ras/amd/atl/prm.c @@ -29,10 +29,6 @@ struct norm_to_sys_param_buf { void *out_buf; } __packed; -static const guid_t norm_to_sys_guid = GUID_INIT(0xE7180659, 0xA65D, 0x451D, - 0x92, 0xCD, 0x2B, 0x56, 0xF1, - 0x2B, 0xEB, 0xA6); - unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 bank_id, unsigned long addr) { struct norm_to_sys_param_buf p_buf; diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index 099841433cf9..812a30e21d3a 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -12,6 +12,12 @@ #include "internal.h" +#include + +const guid_t norm_to_sys_guid = GUID_INIT(0xE7180659, 0xA65D, 0x451D, + 0x92, 0xCD, 0x2B, 0x56, 0xF1, + 0x2B, 0xEB, 0xA6); + int determine_node_id(struct addr_ctx *ctx, u8 socket_id, u8 die_id) { u16 socket_id_bits, die_id_bits; @@ -212,15 +218,17 @@ static int determine_df_rev(void) if (!rev) return determine_df_rev_legacy(); - /* - * Fail out for major revisions other than '4'. - * - * Explicit support should be added for newer systems to avoid issues. - */ if (rev == 4) return df4_determine_df_rev(reg); - return -EINVAL; + /* All other systems should have PRM handlers. */ + if (!acpi_prm_handler_available(&norm_to_sys_guid)) { + pr_debug("PRM not available\n"); + return -ENODEV; + } + + df_cfg.flags.prm_only = true; + return 0; } static int get_dram_hole_base(void) @@ -297,6 +305,9 @@ int get_df_system_info(void) return ret; } + if (df_cfg.flags.prm_only) + return 0; + apply_node_id_shift(); get_num_maps(); diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 6e072b7667e9..18ce419236a5 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -422,7 +422,7 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err) socket_id, die_id, coh_st_inst_id, addr); ret_addr = prm_umc_norm_to_sys_addr(socket_id, err->ipid, addr); - if (!IS_ERR_VALUE(ret_addr)) + if (!IS_ERR_VALUE(ret_addr) || df_cfg.flags.prm_only) return ret_addr; return norm_to_sys_addr(socket_id, die_id, coh_st_inst_id, addr); From e9abd990aefd7d8a64f90fff2d391e42b8d512f7 Mon Sep 17 00:00:00 2001 From: Avadhut Naik Date: Thu, 6 Nov 2025 01:54:44 +0000 Subject: [PATCH 04/18] EDAC/amd64: Generate ctl_name string at runtime Currently, the ctl_name string is statically assigned based on the family and model of the SOC when the amd64_edac module is loaded. The same, however, is not exactly needed as the string can be generated and assigned at runtime through scnprintf(). Remove all static assignments and generate the string at runtime. Also, cleanup the switch cases which became defunct and consolidate identical cases. Signed-off-by: Avadhut Naik Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20251106015727.1987246-1-avadhut.naik@amd.com --- drivers/edac/amd64_edac.c | 56 +++++++-------------------------------- drivers/edac/amd64_edac.h | 4 ++- 2 files changed, 13 insertions(+), 47 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 2f6ab783bf20..886ad075d222 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3766,6 +3766,7 @@ static int per_family_init(struct amd64_pvt *pvt) pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; pvt->fam = boot_cpu_data.x86; + char *tmp_name = NULL; pvt->max_mcs = 2; /* @@ -3779,7 +3780,7 @@ static int per_family_init(struct amd64_pvt *pvt) switch (pvt->fam) { case 0xf: - pvt->ctl_name = (pvt->ext_model >= K8_REV_F) ? + tmp_name = (pvt->ext_model >= K8_REV_F) ? "K8 revF or later" : "K8 revE or earlier"; pvt->f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP; pvt->f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL; @@ -3788,7 +3789,6 @@ static int per_family_init(struct amd64_pvt *pvt) break; case 0x10: - pvt->ctl_name = "F10h"; pvt->f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP; pvt->f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM; pvt->ops->dbam_to_cs = f10_dbam_to_chip_select; @@ -3797,12 +3797,10 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x15: switch (pvt->model) { case 0x30: - pvt->ctl_name = "F15h_M30h"; pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2; break; case 0x60: - pvt->ctl_name = "F15h_M60h"; pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2; pvt->ops->dbam_to_cs = f15_m60h_dbam_to_chip_select; @@ -3811,7 +3809,6 @@ static int per_family_init(struct amd64_pvt *pvt) /* Richland is only client */ return -ENODEV; default: - pvt->ctl_name = "F15h"; pvt->f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2; pvt->ops->dbam_to_cs = f15_dbam_to_chip_select; @@ -3822,12 +3819,10 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x16: switch (pvt->model) { case 0x30: - pvt->ctl_name = "F16h_M30h"; pvt->f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2; break; default: - pvt->ctl_name = "F16h"; pvt->f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1; pvt->f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2; break; @@ -3836,76 +3831,51 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x17: switch (pvt->model) { - case 0x10 ... 0x2f: - pvt->ctl_name = "F17h_M10h"; - break; case 0x30 ... 0x3f: - pvt->ctl_name = "F17h_M30h"; pvt->max_mcs = 8; break; - case 0x60 ... 0x6f: - pvt->ctl_name = "F17h_M60h"; - break; - case 0x70 ... 0x7f: - pvt->ctl_name = "F17h_M70h"; - break; default: - pvt->ctl_name = "F17h"; break; } break; case 0x18: - pvt->ctl_name = "F18h"; break; case 0x19: switch (pvt->model) { case 0x00 ... 0x0f: - pvt->ctl_name = "F19h"; pvt->max_mcs = 8; break; case 0x10 ... 0x1f: - pvt->ctl_name = "F19h_M10h"; pvt->max_mcs = 12; pvt->flags.zn_regs_v2 = 1; break; - case 0x20 ... 0x2f: - pvt->ctl_name = "F19h_M20h"; - break; case 0x30 ... 0x3f: if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) { - pvt->ctl_name = "MI200"; + tmp_name = "MI200"; pvt->max_mcs = 4; pvt->dram_type = MEM_HBM2; pvt->gpu_umc_base = 0x50000; pvt->ops = &gpu_ops; } else { - pvt->ctl_name = "F19h_M30h"; pvt->max_mcs = 8; } break; - case 0x50 ... 0x5f: - pvt->ctl_name = "F19h_M50h"; - break; case 0x60 ... 0x6f: - pvt->ctl_name = "F19h_M60h"; pvt->flags.zn_regs_v2 = 1; break; case 0x70 ... 0x7f: - pvt->ctl_name = "F19h_M70h"; pvt->max_mcs = 4; pvt->flags.zn_regs_v2 = 1; break; case 0x90 ... 0x9f: - pvt->ctl_name = "F19h_M90h"; pvt->max_mcs = 4; pvt->dram_type = MEM_HBM3; pvt->gpu_umc_base = 0x90000; pvt->ops = &gpu_ops; break; case 0xa0 ... 0xaf: - pvt->ctl_name = "F19h_MA0h"; pvt->max_mcs = 12; pvt->flags.zn_regs_v2 = 1; break; @@ -3915,34 +3885,22 @@ static int per_family_init(struct amd64_pvt *pvt) case 0x1A: switch (pvt->model) { case 0x00 ... 0x1f: - pvt->ctl_name = "F1Ah"; pvt->max_mcs = 12; pvt->flags.zn_regs_v2 = 1; break; case 0x40 ... 0x4f: - pvt->ctl_name = "F1Ah_M40h"; pvt->flags.zn_regs_v2 = 1; break; case 0x50 ... 0x57: - pvt->ctl_name = "F1Ah_M50h"; + case 0xc0 ... 0xc7: pvt->max_mcs = 16; pvt->flags.zn_regs_v2 = 1; break; case 0x90 ... 0x9f: - pvt->ctl_name = "F1Ah_M90h"; - pvt->max_mcs = 8; - pvt->flags.zn_regs_v2 = 1; - break; case 0xa0 ... 0xaf: - pvt->ctl_name = "F1Ah_MA0h"; pvt->max_mcs = 8; pvt->flags.zn_regs_v2 = 1; break; - case 0xc0 ... 0xc7: - pvt->ctl_name = "F1Ah_MC0h"; - pvt->max_mcs = 16; - pvt->flags.zn_regs_v2 = 1; - break; } break; @@ -3951,6 +3909,12 @@ static int per_family_init(struct amd64_pvt *pvt) return -ENODEV; } + if (tmp_name) + scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), tmp_name); + else + scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), "F%02Xh_M%02Xh", + pvt->fam, pvt->model); + return 0; } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index d70b8a8d0b09..4ec6133d5179 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -101,6 +101,8 @@ #define ON true #define OFF false +#define MAX_CTL_NAMELEN 19 + /* * PCI-defined configuration space registers */ @@ -362,7 +364,7 @@ struct amd64_pvt { /* x4, x8, or x16 syndromes in use */ u8 ecc_sym_sz; - const char *ctl_name; + char ctl_name[MAX_CTL_NAMELEN]; u16 f1_id, f2_id; /* Maximum number of memory controllers per die/node. */ u8 max_mcs; From 6a857969150908ff1deebb8b368a872281d103bd Mon Sep 17 00:00:00 2001 From: Avadhut Naik Date: Thu, 6 Nov 2025 01:54:45 +0000 Subject: [PATCH 05/18] EDAC/amd64: Remove NUM_CONTROLLERS macro Currently, the NUM_CONTROLLERS macro is used to limit the amount of memory controllers (UMCs) available per node. The number of UMCs available per node, however, is already cached by the max_mcs variable of struct amd64_pvt. Allocate the relevant data structures dynamically using the variable instead of static allocation through the macro. The max_mcs variable is used for legacy systems too. These systems have a max of 2 controllers. Since the default value of max_mcs, set in per_family_init(), is 2, these legacy systems are also covered. Signed-off-by: Avadhut Naik Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20251106015727.1987246-1-avadhut.naik@amd.com --- drivers/edac/amd64_edac.c | 5 +++++ drivers/edac/amd64_edac.h | 3 +-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 886ad075d222..2391f3469961 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3732,6 +3732,7 @@ static void hw_info_put(struct amd64_pvt *pvt) pci_dev_put(pvt->F1); pci_dev_put(pvt->F2); kfree(pvt->umc); + kfree(pvt->csels); } static struct low_ops umc_ops = { @@ -3915,6 +3916,10 @@ static int per_family_init(struct amd64_pvt *pvt) scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), "F%02Xh_M%02Xh", pvt->fam, pvt->model); + pvt->csels = kcalloc(pvt->max_mcs, sizeof(*pvt->csels), GFP_KERNEL); + if (!pvt->csels) + return -ENOMEM; + return 0; } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 4ec6133d5179..1757c1b99fc8 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,7 +96,6 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 -#define NUM_CONTROLLERS 16 #define ON true #define OFF false @@ -348,7 +347,7 @@ struct amd64_pvt { u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ /* one for each DCT/UMC */ - struct chip_select csels[NUM_CONTROLLERS]; + struct chip_select *csels; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; From 8616025ae6e55b1fad4390fbb738f48c25e84216 Mon Sep 17 00:00:00 2001 From: Avadhut Naik Date: Thu, 6 Nov 2025 01:54:46 +0000 Subject: [PATCH 06/18] EDAC: Remove the legacy EDAC sysfs interface Commit 199747106934 ("edac: add a new per-dimm API and make the old per-virtual-rank API obsolete") introduced a new per-DIMM sysfs interface for EDAC making the old per-virtual-rank sysfs interface obsolete. Since this new sysfs interface was introduced more than a decade ago, remove the obsolete legacy interface. Signed-off-by: Avadhut Naik Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20251106015727.1987246-1-avadhut.naik@amd.com --- Documentation/admin-guide/RAS/main.rst | 142 +------- arch/loongarch/configs/loongson3_defconfig | 1 - drivers/edac/Kconfig | 8 - drivers/edac/edac_mc_sysfs.c | 404 --------------------- 4 files changed, 3 insertions(+), 552 deletions(-) diff --git a/Documentation/admin-guide/RAS/main.rst b/Documentation/admin-guide/RAS/main.rst index 447bfde509fb..5a45db32c49b 100644 --- a/Documentation/admin-guide/RAS/main.rst +++ b/Documentation/admin-guide/RAS/main.rst @@ -406,24 +406,8 @@ index of the MC:: |->mc2 .... -Under each ``mcX`` directory each ``csrowX`` is again represented by a -``csrowX``, where ``X`` is the csrow index:: - - .../mc/mc0/ - | - |->csrow0 - |->csrow2 - |->csrow3 - .... - -Notice that there is no csrow1, which indicates that csrow0 is composed -of a single ranked DIMMs. This should also apply in both Channels, in -order to have dual-channel mode be operational. Since both csrow2 and -csrow3 are populated, this indicates a dual ranked set of DIMMs for -channels 0 and 1. - -Within each of the ``mcX`` and ``csrowX`` directories are several EDAC -control and attribute files. +Within each of the ``mcX`` directory are several EDAC control and +attribute files. ``mcX`` directories ------------------- @@ -569,7 +553,7 @@ this ``X`` memory module: - Unbuffered-DDR .. [#f5] On some systems, the memory controller doesn't have any logic - to identify the memory module. On such systems, the directory is called ``rankX`` and works on a similar way as the ``csrowX`` directories. + to identify the memory module. On such systems, the directory is called ``rankX``. On modern Intel memory controllers, the memory controller identifies the memory modules directly. On such systems, the directory is called ``dimmX``. @@ -577,126 +561,6 @@ this ``X`` memory module: symlinks inside the sysfs mapping that are automatically created by the sysfs subsystem. Currently, they serve no purpose. -``csrowX`` directories ----------------------- - -When CONFIG_EDAC_LEGACY_SYSFS is enabled, sysfs will contain the ``csrowX`` -directories. As this API doesn't work properly for Rambus, FB-DIMMs and -modern Intel Memory Controllers, this is being deprecated in favor of -``dimmX`` directories. - -In the ``csrowX`` directories are EDAC control and attribute files for -this ``X`` instance of csrow: - - -- ``ue_count`` - Total Uncorrectable Errors count attribute file - - This attribute file displays the total count of uncorrectable - errors that have occurred on this csrow. If panic_on_ue is set - this counter will not have a chance to increment, since EDAC - will panic the system. - - -- ``ce_count`` - Total Correctable Errors count attribute file - - This attribute file displays the total count of correctable - errors that have occurred on this csrow. This count is very - important to examine. CEs provide early indications that a - DIMM is beginning to fail. This count field should be - monitored for non-zero values and report such information - to the system administrator. - - -- ``size_mb`` - Total memory managed by this csrow attribute file - - This attribute file displays, in count of megabytes, the memory - that this csrow contains. - - -- ``mem_type`` - Memory Type attribute file - - This attribute file will display what type of memory is currently - on this csrow. Normally, either buffered or unbuffered memory. - Examples: - - - Registered-DDR - - Unbuffered-DDR - - -- ``edac_mode`` - EDAC Mode of operation attribute file - - This attribute file will display what type of Error detection - and correction is being utilized. - - -- ``dev_type`` - Device type attribute file - - This attribute file will display what type of DRAM device is - being utilized on this DIMM. - Examples: - - - x1 - - x2 - - x4 - - x8 - - -- ``ch0_ce_count`` - Channel 0 CE Count attribute file - - This attribute file will display the count of CEs on this - DIMM located in channel 0. - - -- ``ch0_ue_count`` - Channel 0 UE Count attribute file - - This attribute file will display the count of UEs on this - DIMM located in channel 0. - - -- ``ch0_dimm_label`` - Channel 0 DIMM Label control file - - - This control file allows this DIMM to have a label assigned - to it. With this label in the module, when errors occur - the output can provide the DIMM label in the system log. - This becomes vital for panic events to isolate the - cause of the UE event. - - DIMM Labels must be assigned after booting, with information - that correctly identifies the physical slot with its - silk screen label. This information is currently very - motherboard specific and determination of this information - must occur in userland at this time. - - -- ``ch1_ce_count`` - Channel 1 CE Count attribute file - - - This attribute file will display the count of CEs on this - DIMM located in channel 1. - - -- ``ch1_ue_count`` - Channel 1 UE Count attribute file - - - This attribute file will display the count of UEs on this - DIMM located in channel 0. - - -- ``ch1_dimm_label`` - Channel 1 DIMM Label control file - - This control file allows this DIMM to have a label assigned - to it. With this label in the module, when errors occur - the output can provide the DIMM label in the system log. - This becomes vital for panic events to isolate the - cause of the UE event. - - DIMM Labels must be assigned after booting, with information - that correctly identifies the physical slot with its - silk screen label. This information is currently very - motherboard specific and determination of this information - must occur in userland at this time. - System Logging -------------- diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 3e838c229cd5..50e1304e7a6f 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -917,7 +917,6 @@ CONFIG_MMC=y CONFIG_MMC_LOONGSON2=m CONFIG_INFINIBAND=m CONFIG_EDAC=y -# CONFIG_EDAC_LEGACY_SYSFS is not set CONFIG_EDAC_LOONGSON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 39352b9b7a7e..9a7ff42064e9 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -23,14 +23,6 @@ menuconfig EDAC if EDAC -config EDAC_LEGACY_SYSFS - bool "EDAC legacy sysfs" - default y - help - Enable the compatibility sysfs nodes. - Use 'Y' if your edac utilities aren't ported to work with the newer - structures. - config EDAC_DEBUG bool "Debugging" select DEBUG_FS diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 8689631f1905..091cc6aae8a9 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -115,401 +115,6 @@ static const char * const edac_caps[] = { [EDAC_S16ECD16ED] = "S16ECD16ED" }; -#ifdef CONFIG_EDAC_LEGACY_SYSFS -/* - * EDAC sysfs CSROW data structures and methods - */ - -#define to_csrow(k) container_of(k, struct csrow_info, dev) - -/* - * We need it to avoid namespace conflicts between the legacy API - * and the per-dimm/per-rank one - */ -#define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \ - static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) - -struct dev_ch_attribute { - struct device_attribute attr; - unsigned int channel; -}; - -#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \ - static struct dev_ch_attribute dev_attr_legacy_##_name = \ - { __ATTR(_name, _mode, _show, _store), (_var) } - -#define to_channel(k) (container_of(k, struct dev_ch_attribute, attr)->channel) - -/* Set of more default csrow attribute show/store functions */ -static ssize_t csrow_ue_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%u\n", csrow->ue_count); -} - -static ssize_t csrow_ce_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%u\n", csrow->ce_count); -} - -static ssize_t csrow_size_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - int i; - u32 nr_pages = 0; - - for (i = 0; i < csrow->nr_channels; i++) - nr_pages += csrow->channels[i]->dimm->nr_pages; - return sysfs_emit(data, "%u\n", PAGES_TO_MiB(nr_pages)); -} - -static ssize_t csrow_mem_type_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%s\n", edac_mem_types[csrow->channels[0]->dimm->mtype]); -} - -static ssize_t csrow_dev_type_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%s\n", dev_types[csrow->channels[0]->dimm->dtype]); -} - -static ssize_t csrow_edac_mode_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - - return sysfs_emit(data, "%s\n", edac_caps[csrow->channels[0]->dimm->edac_mode]); -} - -/* show/store functions for DIMM Label attributes */ -static ssize_t channel_dimm_label_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - /* if field has not been initialized, there is nothing to send */ - if (!rank->dimm->label[0]) - return 0; - - return sysfs_emit(data, "%s\n", rank->dimm->label); -} - -static ssize_t channel_dimm_label_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - size_t copy_count = count; - - if (count == 0) - return -EINVAL; - - if (data[count - 1] == '\0' || data[count - 1] == '\n') - copy_count -= 1; - - if (copy_count == 0 || copy_count >= sizeof(rank->dimm->label)) - return -EINVAL; - - memcpy(rank->dimm->label, data, copy_count); - rank->dimm->label[copy_count] = '\0'; - - return count; -} - -/* show function for dynamic chX_ce_count attribute */ -static ssize_t channel_ce_count_show(struct device *dev, - struct device_attribute *mattr, char *data) -{ - struct csrow_info *csrow = to_csrow(dev); - unsigned int chan = to_channel(mattr); - struct rank_info *rank = csrow->channels[chan]; - - return sysfs_emit(data, "%u\n", rank->ce_count); -} - -/* cwrow/attribute files */ -DEVICE_ATTR_LEGACY(size_mb, S_IRUGO, csrow_size_show, NULL); -DEVICE_ATTR_LEGACY(dev_type, S_IRUGO, csrow_dev_type_show, NULL); -DEVICE_ATTR_LEGACY(mem_type, S_IRUGO, csrow_mem_type_show, NULL); -DEVICE_ATTR_LEGACY(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL); -DEVICE_ATTR_LEGACY(ue_count, S_IRUGO, csrow_ue_count_show, NULL); -DEVICE_ATTR_LEGACY(ce_count, S_IRUGO, csrow_ce_count_show, NULL); - -/* default attributes of the CSROW object */ -static struct attribute *csrow_attrs[] = { - &dev_attr_legacy_dev_type.attr, - &dev_attr_legacy_mem_type.attr, - &dev_attr_legacy_edac_mode.attr, - &dev_attr_legacy_size_mb.attr, - &dev_attr_legacy_ue_count.attr, - &dev_attr_legacy_ce_count.attr, - NULL, -}; - -static const struct attribute_group csrow_attr_grp = { - .attrs = csrow_attrs, -}; - -static const struct attribute_group *csrow_attr_groups[] = { - &csrow_attr_grp, - NULL -}; - -static const struct device_type csrow_attr_type = { - .groups = csrow_attr_groups, -}; - -/* - * possible dynamic channel DIMM Label attribute files - * - */ -DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 0); -DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 1); -DEVICE_CHANNEL(ch2_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 2); -DEVICE_CHANNEL(ch3_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 3); -DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 4); -DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 5); -DEVICE_CHANNEL(ch6_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 6); -DEVICE_CHANNEL(ch7_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 7); -DEVICE_CHANNEL(ch8_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 8); -DEVICE_CHANNEL(ch9_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 9); -DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 10); -DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 11); -DEVICE_CHANNEL(ch12_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 12); -DEVICE_CHANNEL(ch13_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 13); -DEVICE_CHANNEL(ch14_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 14); -DEVICE_CHANNEL(ch15_dimm_label, S_IRUGO | S_IWUSR, - channel_dimm_label_show, channel_dimm_label_store, 15); - -/* Total possible dynamic DIMM Label attribute file table */ -static struct attribute *dynamic_csrow_dimm_attr[] = { - &dev_attr_legacy_ch0_dimm_label.attr.attr, - &dev_attr_legacy_ch1_dimm_label.attr.attr, - &dev_attr_legacy_ch2_dimm_label.attr.attr, - &dev_attr_legacy_ch3_dimm_label.attr.attr, - &dev_attr_legacy_ch4_dimm_label.attr.attr, - &dev_attr_legacy_ch5_dimm_label.attr.attr, - &dev_attr_legacy_ch6_dimm_label.attr.attr, - &dev_attr_legacy_ch7_dimm_label.attr.attr, - &dev_attr_legacy_ch8_dimm_label.attr.attr, - &dev_attr_legacy_ch9_dimm_label.attr.attr, - &dev_attr_legacy_ch10_dimm_label.attr.attr, - &dev_attr_legacy_ch11_dimm_label.attr.attr, - &dev_attr_legacy_ch12_dimm_label.attr.attr, - &dev_attr_legacy_ch13_dimm_label.attr.attr, - &dev_attr_legacy_ch14_dimm_label.attr.attr, - &dev_attr_legacy_ch15_dimm_label.attr.attr, - NULL -}; - -/* possible dynamic channel ce_count attribute files */ -DEVICE_CHANNEL(ch0_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 0); -DEVICE_CHANNEL(ch1_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 1); -DEVICE_CHANNEL(ch2_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 2); -DEVICE_CHANNEL(ch3_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 3); -DEVICE_CHANNEL(ch4_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 4); -DEVICE_CHANNEL(ch5_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 5); -DEVICE_CHANNEL(ch6_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 6); -DEVICE_CHANNEL(ch7_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 7); -DEVICE_CHANNEL(ch8_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 8); -DEVICE_CHANNEL(ch9_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 9); -DEVICE_CHANNEL(ch10_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 10); -DEVICE_CHANNEL(ch11_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 11); -DEVICE_CHANNEL(ch12_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 12); -DEVICE_CHANNEL(ch13_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 13); -DEVICE_CHANNEL(ch14_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 14); -DEVICE_CHANNEL(ch15_ce_count, S_IRUGO, - channel_ce_count_show, NULL, 15); - -/* Total possible dynamic ce_count attribute file table */ -static struct attribute *dynamic_csrow_ce_count_attr[] = { - &dev_attr_legacy_ch0_ce_count.attr.attr, - &dev_attr_legacy_ch1_ce_count.attr.attr, - &dev_attr_legacy_ch2_ce_count.attr.attr, - &dev_attr_legacy_ch3_ce_count.attr.attr, - &dev_attr_legacy_ch4_ce_count.attr.attr, - &dev_attr_legacy_ch5_ce_count.attr.attr, - &dev_attr_legacy_ch6_ce_count.attr.attr, - &dev_attr_legacy_ch7_ce_count.attr.attr, - &dev_attr_legacy_ch8_ce_count.attr.attr, - &dev_attr_legacy_ch9_ce_count.attr.attr, - &dev_attr_legacy_ch10_ce_count.attr.attr, - &dev_attr_legacy_ch11_ce_count.attr.attr, - &dev_attr_legacy_ch12_ce_count.attr.attr, - &dev_attr_legacy_ch13_ce_count.attr.attr, - &dev_attr_legacy_ch14_ce_count.attr.attr, - &dev_attr_legacy_ch15_ce_count.attr.attr, - NULL -}; - -static umode_t csrow_dev_is_visible(struct kobject *kobj, - struct attribute *attr, int idx) -{ - struct device *dev = kobj_to_dev(kobj); - struct csrow_info *csrow = container_of(dev, struct csrow_info, dev); - - if (idx >= csrow->nr_channels) - return 0; - - if (idx >= ARRAY_SIZE(dynamic_csrow_ce_count_attr) - 1) { - WARN_ONCE(1, "idx: %d\n", idx); - return 0; - } - - /* Only expose populated DIMMs */ - if (!csrow->channels[idx]->dimm->nr_pages) - return 0; - - return attr->mode; -} - - -static const struct attribute_group csrow_dev_dimm_group = { - .attrs = dynamic_csrow_dimm_attr, - .is_visible = csrow_dev_is_visible, -}; - -static const struct attribute_group csrow_dev_ce_count_group = { - .attrs = dynamic_csrow_ce_count_attr, - .is_visible = csrow_dev_is_visible, -}; - -static const struct attribute_group *csrow_dev_groups[] = { - &csrow_dev_dimm_group, - &csrow_dev_ce_count_group, - NULL -}; - -static void csrow_release(struct device *dev) -{ - /* - * Nothing to do, just unregister sysfs here. The mci - * device owns the data and will also release it. - */ -} - -static inline int nr_pages_per_csrow(struct csrow_info *csrow) -{ - int chan, nr_pages = 0; - - for (chan = 0; chan < csrow->nr_channels; chan++) - nr_pages += csrow->channels[chan]->dimm->nr_pages; - - return nr_pages; -} - -/* Create a CSROW object under specified edac_mc_device */ -static int edac_create_csrow_object(struct mem_ctl_info *mci, - struct csrow_info *csrow, int index) -{ - int err; - - csrow->dev.type = &csrow_attr_type; - csrow->dev.groups = csrow_dev_groups; - csrow->dev.release = csrow_release; - device_initialize(&csrow->dev); - csrow->dev.parent = &mci->dev; - csrow->mci = mci; - dev_set_name(&csrow->dev, "csrow%d", index); - dev_set_drvdata(&csrow->dev, csrow); - - err = device_add(&csrow->dev); - if (err) { - edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev)); - put_device(&csrow->dev); - return err; - } - - edac_dbg(0, "device %s created\n", dev_name(&csrow->dev)); - - return 0; -} - -/* Create a CSROW object under specified edac_mc_device */ -static int edac_create_csrow_objects(struct mem_ctl_info *mci) -{ - int err, i; - struct csrow_info *csrow; - - for (i = 0; i < mci->nr_csrows; i++) { - csrow = mci->csrows[i]; - if (!nr_pages_per_csrow(csrow)) - continue; - err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) - goto error; - } - return 0; - -error: - for (--i; i >= 0; i--) { - if (device_is_registered(&mci->csrows[i]->dev)) - device_unregister(&mci->csrows[i]->dev); - } - - return err; -} - -static void edac_delete_csrow_objects(struct mem_ctl_info *mci) -{ - int i; - - for (i = 0; i < mci->nr_csrows; i++) { - if (device_is_registered(&mci->csrows[i]->dev)) - device_unregister(&mci->csrows[i]->dev); - } -} - -#endif - /* * Per-dimm (or per-rank) devices */ @@ -989,12 +594,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, goto fail; } -#ifdef CONFIG_EDAC_LEGACY_SYSFS - err = edac_create_csrow_objects(mci); - if (err < 0) - goto fail; -#endif - edac_create_debugfs_nodes(mci); return 0; @@ -1019,9 +618,6 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) #ifdef CONFIG_EDAC_DEBUG edac_debugfs_remove_recursive(mci->debugfs); #endif -#ifdef CONFIG_EDAC_LEGACY_SYSFS - edac_delete_csrow_objects(mci); -#endif mci_for_each_dimm(mci, dimm) { if (!device_is_registered(&dimm->dev)) From 24e3848a2e903fe614fbeef3048fa4fe91e46482 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Thu, 6 Nov 2025 12:24:54 +0100 Subject: [PATCH 07/18] RAS/CEC: Replace use of system_wq with system_percpu_wq Switch to using system_percpu_wq because system_wq is going away as part of a workqueue restructuring. Currently if a user enqueues a work item using schedule_delayed_work() the used workqueue is "system_wq" (per-cpu workqueue) while queue_delayed_work() uses WORK_CPU_UNBOUND (used when a CPU is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use of WORK_CPU_UNBOUND again. This lack of consistency cannot be addressed without refactoring the API. For more details see those commits and the Link tag below. 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") [ bp: Massage commit message. ] Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/all/20250221112003.1dSuoGyc@linutronix.de --- drivers/ras/cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index e440b15fbabc..15f7f043c8ef 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -166,7 +166,7 @@ static void cec_mod_work(unsigned long interval) unsigned long iv; iv = interval * HZ; - mod_delayed_work(system_wq, &cec_work, round_jiffies(iv)); + mod_delayed_work(system_percpu_wq, &cec_work, round_jiffies(iv)); } static void cec_work_fn(struct work_struct *work) From f18e71cd6c546e65fab25829ed9a55cd1a0928b3 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Thu, 6 Nov 2025 16:47:35 +0800 Subject: [PATCH 08/18] EDAC/ie31200: Fix error handling in ie31200_register_mci ie31200_register_mci() calls device_initialize() for priv->dev unconditionally. However, in the error path, put_device() is not called, leading to an imbalance. Similarly, in the unload path, put_device() is missing. Although edac_mc_free() eventually frees the memory, it does not release the device initialized by device_initialize(). For code readability and proper pairing of device_initialize()/put_device(), add put_device() calls in both error and unload paths. Found by code review. Signed-off-by: Ma Ke Signed-off-by: Tony Luck Reviewed-by: Qiuxu Zhuo Link: https://patch.msgid.link/20251106084735.35017-1-make24@iscas.ac.cn --- drivers/edac/ie31200_edac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 5a080ab65476..8d4ddaa85ae8 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -526,6 +526,7 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, in ie31200_pvt.priv[mc] = priv; return 0; fail_unmap: + put_device(&priv->dev); iounmap(window); fail_free: edac_mc_free(mci); @@ -598,6 +599,7 @@ static void ie31200_unregister_mcis(void) mci = priv->mci; edac_mc_del_mc(mci->pdev); iounmap(priv->window); + put_device(&priv->dev); edac_mc_free(mci); } } From cdf5ecc3f6e1b3cc5475b879c64e16ecf6de569b Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 18 Nov 2025 14:56:22 +0100 Subject: [PATCH 09/18] EDAC/ghes: Replace deprecated strcpy() in ghes_edac_report_mem_error() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit strcpy() has been deprecated¹ because it performs no bounds checking on the destination buffer, which can lead to buffer overflows. Use the safer strscpy() instead. ¹ https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy Signed-off-by: Thorsten Blum Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Link: https://patch.msgid.link/20251118135621.101148-2-thorsten.blum@linux.dev --- drivers/edac/ghes_edac.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 1eb0136c6fbd..d80c88818691 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -15,6 +15,7 @@ #include "edac_module.h" #include #include +#include #define OTHER_DETAIL_LEN 400 @@ -332,7 +333,7 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb, p = pvt->msg; p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype)); } else { - strcpy(pvt->msg, "unknown error"); + strscpy(pvt->msg, "unknown error"); } /* Error address */ @@ -357,14 +358,14 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb, dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle); if (dimm) { e->top_layer = dimm->idx; - strcpy(e->label, dimm->label); + strscpy(e->label, dimm->label); } } if (p > e->location) *(p - 1) = '\0'; if (!*e->label) - strcpy(e->label, "unknown memory"); + strscpy(e->label, "unknown memory"); /* All other fields are mapped on e->other_detail */ p = pvt->other_detail; From b3d70059cbb262eef12927dbcf37ba8586d4a3ab Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 19 Nov 2025 12:11:40 -0800 Subject: [PATCH 10/18] EDAC/{skx_common,skx,i10nm}: Make skx_register_mci() independent of pci_dev Memory controllers in the new Intel server CPUs, such as Diamond Rapids, are presented as MMIO-based devices rather than PCI devices. Modify skx_register_mci() to be independent of 'pci_dev' and use a generic 'dev' of 'struct device' to prepare for support of such MMIO-based memory controllers. Tested-by: Yi Lai Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://patch.msgid.link/20251119134132.2389472-2-qiuxu.zhuo@intel.com --- drivers/edac/i10nm_base.c | 3 ++- drivers/edac/skx_base.c | 4 ++-- drivers/edac/skx_common.c | 10 +++++----- drivers/edac/skx_common.h | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 2010a47149f4..89b3e8cc38b1 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -1198,7 +1198,8 @@ static int __init i10nm_init(void) d->imc[i].num_dimms = cfg->ddr_dimm_num; } - rc = skx_register_mci(&d->imc[i], d->imc[i].mdev, + rc = skx_register_mci(&d->imc[i], &d->imc[i].mdev->dev, + pci_name(d->imc[i].mdev), "Intel_10nm Socket", EDAC_MOD_STR, i10nm_get_dimm_config, cfg); if (rc < 0) diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 078ddf95cc6e..aa6593ccda2d 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -662,8 +662,8 @@ static int __init skx_init(void) d->imc[i].src_id = src_id; d->imc[i].num_channels = cfg->ddr_chan_num; d->imc[i].num_dimms = cfg->ddr_dimm_num; - - rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev, + rc = skx_register_mci(&d->imc[i], &d->imc[i].chan[0].cdev->dev, + pci_name(d->imc[i].chan[0].cdev), "Skylake Socket", EDAC_MOD_STR, skx_get_dimm_config, cfg); if (rc < 0) diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 724842f512ac..cf58cb5951ab 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -545,9 +545,9 @@ int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, } EXPORT_SYMBOL_GPL(skx_get_nvdimm_info); -int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, - const char *ctl_name, const char *mod_str, - get_dimm_config_f get_dimm_config, +int skx_register_mci(struct skx_imc *imc, struct device *dev, + const char *dev_name, const char *ctl_name, + const char *mod_str, get_dimm_config_f get_dimm_config, struct res_config *cfg) { struct mem_ctl_info *mci; @@ -588,7 +588,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = mod_str; - mci->dev_name = pci_name(pdev); + mci->dev_name = dev_name; mci->ctl_page_to_phys = NULL; rc = get_dimm_config(mci, cfg); @@ -596,7 +596,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, goto fail; /* Record ptr to the generic device */ - mci->pdev = &pdev->dev; + mci->pdev = dev; /* Add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 73ba89786cdf..0be088f47587 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -302,7 +302,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno, const char *mod_str); -int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, +int skx_register_mci(struct skx_imc *imc, struct device *dev, const char *dev_name, const char *ctl_name, const char *mod_str, get_dimm_config_f get_dimm_config, struct res_config *cfg); From 9529e697739e2d4dc8c4129bbe91576f744f79f6 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 19 Nov 2025 12:11:40 -0800 Subject: [PATCH 11/18] EDAC/skx_common: Prepare for skx_get_edac_list() The Intel EDAC library 'skx_common' maintains the Intel server EDAC device list for {skx, i10nm}_edac drivers, which use skx_get_all_bus_mappings() to build and retrieve the EDAC device list. However, the upcoming Intel EDAC driver, imh_edac, for Diamond Rapids servers is designed for memory controllers that are MMIO-based devices rather than PCI devices. Consequently, it can't use skx_get_all_bus_mappings() due to the absence of a PCI bus. To accommodate this, prepare skx_get_edac_list() to enable the upcoming imh_edac driver to obtain the EDAC device list from the skx_common library and build the EDAC device list independently. Tested-by: Yi Lai Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://patch.msgid.link/20251119134132.2389472-3-qiuxu.zhuo@intel.com --- drivers/edac/skx_common.c | 6 ++++++ drivers/edac/skx_common.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index cf58cb5951ab..6a1b30aa1ee6 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -384,6 +384,12 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) } EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings); +struct list_head *skx_get_edac_list(void) +{ + return &dev_edac_list; +} +EXPORT_SYMBOL_GPL(skx_get_edac_list); + int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm) { struct pci_dev *pdev; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 0be088f47587..52734091a79d 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -293,6 +293,8 @@ int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); +struct list_head *skx_get_edac_list(void); + int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, From d4839582bc7002095b013acb4a2dcaa1438c41aa Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 19 Nov 2025 12:11:40 -0800 Subject: [PATCH 12/18] EDAC/skx_common: Prepare for skx_set_hi_lo() The upcoming imh_edac driver for Intel Diamond Rapids servers cannot use skx_get_hi_lo() in skx_common to retrieve the TOHM (Top of High Memory) and TOLM (Top of Low Memory) parameters. Instead, it obtains these parameters within its own EDAC driver. To accommodate this, prepare skx_set_hi_lo() to allow the driver to notify skx_common of these parameters. Tested-by: Yi Lai Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://patch.msgid.link/20251119134132.2389472-4-qiuxu.zhuo@intel.com --- drivers/edac/skx_common.c | 7 +++++++ drivers/edac/skx_common.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 6a1b30aa1ee6..e45abd0008ef 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -430,6 +430,13 @@ int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm) } EXPORT_SYMBOL_GPL(skx_get_hi_lo); +void skx_set_hi_lo(u64 tolm, u64 tohm) +{ + skx_tolm = tolm; + skx_tohm = tohm; +} +EXPORT_SYMBOL_GPL(skx_set_hi_lo); + static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval, int maxval, const char *name) { diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 52734091a79d..86a883d3c2a4 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -296,6 +296,7 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); struct list_head *skx_get_edac_list(void); int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); +void skx_set_hi_lo(u64 tolm, u64 tohm); int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno, From 9fc67b11703fe9d8a5617ccacec2a452e455fa52 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 19 Nov 2025 12:11:40 -0800 Subject: [PATCH 13/18] EDAC/{skx_common,imh}: Add EDAC driver for Intel Diamond Rapids servers Intel Diamond Rapids CPUs include Integrated Memory and I/O Hubs (IMH). The memory controllers within the IMHs provide memory stacks to the processor. Create a new driver for this IMH-based memory controllers rather than applying additional patches to the existing i10nm_edac.c for the following reasons: 1) The memory controllers are not presented as PCI devices; instead, the detection and all their registers have been transitioned to MMIO-based memory spaces. 2) Validation processes are costly. Modifications to i10nm_edac would require extensive validation checks against multiple platforms, including Ice Lake, Sapphire Rapids, Emerald Rapids, Granite Rapids, Sierra Forest, and Grand Ridge. 3) Future Intel CPUs will likely only need patches on top of this new EDAC driver. Validation can be limited to Diamond Rapids servers and future Intel CPU generations. [Tony: Fix kerneldoc for struct local_reg] [randconfig: Added dependencies on NFIT and DMI] Tested-by: Yi Lai Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://patch.msgid.link/20251119134132.2389472-5-qiuxu.zhuo@intel.com --- drivers/edac/Kconfig | 12 + drivers/edac/Makefile | 3 + drivers/edac/imh_base.c | 562 ++++++++++++++++++++++++++++++++++++++ drivers/edac/skx_common.c | 8 +- drivers/edac/skx_common.h | 89 ++++-- 5 files changed, 648 insertions(+), 26 deletions(-) create mode 100644 drivers/edac/imh_base.c diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 9a7ff42064e9..81e40543ffd8 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -283,6 +283,18 @@ config EDAC_I10NM system has non-volatile DIMMs you should also manually select CONFIG_ACPI_NFIT. +config EDAC_IMH + tristate "Intel Integrated Memory/IO Hub MC" + depends on X86_64 && X86_MCE_INTEL && ACPI + depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_IMH can't be y + select DMI + select ACPI_ADXL + help + Support for error detection and correction the Intel + Integrated Memory/IO Hub Memory Controller. This MC IP is + first used on the Diamond Rapids servers but may appear on + others in the future. + config EDAC_PND2 tristate "Intel Pondicherry2" depends on PCI && X86_64 && X86_MCE_INTEL diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 1c14796410a3..8429b1e856bc 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -65,6 +65,9 @@ obj-$(CONFIG_EDAC_SKX) += skx_edac.o skx_edac_common.o i10nm_edac-y := i10nm_base.o obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o +imh_edac-y := imh_base.o +obj-$(CONFIG_EDAC_IMH) += imh_edac.o skx_edac_common.o + obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o diff --git a/drivers/edac/imh_base.c b/drivers/edac/imh_base.c new file mode 100644 index 000000000000..2be27be78390 --- /dev/null +++ b/drivers/edac/imh_base.c @@ -0,0 +1,562 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for Intel(R) servers with Integrated Memory/IO Hub-based memory controller. + * Copyright (c) 2025, Intel Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include "edac_module.h" +#include "skx_common.h" + +#define IMH_REVISION "v0.0.1" +#define EDAC_MOD_STR "imh_edac" + +/* Debug macros */ +#define imh_printk(level, fmt, arg...) \ + edac_printk(level, "imh", fmt, ##arg) + +/* Configuration Agent(Ubox) */ +#define MMIO_BASE_H(reg) (((u64)GET_BITFIELD(reg, 0, 29)) << 23) +#define SOCKET_ID(reg) GET_BITFIELD(reg, 0, 3) + +/* PUNIT */ +#define DDR_IMC_BITMAP(reg) GET_BITFIELD(reg, 23, 30) + +/* Memory Controller */ +#define ECC_ENABLED(reg) GET_BITFIELD(reg, 2, 2) +#define DIMM_POPULATED(reg) GET_BITFIELD(reg, 15, 15) + +/* System Cache Agent(SCA) */ +#define TOLM(reg) (((u64)GET_BITFIELD(reg, 16, 31)) << 16) +#define TOHM(reg) (((u64)GET_BITFIELD(reg, 16, 51)) << 16) + +/** + * struct local_reg - A register as described in the local package view. + * + * @pkg: (input) The package where the register is located. + * @pbase: (input) The IP MMIO base physical address in the local package view. + * @size: (input) The IP MMIO size. + * @offset: (input) The register offset from the IP MMIO base @pbase. + * @width: (input) The register width in byte. + * @vbase: (internal) The IP MMIO base virtual address. + * @val: (output) The register value. + */ +struct local_reg { + int pkg; + u64 pbase; + u32 size; + u32 offset; + u8 width; + void __iomem *vbase; + u64 val; +}; + +#define DEFINE_LOCAL_REG(name, cfg, package, north, ip_name, ip_idx, reg_name) \ + struct local_reg name = { \ + .pkg = package, \ + .pbase = (north ? (cfg)->mmio_base_l_north : \ + (cfg)->mmio_base_l_south) + \ + (cfg)->ip_name##_base + \ + (cfg)->ip_name##_size * (ip_idx), \ + .size = (cfg)->ip_name##_size, \ + .offset = (cfg)->ip_name##_reg_##reg_name##_offset, \ + .width = (cfg)->ip_name##_reg_##reg_name##_width, \ + } + +static u64 readx(void __iomem *addr, u8 width) +{ + switch (width) { + case 1: + return readb(addr); + case 2: + return readw(addr); + case 4: + return readl(addr); + case 8: + return readq(addr); + default: + imh_printk(KERN_ERR, "Invalid reg 0x%p width %d\n", addr, width); + return 0; + } +} + +static void __read_local_reg(void *reg) +{ + struct local_reg *r = (struct local_reg *)reg; + + r->val = readx(r->vbase + r->offset, r->width); +} + +/* Read a local-view register. */ +static bool read_local_reg(struct local_reg *reg) +{ + int cpu; + + /* Get the target CPU in the package @reg->pkg. */ + for_each_online_cpu(cpu) { + if (reg->pkg == topology_physical_package_id(cpu)) + break; + } + + if (cpu >= nr_cpu_ids) + return false; + + reg->vbase = ioremap(reg->pbase, reg->size); + if (!reg->vbase) { + imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", reg->pbase); + return false; + } + + /* Get the target CPU to read the register. */ + smp_call_function_single(cpu, __read_local_reg, reg, 1); + iounmap(reg->vbase); + + return true; +} + +/* Get the bitmap of memory controller instances in package @pkg. */ +static u32 get_imc_bitmap(struct res_config *cfg, int pkg, bool north) +{ + DEFINE_LOCAL_REG(reg, cfg, pkg, north, pcu, 0, capid3); + + if (!read_local_reg(®)) + return 0; + + edac_dbg(2, "Pkg%d %s mc instances bitmap 0x%llx (reg 0x%llx)\n", + pkg, north ? "north" : "south", + DDR_IMC_BITMAP(reg.val), reg.val); + + return DDR_IMC_BITMAP(reg.val); +} + +static void imc_release(struct device *dev) +{ + edac_dbg(2, "imc device %s released\n", dev_name(dev)); + kfree(dev); +} + +static int __get_ddr_munits(struct res_config *cfg, struct skx_dev *d, + bool north, int lmc) +{ + unsigned long size = cfg->ddr_chan_mmio_sz * cfg->ddr_chan_num; + unsigned long bitmap = get_imc_bitmap(cfg, d->pkg, north); + void __iomem *mbase; + struct device *dev; + int i, rc, pmc; + u64 base; + + for_each_set_bit(i, &bitmap, sizeof(bitmap) * 8) { + base = north ? d->mmio_base_h_north : d->mmio_base_h_south; + base += cfg->ddr_imc_base + size * i; + + edac_dbg(2, "Pkg%d mc%d mmio base 0x%llx size 0x%lx\n", + d->pkg, lmc, base, size); + + /* Set up the imc MMIO. */ + mbase = ioremap(base, size); + if (!mbase) { + imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", base); + return -ENOMEM; + } + + d->imc[lmc].mbase = mbase; + d->imc[lmc].lmc = lmc; + + /* Create the imc device instance. */ + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->release = imc_release; + device_initialize(dev); + rc = dev_set_name(dev, "0x%llx", base); + if (rc) { + imh_printk(KERN_ERR, "Failed to set dev name\n"); + put_device(dev); + return rc; + } + + d->imc[lmc].dev = dev; + + /* Set up the imc index mapping. */ + pmc = north ? i : 8 + i; + skx_set_mc_mapping(d, pmc, lmc); + + lmc++; + } + + return lmc; +} + +static bool get_ddr_munits(struct res_config *cfg, struct skx_dev *d) +{ + int lmc = __get_ddr_munits(cfg, d, true, 0); + + if (lmc < 0) + return false; + + lmc = __get_ddr_munits(cfg, d, false, lmc); + if (lmc <= 0) + return false; + + return true; +} + +static bool get_socket_id(struct res_config *cfg, struct skx_dev *d) +{ + DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ubox, 0, socket_id); + u8 src_id; + int i; + + if (!read_local_reg(®)) + return false; + + src_id = SOCKET_ID(reg.val); + edac_dbg(2, "socket id 0x%x (reg 0x%llx)\n", src_id, reg.val); + + for (i = 0; i < cfg->ddr_imc_num; i++) + d->imc[i].src_id = src_id; + + return true; +} + +/* Get TOLM (Top Of Low Memory) and TOHM (Top Of High Memory) parameters. */ +static bool imh_get_tolm_tohm(struct res_config *cfg, u64 *tolm, u64 *tohm) +{ + DEFINE_LOCAL_REG(reg, cfg, 0, true, sca, 0, tolm); + + if (!read_local_reg(®)) + return false; + + *tolm = TOLM(reg.val); + edac_dbg(2, "tolm 0x%llx (reg 0x%llx)\n", *tolm, reg.val); + + DEFINE_LOCAL_REG(reg2, cfg, 0, true, sca, 0, tohm); + + if (!read_local_reg(®2)) + return false; + + *tohm = TOHM(reg2.val); + edac_dbg(2, "tohm 0x%llx (reg 0x%llx)\n", *tohm, reg2.val); + + return true; +} + +/* Get the system-view MMIO_BASE_H for {north,south}-IMH. */ +static int imh_get_all_mmio_base_h(struct res_config *cfg, struct list_head *edac_list) +{ + int i, n = topology_max_packages(), imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num; + struct skx_dev *d; + + for (i = 0; i < n; i++) { + d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL); + if (!d) + return -ENOMEM; + + DEFINE_LOCAL_REG(reg, cfg, i, true, ubox, 0, mmio_base); + + /* Get MMIO_BASE_H for the north-IMH. */ + if (!read_local_reg(®) || !reg.val) { + kfree(d); + imh_printk(KERN_ERR, "Pkg%d has no north mmio_base_h\n", i); + return -ENODEV; + } + + d->mmio_base_h_north = MMIO_BASE_H(reg.val); + edac_dbg(2, "Pkg%d north mmio_base_h 0x%llx (reg 0x%llx)\n", + i, d->mmio_base_h_north, reg.val); + + /* Get MMIO_BASE_H for the south-IMH (optional). */ + DEFINE_LOCAL_REG(reg2, cfg, i, false, ubox, 0, mmio_base); + + if (read_local_reg(®2)) { + d->mmio_base_h_south = MMIO_BASE_H(reg2.val); + edac_dbg(2, "Pkg%d south mmio_base_h 0x%llx (reg 0x%llx)\n", + i, d->mmio_base_h_south, reg2.val); + } + + d->pkg = i; + d->num_imc = imc_num; + skx_init_mc_mapping(d); + list_add_tail(&d->list, edac_list); + } + + return 0; +} + +/* Get the number of per-package memory controllers. */ +static int imh_get_imc_num(struct res_config *cfg) +{ + int imc_num = hweight32(get_imc_bitmap(cfg, 0, true)) + + hweight32(get_imc_bitmap(cfg, 0, false)); + + if (!imc_num) { + imh_printk(KERN_ERR, "Invalid mc number\n"); + return -ENODEV; + } + + if (cfg->ddr_imc_num != imc_num) { + /* + * Update the configuration data to reflect the number of + * present DDR memory controllers. + */ + cfg->ddr_imc_num = imc_num; + edac_dbg(2, "Set ddr mc number %d\n", imc_num); + } + + return 0; +} + +/* Get all memory controllers' parameters. */ +static int imh_get_munits(struct res_config *cfg, struct list_head *edac_list) +{ + struct skx_imc *imc; + struct skx_dev *d; + u8 mc = 0; + int i; + + list_for_each_entry(d, edac_list, list) { + if (!get_ddr_munits(cfg, d)) { + imh_printk(KERN_ERR, "No mc found\n"); + return -ENODEV; + } + + if (!get_socket_id(cfg, d)) { + imh_printk(KERN_ERR, "Failed to get socket id\n"); + return -ENODEV; + } + + for (i = 0; i < cfg->ddr_imc_num; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + imc->chan_mmio_sz = cfg->ddr_chan_mmio_sz; + imc->num_channels = cfg->ddr_chan_num; + imc->num_dimms = cfg->ddr_dimm_num; + imc->mc = mc++; + } + } + + return 0; +} + +/* Helpers to read memory controller registers */ +static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) +{ + return readx(imc->mbase + imc->chan_mmio_sz * chan + offset, width); +} + +static u32 read_imc_mcmtr(struct res_config *cfg, struct skx_imc *imc, int chan) +{ + return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_mcmtr_offset, cfg->ddr_reg_mcmtr_width); +} + +static u32 read_imc_dimmmtr(struct res_config *cfg, struct skx_imc *imc, int chan, int dimm) +{ + return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_dimmmtr_offset + + cfg->ddr_reg_dimmmtr_width * dimm, + cfg->ddr_reg_dimmmtr_width); +} + +static bool ecc_enabled(u32 mcmtr) +{ + return (bool)ECC_ENABLED(mcmtr); +} + +static bool dimm_populated(u32 dimmmtr) +{ + return (bool)DIMM_POPULATED(dimmmtr); +} + +/* Get each DIMM's configurations of the memory controller @mci. */ +static int imh_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg) +{ + struct skx_pvt *pvt = mci->pvt_info; + struct skx_imc *imc = pvt->imc; + struct dimm_info *dimm; + u32 mcmtr, dimmmtr; + int i, j, ndimms; + + for (i = 0; i < imc->num_channels; i++) { + if (!imc->mbase) + continue; + + mcmtr = read_imc_mcmtr(cfg, imc, i); + + for (ndimms = 0, j = 0; j < imc->num_dimms; j++) { + dimmmtr = read_imc_dimmmtr(cfg, imc, i, j); + edac_dbg(1, "mcmtr 0x%x dimmmtr 0x%x (mc%d ch%d dimm%d)\n", + mcmtr, dimmmtr, imc->mc, i, j); + + if (!dimm_populated(dimmmtr)) + continue; + + dimm = edac_get_dimm(mci, i, j, 0); + ndimms += skx_get_dimm_info(dimmmtr, 0, 0, dimm, + imc, i, j, cfg); + } + + if (ndimms && !ecc_enabled(mcmtr)) { + imh_printk(KERN_ERR, "ECC is disabled on mc%d ch%d\n", + imc->mc, i); + return -ENODEV; + } + } + + return 0; +} + +/* Register all memory controllers to the EDAC core. */ +static int imh_register_mci(struct res_config *cfg, struct list_head *edac_list) +{ + struct skx_imc *imc; + struct skx_dev *d; + int i, rc; + + list_for_each_entry(d, edac_list, list) { + for (i = 0; i < cfg->ddr_imc_num; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + rc = skx_register_mci(imc, imc->dev, + dev_name(imc->dev), + "Intel IMH-based Socket", + EDAC_MOD_STR, + imh_get_dimm_config, cfg); + if (rc) + return rc; + } + } + + return 0; +} + +static struct res_config dmr_cfg = { + .type = DMR, + .support_ddr5 = true, + .mmio_base_l_north = 0xf6800000, + .mmio_base_l_south = 0xf6000000, + .ddr_chan_num = 1, + .ddr_dimm_num = 2, + .ddr_imc_base = 0x39b000, + .ddr_chan_mmio_sz = 0x8000, + .ddr_reg_mcmtr_offset = 0x360, + .ddr_reg_mcmtr_width = 4, + .ddr_reg_dimmmtr_offset = 0x370, + .ddr_reg_dimmmtr_width = 4, + .ubox_base = 0x0, + .ubox_size = 0x2000, + .ubox_reg_mmio_base_offset = 0x580, + .ubox_reg_mmio_base_width = 4, + .ubox_reg_socket_id_offset = 0x1080, + .ubox_reg_socket_id_width = 4, + .pcu_base = 0x3000, + .pcu_size = 0x10000, + .pcu_reg_capid3_offset = 0x290, + .pcu_reg_capid3_width = 4, + .sca_base = 0x24c000, + .sca_size = 0x2500, + .sca_reg_tolm_offset = 0x2100, + .sca_reg_tolm_width = 8, + .sca_reg_tohm_offset = 0x2108, + .sca_reg_tohm_width = 8, +}; + +static const struct x86_cpu_id imh_cpuids[] = { + X86_MATCH_VFM(INTEL_DIAMONDRAPIDS_X, &dmr_cfg), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, imh_cpuids); + +static struct notifier_block imh_mce_dec = { + .notifier_call = skx_mce_check_error, + .priority = MCE_PRIO_EDAC, +}; + +static int __init imh_init(void) +{ + const struct x86_cpu_id *id; + struct list_head *edac_list; + struct res_config *cfg; + const char *owner; + u64 tolm, tohm; + int rc; + + edac_dbg(2, "\n"); + + if (ghes_get_devices()) + return -EBUSY; + + owner = edac_get_owner(); + if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) + return -EBUSY; + + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(imh_cpuids); + if (!id) + return -ENODEV; + cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); + + if (!imh_get_tolm_tohm(cfg, &tolm, &tohm)) + return -ENODEV; + + skx_set_hi_lo(tolm, tohm); + + rc = imh_get_imc_num(cfg); + if (rc < 0) + goto fail; + + edac_list = skx_get_edac_list(); + + rc = imh_get_all_mmio_base_h(cfg, edac_list); + if (rc) + goto fail; + + rc = imh_get_munits(cfg, edac_list); + if (rc) + goto fail; + + rc = imh_register_mci(cfg, edac_list); + if (rc) + goto fail; + + rc = skx_adxl_get(); + if (rc) + goto fail; + + opstate_init(); + mce_register_decode_chain(&imh_mce_dec); + + imh_printk(KERN_INFO, "%s\n", IMH_REVISION); + + return 0; +fail: + skx_remove(); + return rc; +} + +static void __exit imh_exit(void) +{ + edac_dbg(2, "\n"); + + mce_unregister_decode_chain(&imh_mce_dec); + skx_adxl_put(); + skx_remove(); +} + +module_init(imh_init); +module_exit(imh_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Qiuxu Zhuo"); +MODULE_DESCRIPTION("MC Driver for Intel servers using IMH-based memory controller"); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index e45abd0008ef..32a4ef27a987 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -124,7 +124,7 @@ void skx_adxl_put(void) } EXPORT_SYMBOL_GPL(skx_adxl_put); -static void skx_init_mc_mapping(struct skx_dev *d) +void skx_init_mc_mapping(struct skx_dev *d) { /* * By default, the BIOS presents all memory controllers within each @@ -135,6 +135,7 @@ static void skx_init_mc_mapping(struct skx_dev *d) for (int i = 0; i < d->num_imc; i++) d->imc[i].mc_mapping = i; } +EXPORT_SYMBOL_GPL(skx_init_mc_mapping); void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) { @@ -823,6 +824,9 @@ void skx_remove(void) if (d->imc[i].mbase) iounmap(d->imc[i].mbase); + if (d->imc[i].dev) + put_device(d->imc[i].dev); + for (j = 0; j < d->imc[i].num_channels; j++) { if (d->imc[i].chan[j].cdev) pci_dev_put(d->imc[i].chan[j].cdev); @@ -846,7 +850,7 @@ EXPORT_SYMBOL_GPL(skx_remove); /* * Debug feature. * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/{skx,i10nm}_test/addr. + * /sys/kernel/debug/edac/{skx,i10nm,imh}_test/addr. */ static struct dentry *skx_test; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 86a883d3c2a4..39a3462ede28 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -121,20 +121,33 @@ struct reg_rrl { * memory controllers on the die. */ struct skx_dev { - struct list_head list; + /* {skx,i10nm}_edac */ u8 bus[4]; int seg; struct pci_dev *sad_all; struct pci_dev *util_all; - struct pci_dev *uracu; /* for i10nm CPU */ - struct pci_dev *pcu_cr3; /* for HBM memory detection */ + struct pci_dev *uracu; + struct pci_dev *pcu_cr3; u32 mcroute; + + /* imh_edac */ + /* System-view MMIO base physical addresses. */ + u64 mmio_base_h_north; + u64 mmio_base_h_south; + int pkg; + int num_imc; + struct list_head list; struct skx_imc { + /* i10nm_edac */ + struct pci_dev *mdev; + + /* imh_edac */ + struct device *dev; + struct mem_ctl_info *mci; - struct pci_dev *mdev; /* for i10nm CPU */ - void __iomem *mbase; /* for i10nm CPU */ - int chan_mmio_sz; /* for i10nm CPU */ + void __iomem *mbase; + int chan_mmio_sz; int num_channels; /* channels per memory controller */ int num_dimms; /* dimms per channel */ bool hbm_mc; @@ -178,7 +191,8 @@ enum type { SKX, I10NM, SPR, - GNR + GNR, + DMR, }; enum { @@ -237,10 +251,6 @@ struct pci_bdf { struct res_config { enum type type; - /* Configuration agent device ID */ - unsigned int decs_did; - /* Default bus number configuration register offset */ - int busno_cfg_offset; /* DDR memory controllers per socket */ int ddr_imc_num; /* DDR channels per DDR memory controller */ @@ -258,23 +268,53 @@ struct res_config { /* Per HBM channel memory-mapped I/O size */ int hbm_chan_mmio_sz; bool support_ddr5; - /* SAD device BDF */ - struct pci_bdf sad_all_bdf; - /* PCU device BDF */ - struct pci_bdf pcu_cr3_bdf; - /* UTIL device BDF */ - struct pci_bdf util_all_bdf; - /* URACU device BDF */ - struct pci_bdf uracu_bdf; - /* DDR mdev device BDF */ - struct pci_bdf ddr_mdev_bdf; - /* HBM mdev device BDF */ - struct pci_bdf hbm_mdev_bdf; - int sad_all_offset; /* RRL register sets per DDR channel */ struct reg_rrl *reg_rrl_ddr; /* RRL register sets per HBM channel */ struct reg_rrl *reg_rrl_hbm[2]; + union { + /* {skx,i10nm}_edac */ + struct { + /* Configuration agent device ID */ + unsigned int decs_did; + /* Default bus number configuration register offset */ + int busno_cfg_offset; + struct pci_bdf sad_all_bdf; + struct pci_bdf pcu_cr3_bdf; + struct pci_bdf util_all_bdf; + struct pci_bdf uracu_bdf; + struct pci_bdf ddr_mdev_bdf; + struct pci_bdf hbm_mdev_bdf; + int sad_all_offset; + }; + /* imh_edac */ + struct { + /* MMIO base physical address in local package view */ + u64 mmio_base_l_north; + u64 mmio_base_l_south; + u64 ddr_imc_base; + u64 ddr_reg_mcmtr_offset; + u8 ddr_reg_mcmtr_width; + u64 ddr_reg_dimmmtr_offset; + u8 ddr_reg_dimmmtr_width; + u64 ubox_base; + u32 ubox_size; + u32 ubox_reg_mmio_base_offset; + u8 ubox_reg_mmio_base_width; + u32 ubox_reg_socket_id_offset; + u8 ubox_reg_socket_id_width; + u64 pcu_base; + u32 pcu_size; + u32 pcu_reg_capid3_offset; + u8 pcu_reg_capid3_width; + u64 sca_base; + u32 sca_size; + u32 sca_reg_tolm_offset; + u8 sca_reg_tolm_width; + u32 sca_reg_tohm_offset; + u8 sca_reg_tohm_width; + }; + }; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, @@ -287,6 +327,7 @@ void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); void skx_set_res_cfg(struct res_config *cfg); +void skx_init_mc_mapping(struct skx_dev *d); void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); From 39abdcbdad597b1ac3dabd44a757de91b87c683a Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 19 Nov 2025 12:11:40 -0800 Subject: [PATCH 14/18] EDAC/skx_common: Extend the maximum number of DRAM chip row bits The allowed maximum number of row bits for DRAM chips in the Diamond Rapids server processor is up to 19. Extend the current maximum row bits from 18 to 19. Tested-by: Yi Lai Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://patch.msgid.link/20251119134132.2389472-6-qiuxu.zhuo@intel.com --- drivers/edac/skx_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 32a4ef27a987..3276afe43922 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -451,7 +451,7 @@ static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, } #define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks") -#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows") +#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 7, "rows") #define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols") int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, From f619613f3058dee38f50b116ec774c5295c8b08b Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 19 Nov 2025 12:11:40 -0800 Subject: [PATCH 15/18] EDAC/{skx_comm,imh}: Detect 2-level memory configuration Detect 2-level memory configurations and notify the 'skx_common' library to enable ADXL 2-level memory error decoding. Tested-by: Yi Lai Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://patch.msgid.link/20251119134132.2389472-7-qiuxu.zhuo@intel.com --- drivers/edac/imh_base.c | 38 ++++++++++++++++++++++++++++++++++++++ drivers/edac/skx_common.h | 4 ++++ 2 files changed, 42 insertions(+) diff --git a/drivers/edac/imh_base.c b/drivers/edac/imh_base.c index 2be27be78390..2b0db6321da0 100644 --- a/drivers/edac/imh_base.c +++ b/drivers/edac/imh_base.c @@ -35,6 +35,9 @@ #define TOLM(reg) (((u64)GET_BITFIELD(reg, 16, 31)) << 16) #define TOHM(reg) (((u64)GET_BITFIELD(reg, 16, 51)) << 16) +/* Home Agent (HA) */ +#define NMCACHING(reg) GET_BITFIELD(reg, 8, 8) + /** * struct local_reg - A register as described in the local package view. * @@ -346,6 +349,35 @@ static int imh_get_munits(struct res_config *cfg, struct list_head *edac_list) return 0; } +static bool check_2lm_enabled(struct res_config *cfg, struct skx_dev *d, int ha_idx) +{ + DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ha, ha_idx, mode); + + if (!read_local_reg(®)) + return false; + + if (!NMCACHING(reg.val)) + return false; + + edac_dbg(2, "2-level memory configuration (reg 0x%llx, ha idx %d)\n", reg.val, ha_idx); + return true; +} + +/* Check whether the system has a 2-level memory configuration. */ +static bool imh_2lm_enabled(struct res_config *cfg, struct list_head *head) +{ + struct skx_dev *d; + int i; + + list_for_each_entry(d, head, list) { + for (i = 0; i < cfg->ddr_imc_num; i++) + if (check_2lm_enabled(cfg, d, i)) + return true; + } + + return false; +} + /* Helpers to read memory controller registers */ static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) { @@ -467,6 +499,10 @@ static struct res_config dmr_cfg = { .sca_reg_tolm_width = 8, .sca_reg_tohm_offset = 0x2108, .sca_reg_tohm_width = 8, + .ha_base = 0x3eb000, + .ha_size = 0x1000, + .ha_reg_mode_offset = 0x4a0, + .ha_reg_mode_width = 4, }; static const struct x86_cpu_id imh_cpuids[] = { @@ -526,6 +562,8 @@ static int __init imh_init(void) if (rc) goto fail; + skx_set_mem_cfg(imh_2lm_enabled(cfg, edac_list)); + rc = imh_register_mci(cfg, edac_list); if (rc) goto fail; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 39a3462ede28..f88038e5b18c 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -313,6 +313,10 @@ struct res_config { u8 sca_reg_tolm_width; u32 sca_reg_tohm_offset; u8 sca_reg_tohm_width; + u64 ha_base; + u32 ha_size; + u32 ha_reg_mode_offset; + u8 ha_reg_mode_width; }; }; }; From 5f40ea7f41773d996d92db8bde600199200adc11 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 19 Nov 2025 12:11:40 -0800 Subject: [PATCH 16/18] EDAC/imh: Setup 'imh_test' debugfs testing node Setup the following debugfs testing node to enable fake memory error address decoding tests for the imh_edac driver. /sys/kernel/debug/edac/imh_test/addr Tested-by: Yi Lai Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://patch.msgid.link/20251119134132.2389472-8-qiuxu.zhuo@intel.com --- drivers/edac/imh_base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/edac/imh_base.c b/drivers/edac/imh_base.c index 2b0db6321da0..4348b3883b45 100644 --- a/drivers/edac/imh_base.c +++ b/drivers/edac/imh_base.c @@ -574,6 +574,7 @@ static int __init imh_init(void) opstate_init(); mce_register_decode_chain(&imh_mce_dec); + skx_setup_debug("imh_test"); imh_printk(KERN_INFO, "%s\n", IMH_REVISION); @@ -587,6 +588,7 @@ static void __exit imh_exit(void) { edac_dbg(2, "\n"); + skx_teardown_debug(); mce_unregister_decode_chain(&imh_mce_dec); skx_adxl_put(); skx_remove(); From ef1b6d904993d3a21baa7d4105e1a4e4ba9dd6de Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 5 Nov 2025 17:02:44 +0800 Subject: [PATCH 17/18] EDAC/igen6: Fix error handling in igen6_edac driver The igen6_edac driver calls device_initialize() for all memory controllers in igen6_register_mci(), but misses corresponding put_device() calls in error paths and during normal shutdown in igen6_unregister_mcis(). Adding the missing put_device() calls improves code readability and ensures proper reference counting for the device structure. Found by code review. Signed-off-by: Ma Ke Reviewed-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://patch.msgid.link/20251105090244.23327-1-make24@iscas.ac.cn --- drivers/edac/igen6_edac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 2fc59f9eed69..553c31a2d922 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -1300,6 +1300,7 @@ static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev imc->mci = mci; return 0; fail3: + put_device(&imc->dev); mci->pvt_info = NULL; kfree(mci->ctl_name); fail2: @@ -1326,6 +1327,7 @@ static void igen6_unregister_mcis(void) kfree(mci->ctl_name); mci->pvt_info = NULL; edac_mc_free(mci); + put_device(&imc->dev); iounmap(imc->window); } } From 69acbdbbefbda7b7b32faa706a8f68c399c9e47b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 24 Nov 2025 16:25:17 +0200 Subject: [PATCH 18/18] RAS/AMD/ATL: Replace bitwise_xor_bits() with hweight16() Doing hweight16() and checking whether the lsb is set is functionally equivalent to what bitwise_xor_bits() does. In addition, it results in better generated code as before gcc would inline the function 4 times. With hweight16(), the resulting code boils down to 2 instructions - POPCNT and AND, and all relevant CPUs support POPCNT. An alternative would have been to use the __builtin_parity() function provided by both Clang/GCC, however under some circumstances the compiler can choose not to inline it but generate a library call which is unsupported in the kernel. No functional changes. [ bp: Massage commit message. ] Signed-off-by: Nikolay Borisov Signed-off-by: Borislav Petkov (AMD) Link: https://patch.msgid.link/20251124142517.1708451-1-nik.borisov@suse.com --- drivers/ras/amd/atl/umc.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 18ce419236a5..befc616d5e8a 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -49,17 +49,6 @@ static u8 get_coh_st_inst_id_mi300(struct atl_err *err) return i; } -/* XOR the bits in @val. */ -static u16 bitwise_xor_bits(u16 val) -{ - u16 tmp = 0; - u8 i; - - for (i = 0; i < 16; i++) - tmp ^= (val >> i) & 0x1; - - return tmp; -} struct xor_bits { bool xor_enable; @@ -250,17 +239,17 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) if (!addr_hash.bank[i].xor_enable) continue; - temp = bitwise_xor_bits(col & addr_hash.bank[i].col_xor); - temp ^= bitwise_xor_bits(row & addr_hash.bank[i].row_xor); + temp = hweight16(col & addr_hash.bank[i].col_xor) & 1; + temp ^= hweight16(row & addr_hash.bank[i].row_xor) & 1; bank ^= temp << i; } /* Calculate hash for PC bit. */ if (addr_hash.pc.xor_enable) { - temp = bitwise_xor_bits(col & addr_hash.pc.col_xor); - temp ^= bitwise_xor_bits(row & addr_hash.pc.row_xor); + temp = hweight16(col & addr_hash.pc.col_xor) & 1; + temp ^= hweight16(row & addr_hash.pc.row_xor) & 1; /* Bits SID[1:0] act as Bank[5:4] for PC hash, so apply them here. */ - temp ^= bitwise_xor_bits((bank | sid << NUM_BANK_BITS) & addr_hash.bank_xor); + temp ^= hweight16((bank | sid << NUM_BANK_BITS) & addr_hash.bank_xor) & 1; pc ^= temp; }