Files
linux/drivers/of/of_numa.c
Yin Tirui ee4d098cbc of_numa: fix uninitialized memory nodes causing kernel panic
When there are memory-only nodes (nodes without CPUs), these nodes are not
properly initialized, causing a kernel panic during boot.

of_numa_init
	of_numa_parse_cpu_nodes
		node_set(nid, numa_nodes_parsed);
	of_numa_parse_memory_nodes

In of_numa_parse_cpu_nodes, numa_nodes_parsed gets updated only for nodes
containing CPUs.  Memory-only nodes should have been updated in
of_numa_parse_memory_nodes, but they weren't.

Subsequently, when free_area_init() attempts to access NODE_DATA() for
these uninitialized memory nodes, the kernel panics due to a NULL pointer
dereference.

This can be reproduced on ARM64 QEMU with 1 CPU and 2 memory nodes:

qemu-system-aarch64 \
-cpu host -nographic \
-m 4G -smp 1 \
-machine virt,accel=kvm,gic-version=3,iommu=smmuv3 \
-object memory-backend-ram,size=2G,id=mem0 \
-object memory-backend-ram,size=2G,id=mem1 \
-numa node,nodeid=0,memdev=mem0 \
-numa node,nodeid=1,memdev=mem1 \
-kernel $IMAGE \
-hda $DISK \
-append "console=ttyAMA0 root=/dev/vda rw earlycon"

[    0.000000] Booting Linux on physical CPU 0x0000000000 [0x481fd010]
[    0.000000] Linux version 6.17.0-rc1-00001-gabb4b3daf18c-dirty (yintirui@local) (gcc (GCC) 12.3.1, GNU ld (GNU Binutils) 2.41) #52 SMP PREEMPT Mon Aug 18 09:49:40 CST 2025
[    0.000000] KASLR enabled
[    0.000000] random: crng init done
[    0.000000] Machine model: linux,dummy-virt
[    0.000000] efi: UEFI not found.
[    0.000000] earlycon: pl11 at MMIO 0x0000000009000000 (options '')
[    0.000000] printk: legacy bootconsole [pl11] enabled
[    0.000000] OF: reserved mem: Reserved memory: No reserved-memory node in the DT
[    0.000000] NODE_DATA(0) allocated [mem 0xbfffd9c0-0xbfffffff]
[    0.000000] node 1 must be removed before remove section 23
[    0.000000] Zone ranges:
[    0.000000]   DMA      [mem 0x0000000040000000-0x00000000ffffffff]
[    0.000000]   DMA32    empty
[    0.000000]   Normal   [mem 0x0000000100000000-0x000000013fffffff]
[    0.000000] Movable zone start for each node
[    0.000000] Early memory node ranges
[    0.000000]   node   0: [mem 0x0000000040000000-0x00000000bfffffff]
[    0.000000]   node   1: [mem 0x00000000c0000000-0x000000013fffffff]
[    0.000000] Initmem setup node 0 [mem 0x0000000040000000-0x00000000bfffffff]
[    0.000000] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0
[    0.000000] Mem abort info:
[    0.000000]   ESR = 0x0000000096000004
[    0.000000]   EC = 0x25: DABT (current EL), IL = 32 bits
[    0.000000]   SET = 0, FnV = 0
[    0.000000]   EA = 0, S1PTW = 0
[    0.000000]   FSC = 0x04: level 0 translation fault
[    0.000000] Data abort info:
[    0.000000]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
[    0.000000]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[    0.000000]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[    0.000000] [00000000000000a0] user address but active_mm is swapper
[    0.000000] Internal error: Oops: 0000000096000004 [#1]  SMP
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.17.0-rc1-00001-g760c6dabf762-dirty #54 PREEMPT
[    0.000000] Hardware name: linux,dummy-virt (DT)
[    0.000000] pstate: 800000c5 (Nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    0.000000] pc : free_area_init+0x50c/0xf9c
[    0.000000] lr : free_area_init+0x5c0/0xf9c
[    0.000000] sp : ffffa02ca0f33c00
[    0.000000] x29: ffffa02ca0f33cb0 x28: 0000000000000000 x27: 0000000000000000
[    0.000000] x26: 4ec4ec4ec4ec4ec5 x25: 00000000000c0000 x24: 00000000000c0000
[    0.000000] x23: 0000000000040000 x22: 0000000000000000 x21: ffffa02ca0f3b368
[    0.000000] x20: ffffa02ca14c7b98 x19: 0000000000000000 x18: 0000000000000002
[    0.000000] x17: 000000000000cacc x16: 0000000000000001 x15: 0000000000000001
[    0.000000] x14: 0000000080000000 x13: 0000000000000018 x12: 0000000000000002
[    0.000000] x11: ffffa02ca0fd4f00 x10: ffffa02ca14bab20 x9 : ffffa02ca14bab38
[    0.000000] x8 : 00000000000c0000 x7 : 0000000000000001 x6 : 0000000000000002
[    0.000000] x5 : 0000000140000000 x4 : ffffa02ca0f33c90 x3 : ffffa02ca0f33ca0
[    0.000000] x2 : ffffa02ca0f33c98 x1 : 0000000080000000 x0 : 0000000000000001
[    0.000000] Call trace:
[    0.000000]  free_area_init+0x50c/0xf9c (P)
[    0.000000]  bootmem_init+0x110/0x1dc
[    0.000000]  setup_arch+0x278/0x60c
[    0.000000]  start_kernel+0x70/0x748
[    0.000000]  __primary_switched+0x88/0x90
[    0.000000] Code: d503201f b98093e0 52800016 f8607a93 (f9405260)
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
[    0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---

Link: https://lkml.kernel.org/r/20250819075510.2079961-1-yintirui@huawei.com
Fixes: 767507654c ("arch_numa: switch over to numa_memblks")
Signed-off-by: Yin Tirui <yintirui@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Chen Jun <chenjun102@huawei.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-08-27 22:45:41 -07:00

188 lines
3.9 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* OF NUMA Parsing support.
*
* Copyright (C) 2015 - 2016 Cavium Inc.
*/
#define pr_fmt(fmt) "OF: NUMA: " fmt
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/nodemask.h>
#include <linux/numa_memblks.h>
#include <asm/numa.h>
/*
 * CPUs are only attached to their NUMA domains later, during SMP init,
 * but the node ids they refer to must already be known at this point.
 *
 * Walk every CPU node of the device tree, read its "numa-node-id"
 * property and record the id in numa_nodes_parsed. CPU nodes for which
 * the property cannot be read are skipped; ids at or above MAX_NUMNODES
 * are reported with a warning and not recorded.
 */
static void __init of_numa_parse_cpu_nodes(void)
{
	struct device_node *cpu;

	for_each_of_cpu_node(cpu) {
		u32 node_id;

		/* Any read failure simply means "nothing to record here". */
		if (of_property_read_u32(cpu, "numa-node-id", &node_id))
			continue;

		pr_debug("CPU on %u\n", node_id);

		if (node_id < MAX_NUMNODES) {
			node_set(node_id, numa_nodes_parsed);
			continue;
		}

		pr_warn("Node id %u exceeds maximum value\n", node_id);
	}
}
/*
 * Register the address ranges of every "memory" device-tree node that
 * carries a "numa-node-id" property and mark the owning node id in
 * numa_nodes_parsed.
 *
 * Memory nodes without the property are skipped. The walk is aborted with
 * an error when the property cannot be read for another reason, when the
 * node id is at or above MAX_NUMNODES, when of_address_to_resource()
 * yields no range at all for the node, or when numa_add_memblk() fails.
 *
 * Return: 0 if the last memory node visited was registered successfully,
 * a negative errno otherwise. Both the initial value and the "property
 * missing" case are -EINVAL, so -EINVAL is also returned when there are
 * no memory nodes or when the last one visited has no "numa-node-id".
 */
static int __init of_numa_parse_memory_nodes(void)
{
	struct device_node *np = NULL;
	struct resource rsrc;
	u32 nid;
	int i, r = -EINVAL;

	for_each_node_by_type(np, "memory") {
		r = of_property_read_u32(np, "numa-node-id", &nid);
		if (r == -EINVAL)
			/*
			 * property doesn't exist if -EINVAL, continue
			 * looking for more memory nodes with
			 * "numa-node-id" property
			 */
			continue;

		/*
		 * nid is not guaranteed to have been written when the read
		 * failed, so only range-check it after a successful read.
		 * This also keeps the original error code intact instead of
		 * replacing it with -EINVAL based on a stale value.
		 */
		if (!r && nid >= MAX_NUMNODES) {
			pr_warn("Node id %u exceeds maximum value\n", nid);
			r = -EINVAL;
		}

		/* Add each address range of this node until one fails. */
		for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) {
			r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);
			if (!r)
				node_set(nid, numa_nodes_parsed);
		}

		/* No range at all (i == 0) is as fatal as an explicit error. */
		if (!i || r) {
			/* Release np before leaving the iteration early. */
			of_node_put(np);
			pr_err("bad property in memory node\n");
			return r ? : -EINVAL;
		}
	}

	return r;
}
/*
 * Parse the "distance-matrix" property of a distance-map node.
 *
 * The property is consumed as consecutive u32 triplets
 * (node A, node B, distance); trailing cells that do not form a complete
 * triplet are ignored. For every triplet node A is marked in
 * numa_nodes_parsed and the A->B distance is recorded; when B > A the
 * same value is also recorded for B->A.
 *
 * @map: device-tree node holding the "distance-matrix" property.
 *
 * Return: 0 on success, -EINVAL when the property is missing or empty,
 * when a distance is inconsistent with LOCAL_DISTANCE (a node's distance
 * to itself must equal it, any other distance must exceed it), or when
 * node A is at or above MAX_NUMNODES.
 */
static int __init of_numa_parse_distance_map_v1(struct device_node *map)
{
	const __be32 *matrix;
	int entry_count;
	int i;

	pr_info("parsing numa-distance-map-v1\n");

	matrix = of_get_property(map, "distance-matrix", NULL);
	if (!matrix) {
		pr_err("No distance-matrix property in distance-map\n");
		return -EINVAL;
	}

	entry_count = of_property_count_u32_elems(map, "distance-matrix");
	if (entry_count <= 0) {
		pr_err("Invalid distance-matrix\n");
		return -EINVAL;
	}

	for (i = 0; i + 2 < entry_count; i += 3) {
		u32 nodea, nodeb, distance;

		nodea = of_read_number(matrix, 1);
		matrix++;
		nodeb = of_read_number(matrix, 1);
		matrix++;
		distance = of_read_number(matrix, 1);
		matrix++;

		if ((nodea == nodeb && distance != LOCAL_DISTANCE) ||
		    (nodea != nodeb && distance <= LOCAL_DISTANCE)) {
			/* All three values are u32, so print them unsigned. */
			pr_err("Invalid distance[node%u -> node%u] = %u\n",
			       nodea, nodeb, distance);
			return -EINVAL;
		}

		/*
		 * nodea comes straight from the device tree; setting a bit
		 * at or beyond MAX_NUMNODES would touch memory outside the
		 * nodemask, so reject the map instead.
		 */
		if (nodea >= MAX_NUMNODES) {
			pr_warn("Node id %u exceeds maximum value\n", nodea);
			return -EINVAL;
		}

		node_set(nodea, numa_nodes_parsed);

		numa_set_distance(nodea, nodeb, distance);

		/* Set default distance of node B->A same as A->B */
		if (nodeb > nodea)
			numa_set_distance(nodeb, nodea, distance);
	}

	return 0;
}
/*
 * Look up a node compatible with "numa-distance-map-v1" and, if one is
 * found, hand it to of_numa_parse_distance_map_v1().
 *
 * Return: 0 when no such node exists, otherwise whatever the v1 parser
 * returned.
 */
static int __init of_numa_parse_distance_map(void)
{
	struct device_node *map;
	int err;

	map = of_find_compatible_node(NULL, NULL, "numa-distance-map-v1");
	err = map ? of_numa_parse_distance_map_v1(map) : 0;

	/* Called unconditionally, including when the lookup found nothing. */
	of_node_put(map);

	return err;
}
/*
 * Resolve the NUMA node id that applies to a device-tree node.
 *
 * Starting at @device, climb towards the root until a node with a
 * "numa-node-id" property is found or a read fails for a reason other
 * than the property being absent.
 *
 * @device: device-tree node to start from.
 *
 * Return: the node id if one was read successfully, is below
 * MAX_NUMNODES and is a possible node; NUMA_NO_NODE otherwise.
 */
int of_node_to_nid(struct device_node *device)
{
	struct device_node *cur;
	u32 node_id;
	int err = -ENODATA;

	/*
	 * -EINVAL means "property not present here": move on to the parent.
	 * Success or any other error ends the walk; other errors point at
	 * a bad device tree, so there is no use in looking further up.
	 */
	for (cur = of_node_get(device); cur; cur = of_get_next_parent(cur)) {
		err = of_property_read_u32(cur, "numa-node-id", &node_id);
		if (err != -EINVAL)
			break;
	}

	/* Stopped on a node (not past the root) with a read error. */
	if (cur && err)
		pr_warn("Invalid \"numa-node-id\" property in node %pOFn\n",
			cur);
	of_node_put(cur);

	/*
	 * With numa=off on the command line, or with a defective device
	 * tree, the id read above may not be among the possible nodes;
	 * report NUMA_NO_NODE in that case.
	 */
	if (err || node_id >= MAX_NUMNODES || !node_possible(node_id))
		return NUMA_NO_NODE;

	return node_id;
}
/*
 * Collect NUMA information from the device tree: CPU node ids first,
 * then memory nodes, then the distance map.
 *
 * Return: the error from the memory-node pass if it failed (the distance
 * map is then not parsed), otherwise the result of parsing the distance
 * map.
 */
int __init of_numa_init(void)
{
	int err;

	of_numa_parse_cpu_nodes();

	err = of_numa_parse_memory_nodes();

	return err ? err : of_numa_parse_distance_map();
}