kho: drop notifiers

The KHO framework uses a notifier chain as the mechanism for clients to
participate in the finalization process.  While this works for a single,
central state machine, it is too restrictive for kernel-internal
components like pstore/reserve_mem or IMA.  These components need a
simpler, direct way to register their state for preservation (e.g., during
their initcall) without being part of a complex, shutdown-time notifier
sequence.  The notifier model forces all participants into a single
finalization flow and makes direct preservation from an arbitrary context
difficult.  This patch refactors the client participation model by
removing the notifier chain and introducing a direct API for managing FDT
subtrees.

The core kho_finalize() and kho_abort() state machine remains, but clients
now register their data with KHO beforehand.

Link: https://lkml.kernel.org/r/20251101142325.1326536-3-pasha.tatashin@soleen.com
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Co-developed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Changyuan Lyu <changyuanl@google.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Pratyush Yadav <pratyush@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Simon Horman <horms@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Mike Rapoport (Microsoft)
2025-11-01 10:23:18 -04:00
committed by Andrew Morton
parent 03d3963464
commit 70f9133096
6 changed files with 125 additions and 188 deletions

View File

@@ -10,14 +10,7 @@ struct kho_scratch {
phys_addr_t size;
};
/* KHO Notifier index */
enum kho_event {
KEXEC_KHO_FINALIZE = 0,
KEXEC_KHO_ABORT = 1,
};
struct folio;
struct notifier_block;
struct page;
#define DECLARE_KHOSER_PTR(name, type) \
@@ -37,8 +30,6 @@ struct page;
(typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \
})
struct kho_serialization;
struct kho_vmalloc_chunk;
struct kho_vmalloc {
DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *);
@@ -57,12 +48,10 @@ int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation);
struct folio *kho_restore_folio(phys_addr_t phys);
struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages);
void *kho_restore_vmalloc(const struct kho_vmalloc *preservation);
int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
int kho_add_subtree(const char *name, void *fdt);
void kho_remove_subtree(void *fdt);
int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
int register_kho_notifier(struct notifier_block *nb);
int unregister_kho_notifier(struct notifier_block *nb);
void kho_memory_init(void);
void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys,
@@ -110,27 +99,20 @@ static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
return NULL;
}
static inline int kho_add_subtree(struct kho_serialization *ser,
const char *name, void *fdt)
static inline int kho_add_subtree(const char *name, void *fdt)
{
return -EOPNOTSUPP;
}
static inline void kho_remove_subtree(void *fdt)
{
}
static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
{
return -EOPNOTSUPP;
}
static inline int register_kho_notifier(struct notifier_block *nb)
{
return -EOPNOTSUPP;
}
static inline int unregister_kho_notifier(struct notifier_block *nb)
{
return -EOPNOTSUPP;
}
static inline void kho_memory_init(void)
{
}

View File

@@ -16,7 +16,6 @@
#include <linux/libfdt.h>
#include <linux/list.h>
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/page-isolation.h>
#include <linux/vmalloc.h>
@@ -103,29 +102,34 @@ struct kho_mem_track {
struct khoser_mem_chunk;
struct kho_serialization {
struct page *fdt;
struct kho_mem_track track;
/* First chunk of serialized preserved memory map */
struct khoser_mem_chunk *preserved_mem_map;
struct kho_sub_fdt {
struct list_head l;
const char *name;
void *fdt;
};
struct kho_out {
struct blocking_notifier_head chain_head;
struct mutex lock; /* protects KHO FDT finalization */
struct kho_serialization ser;
void *fdt;
bool finalized;
struct mutex lock; /* protects KHO FDT finalization */
struct list_head sub_fdts;
struct mutex fdts_lock;
struct kho_mem_track track;
/* First chunk of serialized preserved memory map */
struct khoser_mem_chunk *preserved_mem_map;
struct kho_debugfs dbg;
};
static struct kho_out kho_out = {
.chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head),
.lock = __MUTEX_INITIALIZER(kho_out.lock),
.ser = {
.track = {
.orders = XARRAY_INIT(kho_out.ser.track.orders, 0),
},
.track = {
.orders = XARRAY_INIT(kho_out.track.orders, 0),
},
.sub_fdts = LIST_HEAD_INIT(kho_out.sub_fdts),
.fdts_lock = __MUTEX_INITIALIZER(kho_out.fdts_lock),
.finalized = false,
};
@@ -369,7 +373,7 @@ static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
}
}
static int kho_mem_serialize(struct kho_serialization *ser)
static int kho_mem_serialize(struct kho_out *kho_out)
{
struct khoser_mem_chunk *first_chunk = NULL;
struct khoser_mem_chunk *chunk = NULL;
@@ -377,7 +381,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
unsigned long order;
int err = -ENOMEM;
xa_for_each(&ser->track.orders, order, physxa) {
xa_for_each(&kho_out->track.orders, order, physxa) {
struct kho_mem_phys_bits *bits;
unsigned long phys;
@@ -409,7 +413,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
}
}
ser->preserved_mem_map = first_chunk;
kho_out->preserved_mem_map = first_chunk;
return 0;
@@ -670,7 +674,6 @@ static void __init kho_reserve_scratch(void)
/**
* kho_add_subtree - record the physical address of a sub FDT in KHO root tree.
* @ser: serialization control object passed by KHO notifiers.
* @name: name of the sub tree.
* @fdt: the sub tree blob.
*
@@ -684,34 +687,41 @@ static void __init kho_reserve_scratch(void)
*
* Return: 0 on success, error code on failure
*/
int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt)
int kho_add_subtree(const char *name, void *fdt)
{
int err = 0;
u64 phys = (u64)virt_to_phys(fdt);
void *root = page_to_virt(ser->fdt);
struct kho_sub_fdt *sub_fdt;
err |= fdt_begin_node(root, name);
err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys));
err |= fdt_end_node(root);
sub_fdt = kmalloc(sizeof(*sub_fdt), GFP_KERNEL);
if (!sub_fdt)
return -ENOMEM;
if (err)
return err;
INIT_LIST_HEAD(&sub_fdt->l);
sub_fdt->name = name;
sub_fdt->fdt = fdt;
return kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false);
guard(mutex)(&kho_out.fdts_lock);
list_add_tail(&sub_fdt->l, &kho_out.sub_fdts);
WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false));
return 0;
}
EXPORT_SYMBOL_GPL(kho_add_subtree);
int register_kho_notifier(struct notifier_block *nb)
void kho_remove_subtree(void *fdt)
{
return blocking_notifier_chain_register(&kho_out.chain_head, nb);
}
EXPORT_SYMBOL_GPL(register_kho_notifier);
struct kho_sub_fdt *sub_fdt;
int unregister_kho_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_unregister(&kho_out.chain_head, nb);
guard(mutex)(&kho_out.fdts_lock);
list_for_each_entry(sub_fdt, &kho_out.sub_fdts, l) {
if (sub_fdt->fdt == fdt) {
list_del(&sub_fdt->l);
kfree(sub_fdt);
kho_debugfs_fdt_remove(&kho_out.dbg, fdt);
break;
}
}
}
EXPORT_SYMBOL_GPL(unregister_kho_notifier);
EXPORT_SYMBOL_GPL(kho_remove_subtree);
/**
* kho_preserve_folio - preserve a folio across kexec.
@@ -726,7 +736,7 @@ int kho_preserve_folio(struct folio *folio)
{
const unsigned long pfn = folio_pfn(folio);
const unsigned int order = folio_order(folio);
struct kho_mem_track *track = &kho_out.ser.track;
struct kho_mem_track *track = &kho_out.track;
if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
return -EINVAL;
@@ -747,7 +757,7 @@ EXPORT_SYMBOL_GPL(kho_preserve_folio);
*/
int kho_preserve_pages(struct page *page, unsigned int nr_pages)
{
struct kho_mem_track *track = &kho_out.ser.track;
struct kho_mem_track *track = &kho_out.track;
const unsigned long start_pfn = page_to_pfn(page);
const unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn = start_pfn;
@@ -849,7 +859,7 @@ static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur
static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk,
unsigned short order)
{
struct kho_mem_track *track = &kho_out.ser.track;
struct kho_mem_track *track = &kho_out.track;
unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
__kho_unpreserve(track, pfn, pfn + 1);
@@ -1031,11 +1041,11 @@ EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
static int __kho_abort(void)
{
int err;
int err = 0;
unsigned long order;
struct kho_mem_phys *physxa;
xa_for_each(&kho_out.ser.track.orders, order, physxa) {
xa_for_each(&kho_out.track.orders, order, physxa) {
struct kho_mem_phys_bits *bits;
unsigned long phys;
@@ -1045,17 +1055,13 @@ static int __kho_abort(void)
xa_destroy(&physxa->phys_bits);
kfree(physxa);
}
xa_destroy(&kho_out.ser.track.orders);
xa_destroy(&kho_out.track.orders);
if (kho_out.ser.preserved_mem_map) {
kho_mem_ser_free(kho_out.ser.preserved_mem_map);
kho_out.ser.preserved_mem_map = NULL;
if (kho_out.preserved_mem_map) {
kho_mem_ser_free(kho_out.preserved_mem_map);
kho_out.preserved_mem_map = NULL;
}
err = blocking_notifier_call_chain(&kho_out.chain_head, KEXEC_KHO_ABORT,
NULL);
err = notifier_to_errno(err);
if (err)
pr_err("Failed to abort KHO finalization: %d\n", err);
@@ -1078,7 +1084,8 @@ int kho_abort(void)
return ret;
kho_out.finalized = false;
kho_debugfs_cleanup(&kho_out.dbg);
kho_debugfs_fdt_remove(&kho_out.dbg, kho_out.fdt);
return 0;
}
@@ -1087,41 +1094,46 @@ static int __kho_finalize(void)
{
int err = 0;
u64 *preserved_mem_map;
void *fdt = page_to_virt(kho_out.ser.fdt);
void *root = kho_out.fdt;
struct kho_sub_fdt *fdt;
err |= fdt_create(fdt, PAGE_SIZE);
err |= fdt_finish_reservemap(fdt);
err |= fdt_begin_node(fdt, "");
err |= fdt_property_string(fdt, "compatible", KHO_FDT_COMPATIBLE);
err |= fdt_create(root, PAGE_SIZE);
err |= fdt_finish_reservemap(root);
err |= fdt_begin_node(root, "");
err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE);
/*
* Reserve the preserved-memory-map property in the root FDT, so
* that all property definitions will precede subnodes created by
* KHO callers.
*/
err |= fdt_property_placeholder(fdt, PROP_PRESERVED_MEMORY_MAP,
err |= fdt_property_placeholder(root, PROP_PRESERVED_MEMORY_MAP,
sizeof(*preserved_mem_map),
(void **)&preserved_mem_map);
if (err)
goto abort;
err = kho_preserve_folio(page_folio(kho_out.ser.fdt));
err = kho_preserve_folio(virt_to_folio(kho_out.fdt));
if (err)
goto abort;
err = blocking_notifier_call_chain(&kho_out.chain_head,
KEXEC_KHO_FINALIZE, &kho_out.ser);
err = notifier_to_errno(err);
err = kho_mem_serialize(&kho_out);
if (err)
goto abort;
err = kho_mem_serialize(&kho_out.ser);
if (err)
goto abort;
*preserved_mem_map = (u64)virt_to_phys(kho_out.preserved_mem_map);
*preserved_mem_map = (u64)virt_to_phys(kho_out.ser.preserved_mem_map);
mutex_lock(&kho_out.fdts_lock);
list_for_each_entry(fdt, &kho_out.sub_fdts, l) {
phys_addr_t phys = virt_to_phys(fdt->fdt);
err |= fdt_end_node(fdt);
err |= fdt_finish(fdt);
err |= fdt_begin_node(root, fdt->name);
err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys));
err |= fdt_end_node(root);
}
mutex_unlock(&kho_out.fdts_lock);
err |= fdt_end_node(root);
err |= fdt_finish(root);
abort:
if (err) {
@@ -1149,8 +1161,10 @@ int kho_finalize(void)
kho_out.finalized = true;
return kho_debugfs_fdt_add(&kho_out.dbg, "fdt",
page_to_virt(kho_out.ser.fdt), true);
WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, "fdt",
kho_out.fdt, true));
return 0;
}
bool kho_finalized(void)
@@ -1233,15 +1247,17 @@ static __init int kho_init(void)
{
int err = 0;
const void *fdt = kho_get_fdt();
struct page *fdt_page;
if (!kho_enable)
return 0;
kho_out.ser.fdt = alloc_page(GFP_KERNEL);
if (!kho_out.ser.fdt) {
fdt_page = alloc_page(GFP_KERNEL);
if (!fdt_page) {
err = -ENOMEM;
goto err_free_scratch;
}
kho_out.fdt = page_to_virt(fdt_page);
err = kho_debugfs_init();
if (err)
@@ -1269,8 +1285,8 @@ static __init int kho_init(void)
return 0;
err_free_fdt:
put_page(kho_out.ser.fdt);
kho_out.ser.fdt = NULL;
put_page(fdt_page);
kho_out.fdt = NULL;
err_free_scratch:
for (int i = 0; i < kho_scratch_cnt; i++) {
void *start = __va(kho_scratch[i].addr);
@@ -1281,7 +1297,7 @@ static __init int kho_init(void)
kho_enable = false;
return err;
}
late_initcall(kho_init);
fs_initcall(kho_init);
static void __init kho_release_scratch(void)
{
@@ -1417,7 +1433,7 @@ int kho_fill_kimage(struct kimage *image)
if (!kho_out.finalized)
return 0;
image->kho.fdt = page_to_phys(kho_out.ser.fdt);
image->kho.fdt = virt_to_phys(kho_out.fdt);
scratch_size = sizeof(*kho_scratch) * kho_scratch_cnt;
scratch = (struct kexec_buf){

View File

@@ -61,14 +61,17 @@ int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
return __kho_debugfs_fdt_add(&dbg->fdt_list, dir, name, fdt);
}
void kho_debugfs_cleanup(struct kho_debugfs *dbg)
void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt)
{
struct fdt_debugfs *ff, *tmp;
struct fdt_debugfs *ff;
list_for_each_entry_safe(ff, tmp, &dbg->fdt_list, list) {
debugfs_remove(ff->file);
list_del(&ff->list);
kfree(ff);
list_for_each_entry(ff, &dbg->fdt_list, list) {
if (ff->wrapper.data == fdt) {
debugfs_remove(ff->file);
list_del(&ff->list);
kfree(ff);
break;
}
}
}

View File

@@ -32,7 +32,7 @@ void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt);
int kho_out_debugfs_init(struct kho_debugfs *dbg);
int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
const void *fdt, bool root);
void kho_debugfs_cleanup(struct kho_debugfs *dbg);
void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt);
#else
static inline int kho_debugfs_init(void) { return 0; }
static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
@@ -40,7 +40,8 @@ static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
static inline int kho_out_debugfs_init(struct kho_debugfs *dbg) { return 0; }
static inline int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
const void *fdt, bool root) { return 0; }
static inline void kho_debugfs_cleanup(struct kho_debugfs *dbg) {}
static inline void kho_debugfs_fdt_remove(struct kho_debugfs *dbg,
void *fdt) { }
#endif /* CONFIG_KEXEC_HANDOVER_DEBUGFS */
#ifdef CONFIG_KEXEC_HANDOVER_DEBUG

View File

@@ -39,33 +39,6 @@ struct kho_test_state {
static struct kho_test_state kho_test_state;
static int kho_test_notifier(struct notifier_block *self, unsigned long cmd,
void *v)
{
struct kho_test_state *state = &kho_test_state;
struct kho_serialization *ser = v;
int err = 0;
switch (cmd) {
case KEXEC_KHO_ABORT:
return NOTIFY_DONE;
case KEXEC_KHO_FINALIZE:
/* Handled below */
break;
default:
return NOTIFY_BAD;
}
err |= kho_preserve_folio(state->fdt);
err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt));
return err ? NOTIFY_BAD : NOTIFY_DONE;
}
static struct notifier_block kho_test_nb = {
.notifier_call = kho_test_notifier,
};
static int kho_test_save_data(struct kho_test_state *state, void *fdt)
{
phys_addr_t *folios_info __free(kvfree) = NULL;
@@ -120,6 +93,7 @@ static int kho_test_prepare_fdt(struct kho_test_state *state)
fdt = folio_address(state->fdt);
err |= kho_preserve_folio(state->fdt);
err |= fdt_create(fdt, fdt_size);
err |= fdt_finish_reservemap(fdt);
@@ -131,6 +105,7 @@ static int kho_test_prepare_fdt(struct kho_test_state *state)
err |= fdt_finish(fdt);
err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt));
if (err)
folio_put(state->fdt);
@@ -203,10 +178,6 @@ static int kho_test_save(void)
if (err)
goto err_free_folios;
err = register_kho_notifier(&kho_test_nb);
if (err)
goto err_free_fdt;
return 0;
err_free_fdt:
@@ -329,7 +300,7 @@ static void kho_test_cleanup(void)
static void __exit kho_test_exit(void)
{
unregister_kho_notifier(&kho_test_nb);
kho_remove_subtree(folio_address(kho_test_state.fdt));
kho_test_cleanup();
}
module_exit(kho_test_exit);

View File

@@ -2444,53 +2444,18 @@ int reserve_mem_release_by_name(const char *name)
#define MEMBLOCK_KHO_FDT "memblock"
#define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
#define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
static struct page *kho_fdt;
static int reserve_mem_kho_finalize(struct kho_serialization *ser)
{
int err = 0, i;
for (i = 0; i < reserved_mem_count; i++) {
struct reserve_mem_table *map = &reserved_mem_table[i];
struct page *page = phys_to_page(map->start);
unsigned int nr_pages = map->size >> PAGE_SHIFT;
err |= kho_preserve_pages(page, nr_pages);
}
err |= kho_preserve_folio(page_folio(kho_fdt));
err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
return notifier_from_errno(err);
}
static int reserve_mem_kho_notifier(struct notifier_block *self,
unsigned long cmd, void *v)
{
switch (cmd) {
case KEXEC_KHO_FINALIZE:
return reserve_mem_kho_finalize((struct kho_serialization *)v);
case KEXEC_KHO_ABORT:
return NOTIFY_DONE;
default:
return NOTIFY_BAD;
}
}
static struct notifier_block reserve_mem_kho_nb = {
.notifier_call = reserve_mem_kho_notifier,
};
static int __init prepare_kho_fdt(void)
{
int err = 0, i;
struct page *fdt_page;
void *fdt;
kho_fdt = alloc_page(GFP_KERNEL);
if (!kho_fdt)
fdt_page = alloc_page(GFP_KERNEL);
if (!fdt_page)
return -ENOMEM;
fdt = page_to_virt(kho_fdt);
fdt = page_to_virt(fdt_page);
err |= fdt_create(fdt, PAGE_SIZE);
err |= fdt_finish_reservemap(fdt);
@@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
for (i = 0; i < reserved_mem_count; i++) {
struct reserve_mem_table *map = &reserved_mem_table[i];
struct page *page = phys_to_page(map->start);
unsigned int nr_pages = map->size >> PAGE_SHIFT;
err |= kho_preserve_pages(page, nr_pages);
err |= fdt_begin_node(fdt, map->name);
err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
@@ -2507,13 +2475,16 @@ static int __init prepare_kho_fdt(void)
err |= fdt_end_node(fdt);
}
err |= fdt_end_node(fdt);
err |= fdt_finish(fdt);
err |= kho_preserve_folio(page_folio(fdt_page));
if (!err)
err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
if (err) {
pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
put_page(kho_fdt);
kho_fdt = NULL;
put_page(fdt_page);
}
return err;
@@ -2529,13 +2500,6 @@ static int __init reserve_mem_init(void)
err = prepare_kho_fdt();
if (err)
return err;
err = register_kho_notifier(&reserve_mem_kho_nb);
if (err) {
put_page(kho_fdt);
kho_fdt = NULL;
}
return err;
}
late_initcall(reserve_mem_init);