From bb729bf1d6fdf5c2087c1651165c74cef0da1742 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Tue, 10 Mar 2026 23:57:53 +0800 Subject: [PATCH 1/3] driver core: Add conditional guard support for device_lock() Introduce conditional guard version of device_lock() for scenarios that require conditional device lock holding. Suggested-by: Dan Williams Reviewed-by: Dan Williams Acked-by: Greg Kroah-Hartman Signed-off-by: Li Ming Link: https://patch.msgid.link/20260310-fix_access_endpoint_without_drv_check-v1-1-94fe919a0b87@zohomail.com Signed-off-by: Danilo Krummrich --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/device.h b/include/linux/device.h index 0be95294b6e6..4fafee80524b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -911,6 +911,7 @@ static inline void device_unlock(struct device *dev) } DEFINE_GUARD(device, struct device *, device_lock(_T), device_unlock(_T)) +DEFINE_GUARD_COND(device, _intr, device_lock_interruptible(_T), _RET == 0) static inline void device_lock_assert(struct device *dev) { From dc372e5f429ced834d81ff12a945397dc43585a8 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Sat, 14 Mar 2026 15:06:32 +0800 Subject: [PATCH 2/3] cxl/pci: Hold memdev lock in cxl_event_trace_record() cxl_event_config() invokes cxl_mem_get_event_record() to get remain event logs from CXL device during cxl_pci_probe(). If CXL memdev probing failed before that, it is possible to access an invalid endpoint. So adding a cxlmd->driver binding status checking inside cxl_dpa_to_region() to ensure the corresponding endpoint is valid. Besides, cxl_event_trace_record() needs to hold memdev lock to invoke cxl_dpa_to_region() to ensure the memdev probing completed. It is possible that cxl_event_trace_record() is invoked during the CXL memdev probing, especially user or cxl_acpi triggers CXL memdev re-probing. Suggested-by: Dan Williams Reviewed-by: Dan Williams Reviewed-by: Dave Jiang Signed-off-by: Li Ming Link: https://patch.msgid.link/20260314-fix_access_endpoint_without_drv_check-v2-3-4c09edf2e1db@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 5 +++-- drivers/cxl/core/region.c | 8 +++++--- drivers/cxl/cxlmem.h | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index e7a6452bf544..3f34bbabf4d3 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -893,7 +893,7 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, "CXL"); -void cxl_event_trace_record(const struct cxl_memdev *cxlmd, +void cxl_event_trace_record(struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt) @@ -920,6 +920,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, * translations. Take topology mutation locks and lookup * { HPA, REGION } from { DPA, MEMDEV } in the event record. */ + guard(device)(&cxlmd->dev); guard(rwsem_read)(&cxl_rwsem.region); guard(rwsem_read)(&cxl_rwsem.dpa); @@ -968,7 +969,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, } EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, "CXL"); -static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd, +static void __cxl_event_trace_record(struct cxl_memdev *cxlmd, enum cxl_event_log_type type, struct cxl_event_record_raw *record) { diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 42874948b589..840d52a52c4e 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2950,13 +2950,15 @@ static int __cxl_dpa_to_region(struct device *dev, void *arg) struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa) { struct cxl_dpa_to_region_context ctx; - struct cxl_port *port; + struct cxl_port *port = cxlmd->endpoint; + + if (!cxlmd->dev.driver) + return NULL; ctx = (struct cxl_dpa_to_region_context) { .dpa = dpa, }; - port = cxlmd->endpoint; - if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port)) + if (cxl_num_decoders_committed(port)) device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); return ctx.cxlr; diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index e21d744d639b..7a34a19c02c8 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -864,7 +864,7 @@ void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status); -void cxl_event_trace_record(const struct cxl_memdev *cxlmd, +void cxl_event_trace_record(struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt); From e8069c66d09309579e53567be8ddfa6ccb2f452a Mon Sep 17 00:00:00 2001 From: Li Ming Date: Sat, 14 Mar 2026 15:06:33 +0800 Subject: [PATCH 3/3] cxl/pci: Check memdev driver binding status in cxl_reset_done() cxl_reset_done() accesses the endpoint of the corresponding CXL memdev without endpoint validity checking. By default, cxlmd->endpoint is initialized to -ENXIO, if cxl_reset_done() is triggered after the corresponding CXL memdev probing failed, this results in access to an invalid endpoint. CXL subsystem can always check CXL memdev driver binding status to confirm its endpoint validity. So adding the CXL memdev driver checking inside cxl_reset_done() to avoid accessing an invalid endpoint. Fixes: 934edcd436dc ("cxl: Add post-reset warning if reset results in loss of previously committed HDM decoders") Reviewed-by: Dan Williams Reviewed-by: Dave Jiang Signed-off-by: Li Ming Link: https://patch.msgid.link/20260314-fix_access_endpoint_without_drv_check-v2-4-4c09edf2e1db@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index fbb300a01830..a5922116db2a 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1043,6 +1043,9 @@ static void cxl_reset_done(struct pci_dev *pdev) * that no longer exists. */ guard(device)(&cxlmd->dev); + if (!cxlmd->dev.driver) + return; + if (cxlmd->endpoint && cxl_endpoint_decoder_reset_detected(cxlmd->endpoint)) { dev_crit(dev, "SBR happened without memory regions removal.\n");