From 9e541b3cee70a3bbe86b176c903c23b29fe033cd Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Wed, 10 Dec 2025 21:29:05 +0800 Subject: [PATCH 1/3] PCI: trace: Add generic RAS tracepoint for hotplug event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hotplug events are critical indicators for analyzing hardware health, and surprise link downs can significantly impact system performance and reliability. Define a new TRACING_SYSTEM named "pci", add a generic RAS tracepoint for hotplug event to help health checks. Add enum pci_hotplug_event in include/uapi/linux/pci.h so applications like rasdaemon can register tracepoint event handlers for it. The following output is generated when a device is hotplugged: $ echo 1 > /sys/kernel/debug/tracing/events/pci/pci_hp_event/enable $ cat /sys/kernel/debug/tracing/trace_pipe irq/51-pciehp-88 [001] ..... 1311.177459: pci_hp_event: 0000:00:02.0 slot:10, event:CARD_PRESENT irq/51-pciehp-88 [001] ..... 1311.177566: pci_hp_event: 0000:00:02.0 slot:10, event:LINK_UP Suggested-by: Lukas Wunner Signed-off-by: Shuai Xue Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Reviewed-by: Jonathan Cameron Reviewed-by: Steven Rostedt (Google) # for trace event Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20251210132907.58799-2-xueshuai@linux.alibaba.com --- drivers/pci/Makefile | 3 ++ drivers/pci/hotplug/pciehp_ctrl.c | 31 ++++++++++--- drivers/pci/trace.c | 11 +++++ include/trace/events/pci.h | 72 +++++++++++++++++++++++++++++++ include/uapi/linux/pci.h | 7 +++ 5 files changed, 118 insertions(+), 6 deletions(-) create mode 100644 drivers/pci/trace.c create mode 100644 include/trace/events/pci.h diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index e10cfe5a280b..8c259a9a8796 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -47,3 +47,6 @@ obj-y += controller/ obj-y += switch/ subdir-ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG + +CFLAGS_trace.o := -I$(src) +obj-$(CONFIG_TRACING) += trace.o diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index bcc938d4420f..7805f697a02c 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "../pci.h" #include "pciehp.h" @@ -244,12 +245,20 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events) case ON_STATE: ctrl->state = POWEROFF_STATE; mutex_unlock(&ctrl->state_lock); - if (events & PCI_EXP_SLTSTA_DLLSC) + if (events & PCI_EXP_SLTSTA_DLLSC) { ctrl_info(ctrl, "Slot(%s): Link Down\n", slot_name(ctrl)); - if (events & PCI_EXP_SLTSTA_PDC) + trace_pci_hp_event(pci_name(ctrl->pcie->port), + slot_name(ctrl), + PCI_HOTPLUG_LINK_DOWN); + } + if (events & PCI_EXP_SLTSTA_PDC) { ctrl_info(ctrl, "Slot(%s): Card not present\n", slot_name(ctrl)); + trace_pci_hp_event(pci_name(ctrl->pcie->port), + slot_name(ctrl), + PCI_HOTPLUG_CARD_NOT_PRESENT); + } pciehp_disable_slot(ctrl, SURPRISE_REMOVAL); break; default: @@ -269,6 +278,9 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events) INDICATOR_NOOP); ctrl_info(ctrl, "Slot(%s): Card not present\n", slot_name(ctrl)); + trace_pci_hp_event(pci_name(ctrl->pcie->port), + slot_name(ctrl), + PCI_HOTPLUG_CARD_NOT_PRESENT); } mutex_unlock(&ctrl->state_lock); return; @@ -281,12 +293,19 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events) case OFF_STATE: ctrl->state = POWERON_STATE; mutex_unlock(&ctrl->state_lock); - if (present) + if (present) { ctrl_info(ctrl, "Slot(%s): Card present\n", slot_name(ctrl)); - if (link_active) - ctrl_info(ctrl, "Slot(%s): Link Up\n", - slot_name(ctrl)); + trace_pci_hp_event(pci_name(ctrl->pcie->port), + slot_name(ctrl), + PCI_HOTPLUG_CARD_PRESENT); + } + if (link_active) { + ctrl_info(ctrl, "Slot(%s): Link Up\n", slot_name(ctrl)); + trace_pci_hp_event(pci_name(ctrl->pcie->port), + slot_name(ctrl), + PCI_HOTPLUG_LINK_UP); + } ctrl->request_result = pciehp_enable_slot(ctrl); break; default: diff --git a/drivers/pci/trace.c b/drivers/pci/trace.c new file mode 100644 index 000000000000..cf11abca8602 --- /dev/null +++ b/drivers/pci/trace.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tracepoints for PCI system + * + * Copyright (C) 2025 Alibaba Corporation + */ + +#include + +#define CREATE_TRACE_POINTS +#include diff --git a/include/trace/events/pci.h b/include/trace/events/pci.h new file mode 100644 index 000000000000..39e512a167ee --- /dev/null +++ b/include/trace/events/pci.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM pci + +#if !defined(_TRACE_HW_EVENT_PCI_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HW_EVENT_PCI_H + +#include + +#define PCI_HOTPLUG_EVENT \ + EM(PCI_HOTPLUG_LINK_UP, "LINK_UP") \ + EM(PCI_HOTPLUG_LINK_DOWN, "LINK_DOWN") \ + EM(PCI_HOTPLUG_CARD_PRESENT, "CARD_PRESENT") \ + EMe(PCI_HOTPLUG_CARD_NOT_PRESENT, "CARD_NOT_PRESENT") + +/* Enums require being exported to userspace, for user tool parsing */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +PCI_HOTPLUG_EVENT + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) {a, b}, +#define EMe(a, b) {a, b} + +/* + * Note: For generic PCI hotplug events, we pass already-resolved strings + * (port_name, slot) instead of driver-specific structures like 'struct + * controller'. This is because different PCI hotplug drivers (pciehp, cpqphp, + * ibmphp, shpchp) define their own versions of 'struct controller' with + * different fields and helper functions. Using driver-specific structures would + * make the tracepoint interface non-generic and cause compatibility issues + * across different drivers. + */ +TRACE_EVENT(pci_hp_event, + + TP_PROTO(const char *port_name, + const char *slot, + const int event), + + TP_ARGS(port_name, slot, event), + + TP_STRUCT__entry( + __string( port_name, port_name ) + __string( slot, slot ) + __field( int, event ) + ), + + TP_fast_assign( + __assign_str(port_name); + __assign_str(slot); + __entry->event = event; + ), + + TP_printk("%s slot:%s, event:%s\n", + __get_str(port_name), + __get_str(slot), + __print_symbolic(__entry->event, PCI_HOTPLUG_EVENT) + ) +); + +#endif /* _TRACE_HW_EVENT_PCI_H */ + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/pci.h b/include/uapi/linux/pci.h index a769eefc5139..4f150028965d 100644 --- a/include/uapi/linux/pci.h +++ b/include/uapi/linux/pci.h @@ -39,4 +39,11 @@ #define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */ #define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. */ +enum pci_hotplug_event { + PCI_HOTPLUG_LINK_UP, + PCI_HOTPLUG_LINK_DOWN, + PCI_HOTPLUG_CARD_PRESENT, + PCI_HOTPLUG_CARD_NOT_PRESENT, +}; + #endif /* _UAPILINUX_PCI_H */ From d4318c1a79ac49f0726dd23a01d1961757b5f98d Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Wed, 10 Dec 2025 21:29:06 +0800 Subject: [PATCH 2/3] PCI: trace: Add RAS tracepoint to monitor link speed changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCIe link speed degradation directly impacts system performance and often indicates hardware issues such as faulty devices, physical layer problems, or configuration errors. To this end, add a RAS tracepoint to monitor link speed changes, enabling proactive health checks and diagnostic analysis. The following output is generated when a device is hotplugged: $ echo 1 > /sys/kernel/debug/tracing/events/pci/pcie_link_event/enable $ cat /sys/kernel/debug/tracing/trace_pipe irq/51-pciehp-88 [001] ..... 381.545386: pcie_link_event: 0000:00:02.0 type:4, reason:4, cur_bus_speed:20, max_bus_speed:23, width:1, flit_mode:0, status:DLLLA Suggested-by: Ilpo Järvinen Suggested-by: Matthew W Carlis Suggested-by: Lukas Wunner Signed-off-by: Shuai Xue Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20251210132907.58799-3-xueshuai@linux.alibaba.com --- drivers/pci/hotplug/pciehp_hpc.c | 3 +- drivers/pci/pci.c | 2 +- drivers/pci/pci.h | 21 ++++++++++-- drivers/pci/pcie/bwctrl.c | 4 +-- drivers/pci/probe.c | 9 +++-- include/trace/events/pci.h | 57 ++++++++++++++++++++++++++++++++ 6 files changed, 87 insertions(+), 9 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index bcc51b26d03d..ad5f28f6a8b1 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -320,7 +320,8 @@ int pciehp_check_link_status(struct controller *ctrl) } pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &linksta2); - __pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status, linksta2); + __pcie_update_link_speed(ctrl->pcie->port->subordinate, PCIE_HOTPLUG, + lnk_status, linksta2); if (!found) { ctrl_info(ctrl, "Slot(%s): No device found\n", diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 13dbb405dc31..f034e173819f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4550,7 +4550,7 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt) * Link Speed. */ if (pdev->subordinate) - pcie_update_link_speed(pdev->subordinate); + pcie_update_link_speed(pdev->subordinate, PCIE_LINK_RETRAIN); return rc; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0e67014aa001..c71cfbe78cc3 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -5,6 +5,7 @@ #include #include #include +#include struct pcie_tlp_log; @@ -555,12 +556,28 @@ const char *pci_speed_string(enum pci_bus_speed speed); void __pcie_print_link_status(struct pci_dev *dev, bool verbose); void pcie_report_downtraining(struct pci_dev *dev); -static inline void __pcie_update_link_speed(struct pci_bus *bus, u16 linksta, u16 linksta2) +enum pcie_link_change_reason { + PCIE_LINK_RETRAIN, + PCIE_ADD_BUS, + PCIE_BWCTRL_ENABLE, + PCIE_BWCTRL_IRQ, + PCIE_HOTPLUG, +}; + +static inline void __pcie_update_link_speed(struct pci_bus *bus, + enum pcie_link_change_reason reason, + u16 linksta, u16 linksta2) { bus->cur_bus_speed = pcie_link_speed[linksta & PCI_EXP_LNKSTA_CLS]; bus->flit_mode = (linksta2 & PCI_EXP_LNKSTA2_FLIT) ? 1 : 0; + + trace_pcie_link_event(bus, + reason, + FIELD_GET(PCI_EXP_LNKSTA_NLW, linksta), + linksta & PCI_EXP_LNKSTA_LINK_STATUS_MASK); } -void pcie_update_link_speed(struct pci_bus *bus); + +void pcie_update_link_speed(struct pci_bus *bus, enum pcie_link_change_reason reason); /* Single Root I/O Virtualization */ struct pci_sriov { diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c index 36f939f23d34..32f1b30ecb84 100644 --- a/drivers/pci/pcie/bwctrl.c +++ b/drivers/pci/pcie/bwctrl.c @@ -199,7 +199,7 @@ static void pcie_bwnotif_enable(struct pcie_device *srv) * Update after enabling notifications & clearing status bits ensures * link speed is up to date. */ - pcie_update_link_speed(port->subordinate); + pcie_update_link_speed(port->subordinate, PCIE_BWCTRL_ENABLE); } static void pcie_bwnotif_disable(struct pci_dev *port) @@ -234,7 +234,7 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context) * speed (inside pcie_update_link_speed()) after LBMS has been * cleared to avoid missing link speed changes. */ - pcie_update_link_speed(port->subordinate); + pcie_update_link_speed(port->subordinate, PCIE_BWCTRL_IRQ); return IRQ_HANDLED; } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 41183aed8f5d..392b7dc3d391 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "pci.h" #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ @@ -824,14 +825,16 @@ const char *pci_speed_string(enum pci_bus_speed speed) } EXPORT_SYMBOL_GPL(pci_speed_string); -void pcie_update_link_speed(struct pci_bus *bus) +void pcie_update_link_speed(struct pci_bus *bus, + enum pcie_link_change_reason reason) { struct pci_dev *bridge = bus->self; u16 linksta, linksta2; pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &linksta); pcie_capability_read_word(bridge, PCI_EXP_LNKSTA2, &linksta2); - __pcie_update_link_speed(bus, linksta, linksta2); + + __pcie_update_link_speed(bus, reason, linksta, linksta2); } EXPORT_SYMBOL_GPL(pcie_update_link_speed); @@ -918,7 +921,7 @@ static void pci_set_bus_speed(struct pci_bus *bus) pcie_capability_read_dword(bridge, PCI_EXP_LNKCAP, &linkcap); bus->max_bus_speed = pcie_link_speed[linkcap & PCI_EXP_LNKCAP_SLS]; - pcie_update_link_speed(bus); + pcie_update_link_speed(bus, PCIE_ADD_BUS); } } diff --git a/include/trace/events/pci.h b/include/trace/events/pci.h index 39e512a167ee..9a9122f62fd3 100644 --- a/include/trace/events/pci.h +++ b/include/trace/events/pci.h @@ -5,6 +5,7 @@ #if !defined(_TRACE_HW_EVENT_PCI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HW_EVENT_PCI_H +#include #include #define PCI_HOTPLUG_EVENT \ @@ -66,6 +67,62 @@ TRACE_EVENT(pci_hp_event, ) ); +#define PCI_EXP_LNKSTA_LINK_STATUS_MASK (PCI_EXP_LNKSTA_LBMS | \ + PCI_EXP_LNKSTA_LABS | \ + PCI_EXP_LNKSTA_LT | \ + PCI_EXP_LNKSTA_DLLLA) + +#define LNKSTA_FLAGS \ + { PCI_EXP_LNKSTA_LT, "LT"}, \ + { PCI_EXP_LNKSTA_DLLLA, "DLLLA"}, \ + { PCI_EXP_LNKSTA_LBMS, "LBMS"}, \ + { PCI_EXP_LNKSTA_LABS, "LABS"} + +TRACE_EVENT(pcie_link_event, + + TP_PROTO(struct pci_bus *bus, + unsigned int reason, + unsigned int width, + unsigned int status + ), + + TP_ARGS(bus, reason, width, status), + + TP_STRUCT__entry( + __string( port_name, pci_name(bus->self)) + __field( unsigned int, type ) + __field( unsigned int, reason ) + __field( unsigned int, cur_bus_speed ) + __field( unsigned int, max_bus_speed ) + __field( unsigned int, width ) + __field( unsigned int, flit_mode ) + __field( unsigned int, link_status ) + ), + + TP_fast_assign( + __assign_str(port_name); + __entry->type = pci_pcie_type(bus->self); + __entry->reason = reason; + __entry->cur_bus_speed = bus->cur_bus_speed; + __entry->max_bus_speed = bus->max_bus_speed; + __entry->width = width; + __entry->flit_mode = bus->flit_mode; + __entry->link_status = status; + ), + + TP_printk("%s type:%d, reason:%d, cur_bus_speed:%d, max_bus_speed:%d, width:%u, flit_mode:%u, status:%s\n", + __get_str(port_name), + __entry->type, + __entry->reason, + __entry->cur_bus_speed, + __entry->max_bus_speed, + __entry->width, + __entry->flit_mode, + __print_flags((unsigned long)__entry->link_status, "|", + LNKSTA_FLAGS) + ) +); + #endif /* _TRACE_HW_EVENT_PCI_H */ /* This part must be outside protection */ From 8236fc613d44e59f6736d6c3e9efffaf26ab7f00 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Wed, 10 Dec 2025 21:29:07 +0800 Subject: [PATCH 3/3] Documentation: tracing: Add PCI tracepoint documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCI tracing system provides tracepoints to monitor critical hardware events that can impact system performance and reliability. Add documentation about it. Signed-off-by: Shuai Xue [bhelgaas: squash fixes: https://lore.kernel.org/r/20260108013956.14351-2-bagasdotme@gmail.com https://lore.kernel.org/r/20260108013956.14351-3-bagasdotme@gmail.com] Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20251210132907.58799-4-xueshuai@linux.alibaba.com --- Documentation/trace/events-pci.rst | 74 ++++++++++++++++++++++++++++++ Documentation/trace/index.rst | 1 + 2 files changed, 75 insertions(+) create mode 100644 Documentation/trace/events-pci.rst diff --git a/Documentation/trace/events-pci.rst b/Documentation/trace/events-pci.rst new file mode 100644 index 000000000000..03ff4ad30ddf --- /dev/null +++ b/Documentation/trace/events-pci.rst @@ -0,0 +1,74 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +Subsystem Trace Points: PCI +=========================== + +Overview +======== +The PCI tracing system provides tracepoints to monitor critical hardware events +that can impact system performance and reliability. These events normally show +up here: + + /sys/kernel/tracing/events/pci + +Cf. include/trace/events/pci.h for the events definitions. + +Available Tracepoints +===================== + +pci_hp_event +------------ + +Monitors PCI hotplug events including card insertion/removal and link +state changes. +:: + + pci_hp_event "%s slot:%s, event:%s\n" + +**Event Types**: + +* ``LINK_UP`` - PCIe link established +* ``LINK_DOWN`` - PCIe link lost +* ``CARD_PRESENT`` - Card detected in slot +* ``CARD_NOT_PRESENT`` - Card removed from slot + +**Example Usage**:: + + # Enable the tracepoint + echo 1 > /sys/kernel/debug/tracing/events/pci/pci_hp_event/enable + + # Monitor events (the following output is generated when a device is hotplugged) + cat /sys/kernel/debug/tracing/trace_pipe + irq/51-pciehp-88 [001] ..... 1311.177459: pci_hp_event: 0000:00:02.0 slot:10, event:CARD_PRESENT + + irq/51-pciehp-88 [001] ..... 1311.177566: pci_hp_event: 0000:00:02.0 slot:10, event:LINK_UP + +pcie_link_event +--------------- + +Monitors PCIe link speed changes and provides detailed link status information. +:: + + pcie_link_event "%s type:%d, reason:%d, cur_bus_speed:%d, max_bus_speed:%d, width:%u, flit_mode:%u, status:%s\n" + +**Parameters**: + +* ``type`` - PCIe device type (4=Root Port, etc.) +* ``reason`` - Reason for link change: + + - ``0`` - Link retrain + - ``1`` - Bus enumeration + - ``2`` - Bandwidth notification enable + - ``3`` - Bandwidth notification IRQ + - ``4`` - Hotplug event + + +**Example Usage**:: + + # Enable the tracepoint + echo 1 > /sys/kernel/debug/tracing/events/pci/pcie_link_event/enable + + # Monitor events (the following output is generated when a device is hotplugged) + cat /sys/kernel/debug/tracing/trace_pipe + irq/51-pciehp-88 [001] ..... 381.545386: pcie_link_event: 0000:00:02.0 type:4, reason:4, cur_bus_speed:20, max_bus_speed:23, width:1, flit_mode:0, status:DLLLA diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index b4a429dc4f7a..0a40bfabcf19 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -54,6 +54,7 @@ applications. events-power events-nmi events-msr + events-pci boottime-trace histogram histogram-design