Merge branch 'net-mana-fix-probe-remove-error-path-bugs'

Erni Sri Satya Vennela says:

====================
net: mana: Fix probe/remove error path bugs

Fix five bugs in mana_probe()/mana_remove() error handling that can
cause warnings on uninitialized work structs, NULL pointer dereferences,
masked errors, and resource leaks when early probe steps fail.

Patches 1-2 move work struct initialization (link_change_work and
gf_stats_work) to before any error path that could trigger
mana_remove(), preventing WARN_ON in __flush_work() or debug object
warnings when sync cancellation runs on uninitialized work structs.

Patch 3 guards mana_remove() against double invocation. If PM resume
fails, mana_probe() calls mana_remove() which sets gdma_context and
driver_data to NULL. A failed resume does not unbind the driver, so
when the device is eventually unbound, mana_remove() is called again
and dereferences NULL, causing a kernel panic. An early return on
NULL gdma_context or driver_data makes the second call harmless.

Patch 4 prevents add_adev() from overwriting a port probe error,
which could leave the driver in a broken state with NULL ports while
reporting success.

Patch 5 changes 'goto out' to 'break' in mana_remove()'s port loop
so that mana_destroy_eq() is always reached, preventing EQ leaks when
a NULL port is encountered.
====================

Link: https://patch.msgid.link/20260420124741.1056179-1-ernis@linux.microsoft.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni
2026-04-23 12:49:16 +02:00

View File

@@ -3631,8 +3631,12 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
ac->gdma_dev = gd;
gd->driver_data = ac;
INIT_WORK(&ac->link_change_work, mana_link_state_handle);
}
INIT_DELAYED_WORK(&ac->gf_stats_work, mana_gf_stats_work_handler);
err = mana_create_eq(ac);
if (err) {
dev_err(dev, "Failed to create EQs: %d\n", err);
@@ -3648,8 +3652,6 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
if (!resuming) {
ac->num_ports = num_ports;
INIT_WORK(&ac->link_change_work, mana_link_state_handle);
} else {
if (ac->num_ports != num_ports) {
dev_err(dev, "The number of vPorts changed: %d->%d\n",
@@ -3678,10 +3680,9 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
if (!resuming) {
for (i = 0; i < ac->num_ports; i++) {
err = mana_probe_port(ac, i, &ac->ports[i]);
/* we log the port for which the probe failed and stop
* probes for subsequent ports.
* Note that we keep running ports, for which the probes
* were successful, unless add_adev fails too
/* Log the port for which the probe failed, stop probing
* subsequent ports, and skip add_adev.
* mana_remove() will clean up already-probed ports.
*/
if (err) {
dev_err(dev, "Probe Failed for port %d\n", i);
@@ -3695,10 +3696,9 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
enable_work(&apc->queue_reset_work);
err = mana_attach(ac->ports[i]);
rtnl_unlock();
/* we log the port for which the attach failed and stop
* attach for subsequent ports
* Note that we keep running ports, for which the attach
* were successful, unless add_adev fails too
/* Log the port for which the attach failed, stop
* attaching subsequent ports, and skip add_adev.
* mana_remove() will clean up already-attached ports.
*/
if (err) {
dev_err(dev, "Attach Failed for port %d\n", i);
@@ -3707,9 +3707,9 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
}
}
err = add_adev(gd, "eth");
if (!err)
err = add_adev(gd, "eth");
INIT_DELAYED_WORK(&ac->gf_stats_work, mana_gf_stats_work_handler);
schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD);
out:
@@ -3730,11 +3730,16 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
struct gdma_context *gc = gd->gdma_context;
struct mana_context *ac = gd->driver_data;
struct mana_port_context *apc;
struct device *dev = gc->dev;
struct device *dev;
struct net_device *ndev;
int err;
int i;
if (!gc || !ac)
return;
dev = gc->dev;
disable_work_sync(&ac->link_change_work);
cancel_delayed_work_sync(&ac->gf_stats_work);
@@ -3747,7 +3752,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
if (!ndev) {
if (i == 0)
dev_err(dev, "No net device to remove\n");
goto out;
break;
}
apc = netdev_priv(ndev);
@@ -3778,7 +3783,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
}
mana_destroy_eq(ac);
out:
if (ac->per_port_queue_reset_wq) {
destroy_workqueue(ac->per_port_queue_reset_wq);
ac->per_port_queue_reset_wq = NULL;