Pull IPMI fixes from Corey Minyard:
 "Fix a number of issues that came up recently

  The first two fixes are workarounds for buggy IPMI hardware. The
  hardware constantly reports that it has data for the IPMI driver to
  read, so the driver keeps reading that data, causing any new requests
  to be blocked.

  The first fix was to check for invalid data right when the data was
  read from the device and stop the operation there (there was a later
  check for invalid data, but it could not stop the operation at that
  point). It turned out the device was providing good data, so that
  didn't fix the issue, but it's still a good check.

  The second fix stops fetching this data after a few fetches and allows
  other operations to occur. The driver won't work very well, but at
  least it won't wedge. This seems to fix the issue.

  The third issue is a problem I spotted while working on the previous
  issue: if a certain memory allocation failed, the driver would stop
  working.

  The fourth issue was a pointer that was not set to NULL after a
  PTR_ERR() return, introduced in the previous series for 7.1"

* tag 'for-linus-7.1-2' of https://github.com/cminyard/linux-ipmi:
  ipmi:ssif: NULL thread on error
  ipmi:si: Return state to normal if message allocation fails
  ipmi: Add limits to event and receive message requests
  ipmi: Check event message buffer response for bad data
This commit is contained in:
Linus Torvalds
2026-05-04 12:48:30 -07:00
2 changed files with 78 additions and 16 deletions

View File

@@ -168,6 +168,10 @@ struct smi_info {
OEM2_DATA_AVAIL)
unsigned char msg_flags;
/* When requesting events and messages, don't do it forever. */
unsigned int num_requests_in_a_row;
bool last_was_flag_fetch;
/* Does the BMC have an event buffer? */
bool has_event_buffer;
@@ -410,7 +414,10 @@ static void start_getting_msg_queue(struct smi_info *smi_info)
start_new_msg(smi_info, smi_info->curr_msg->data,
smi_info->curr_msg->data_size);
smi_info->si_state = SI_GETTING_MESSAGES;
if (smi_info->si_state != SI_GETTING_MESSAGES) {
smi_info->num_requests_in_a_row = 0;
smi_info->si_state = SI_GETTING_MESSAGES;
}
}
static void start_getting_events(struct smi_info *smi_info)
@@ -421,7 +428,10 @@ static void start_getting_events(struct smi_info *smi_info)
start_new_msg(smi_info, smi_info->curr_msg->data,
smi_info->curr_msg->data_size);
smi_info->si_state = SI_GETTING_EVENTS;
if (smi_info->si_state != SI_GETTING_EVENTS) {
smi_info->num_requests_in_a_row = 0;
smi_info->si_state = SI_GETTING_EVENTS;
}
}
/*
@@ -487,15 +497,19 @@ static void handle_flags(struct smi_info *smi_info)
} else if (smi_info->msg_flags & RECEIVE_MSG_AVAIL) {
/* Messages available. */
smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
if (!smi_info->curr_msg)
if (!smi_info->curr_msg) {
smi_info->si_state = SI_NORMAL;
return;
}
start_getting_msg_queue(smi_info);
} else if (smi_info->msg_flags & EVENT_MSG_BUFFER_FULL) {
/* Events available. */
smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
if (!smi_info->curr_msg)
if (!smi_info->curr_msg) {
smi_info->si_state = SI_NORMAL;
return;
}
start_getting_events(smi_info);
} else if (smi_info->msg_flags & OEM_DATA_AVAIL &&
@@ -595,6 +609,7 @@ static void handle_transaction_done(struct smi_info *smi_info)
smi_info->si_state = SI_NORMAL;
} else {
smi_info->msg_flags = msg[3];
smi_info->last_was_flag_fetch = true;
handle_flags(smi_info);
}
break;
@@ -630,7 +645,13 @@ static void handle_transaction_done(struct smi_info *smi_info)
*/
msg = smi_info->curr_msg;
smi_info->curr_msg = NULL;
if (msg->rsp[2] != 0) {
/*
* It appears some BMCs, with no event data, return no
* data in the message and not a 0x80 error as the
* spec says they should. Shut down processing if
* the data is not the right length.
*/
if (msg->rsp[2] != 0 || msg->rsp_size != 19) {
/* Error getting event, probably done. */
msg->done(msg);
@@ -640,6 +661,11 @@ static void handle_transaction_done(struct smi_info *smi_info)
} else {
smi_inc_stat(smi_info, events);
smi_info->num_requests_in_a_row++;
if (smi_info->num_requests_in_a_row > 10)
/* Stop if we do this too many times. */
smi_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL;
/*
* Do this before we deliver the message
* because delivering the message releases the
@@ -678,6 +704,11 @@ static void handle_transaction_done(struct smi_info *smi_info)
} else {
smi_inc_stat(smi_info, incoming_messages);
smi_info->num_requests_in_a_row++;
if (smi_info->num_requests_in_a_row > 10)
/* Stop if we do this too many times. */
smi_info->msg_flags &= ~RECEIVE_MSG_AVAIL;
/*
* Do this before we deliver the message
* because delivering the message releases the
@@ -819,6 +850,26 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
goto out;
}
/*
* If we are currently idle, or if the last thing that was
* done was a flag fetch and there is a message pending, try
* to start the next message.
*
* We do the waiting message check to avoid a stuck flag
* completely wedging the driver. Let a message through
* in between flag operations if that happens.
*/
if (si_sm_result == SI_SM_IDLE ||
(si_sm_result == SI_SM_ATTN && smi_info->waiting_msg &&
smi_info->last_was_flag_fetch)) {
smi_info->last_was_flag_fetch = false;
smi_inc_stat(smi_info, idles);
si_sm_result = start_next_msg(smi_info);
if (si_sm_result != SI_SM_IDLE)
goto restart;
}
/*
* We prefer handling attn over new messages. But don't do
* this if there is not yet an upper layer to handle anything.
@@ -846,15 +897,6 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
}
}
/* If we are currently idle, try to start the next message. */
if (si_sm_result == SI_SM_IDLE) {
smi_inc_stat(smi_info, idles);
si_sm_result = start_next_msg(smi_info);
if (si_sm_result != SI_SM_IDLE)
goto restart;
}
if ((si_sm_result == SI_SM_IDLE)
&& (atomic_read(&smi_info->req_events))) {
/*

View File

@@ -225,6 +225,9 @@ struct ssif_info {
bool has_event_buffer;
bool supports_alert;
/* When requesting events and messages, don't do it forever. */
unsigned int num_requests_in_a_row;
/*
* Used to tell what we should do with alerts. If we are
* waiting on a response, read the data immediately.
@@ -413,7 +416,10 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags)
}
ssif_info->curr_msg = msg;
ssif_info->ssif_state = SSIF_GETTING_EVENTS;
if (ssif_info->ssif_state != SSIF_GETTING_EVENTS) {
ssif_info->num_requests_in_a_row = 0;
ssif_info->ssif_state = SSIF_GETTING_EVENTS;
}
ipmi_ssif_unlock_cond(ssif_info, flags);
msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
@@ -436,7 +442,10 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info,
}
ssif_info->curr_msg = msg;
ssif_info->ssif_state = SSIF_GETTING_MESSAGES;
if (ssif_info->ssif_state != SSIF_GETTING_MESSAGES) {
ssif_info->num_requests_in_a_row = 0;
ssif_info->ssif_state = SSIF_GETTING_MESSAGES;
}
ipmi_ssif_unlock_cond(ssif_info, flags);
msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
@@ -843,6 +852,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL;
handle_flags(ssif_info, flags);
} else {
ssif_info->num_requests_in_a_row++;
if (ssif_info->num_requests_in_a_row > 10)
/* Stop if we do this too many times. */
ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL;
handle_flags(ssif_info, flags);
ssif_inc_stat(ssif_info, events);
deliver_recv_msg(ssif_info, msg);
@@ -876,6 +890,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL;
handle_flags(ssif_info, flags);
} else {
ssif_info->num_requests_in_a_row++;
if (ssif_info->num_requests_in_a_row > 10)
/* Stop if we do this too many times. */
ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL;
ssif_inc_stat(ssif_info, incoming_messages);
handle_flags(ssif_info, flags);
deliver_recv_msg(ssif_info, msg);
@@ -1886,6 +1905,7 @@ static int ssif_probe(struct i2c_client *client)
"kssif%4.4x", thread_num);
if (IS_ERR(ssif_info->thread)) {
rv = PTR_ERR(ssif_info->thread);
ssif_info->thread = NULL;
dev_notice(&ssif_info->client->dev,
"Could not start kernel thread: error %d\n",
rv);