mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-04-02 07:00:42 -04:00
md: mark rdev Faulty when badblocks setting fails
Currently when sync read fails and badblocks set fails (exceeding 512 limit), rdev isn't immediately marked Faulty. Instead 'recovery_disabled' is set and non-In_sync rdevs are removed later. This preserves array availability if bad regions aren't read, but bad sectors might be read by users before rdev removal. This occurs due to incorrect resync/recovery_offset updates that include these bad sectors. When badblocks exceed 512, keeping the disk provides little benefit while adding complexity. Prompt disk replacement is more important. Therefore when badblocks set fails, directly call md_error to mark rdev Faulty immediately, preventing potential data access issues. After this change, cleanup of offset update logic and 'recovery_disabled' handling will follow. Link: https://lore.kernel.org/linux-raid/20260105110300.1442509-6-linan666@huaweicloud.com Fixes:5e5702898e("md/raid10: Handle read errors during recovery better.") Fixes:3a9f28a511("md/raid1: improve handling of read failure during recovery.") Signed-off-by: Li Nan <linan122@huawei.com> Signed-off-by: Yu Kuai <yukuai@fnnas.com>
This commit is contained in:
@@ -10422,8 +10422,14 @@ bool rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
else
|
||||
s += rdev->data_offset;
|
||||
|
||||
if (!badblocks_set(&rdev->badblocks, s, sectors, 0))
|
||||
if (!badblocks_set(&rdev->badblocks, s, sectors, 0)) {
|
||||
/*
|
||||
* Mark the disk as Faulty when setting badblocks fails,
|
||||
* otherwise, bad sectors may be read.
|
||||
*/
|
||||
md_error(mddev, rdev);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Make sure they get written out promptly */
|
||||
if (test_bit(ExternalBbl, &rdev->flags))
|
||||
|
||||
@@ -2115,8 +2115,7 @@ static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
|
||||
rdev->mddev->recovery);
|
||||
}
|
||||
/* need to record an error - either for the block or the device */
|
||||
if (!rdev_set_badblocks(rdev, sector, sectors, 0))
|
||||
md_error(rdev->mddev, rdev);
|
||||
rdev_set_badblocks(rdev, sector, sectors, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2441,8 +2440,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
if (!success) {
|
||||
/* Cannot read from anywhere - mark it bad */
|
||||
struct md_rdev *rdev = conf->mirrors[read_disk].rdev;
|
||||
if (!rdev_set_badblocks(rdev, sect, s, 0))
|
||||
md_error(mddev, rdev);
|
||||
rdev_set_badblocks(rdev, sect, s, 0);
|
||||
break;
|
||||
}
|
||||
/* write it back and re-read */
|
||||
@@ -2546,7 +2544,6 @@ static void narrow_write_error(struct r1bio *r1_bio, int i)
|
||||
* Badblocks set failed, disk marked Faulty.
|
||||
* No further operations needed.
|
||||
*/
|
||||
md_error(mddev, rdev);
|
||||
bio_put(wbio);
|
||||
break;
|
||||
}
|
||||
@@ -2568,14 +2565,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
|
||||
if (bio->bi_end_io == NULL)
|
||||
continue;
|
||||
if (!bio->bi_status &&
|
||||
test_bit(R1BIO_MadeGood, &r1_bio->state)) {
|
||||
test_bit(R1BIO_MadeGood, &r1_bio->state))
|
||||
rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
|
||||
}
|
||||
if (bio->bi_status &&
|
||||
test_bit(R1BIO_WriteError, &r1_bio->state)) {
|
||||
if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
|
||||
md_error(conf->mddev, rdev);
|
||||
}
|
||||
test_bit(R1BIO_WriteError, &r1_bio->state))
|
||||
rdev_set_badblocks(rdev, r1_bio->sector, s, 0);
|
||||
}
|
||||
put_buf(r1_bio);
|
||||
md_done_sync(conf->mddev, s);
|
||||
|
||||
@@ -2604,8 +2604,7 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
|
||||
&rdev->mddev->recovery);
|
||||
}
|
||||
/* need to record an error - either for the block or the device */
|
||||
if (!rdev_set_badblocks(rdev, sector, sectors, 0))
|
||||
md_error(rdev->mddev, rdev);
|
||||
rdev_set_badblocks(rdev, sector, sectors, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2686,7 +2685,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
r10_bio->devs[slot].addr
|
||||
+ sect,
|
||||
s, 0)) {
|
||||
md_error(mddev, rdev);
|
||||
r10_bio->devs[slot].bio
|
||||
= IO_BLOCKED;
|
||||
}
|
||||
@@ -2825,7 +2823,6 @@ static void narrow_write_error(struct r10bio *r10_bio, int i)
|
||||
* Badblocks set failed, disk marked Faulty.
|
||||
* No further operations needed.
|
||||
*/
|
||||
md_error(mddev, rdev);
|
||||
bio_put(wbio);
|
||||
break;
|
||||
}
|
||||
@@ -2894,35 +2891,29 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
|
||||
if (r10_bio->devs[m].bio == NULL ||
|
||||
r10_bio->devs[m].bio->bi_end_io == NULL)
|
||||
continue;
|
||||
if (!r10_bio->devs[m].bio->bi_status) {
|
||||
if (!r10_bio->devs[m].bio->bi_status)
|
||||
rdev_clear_badblocks(
|
||||
rdev,
|
||||
r10_bio->devs[m].addr,
|
||||
r10_bio->sectors, 0);
|
||||
} else {
|
||||
if (!rdev_set_badblocks(
|
||||
rdev,
|
||||
r10_bio->devs[m].addr,
|
||||
r10_bio->sectors, 0))
|
||||
md_error(conf->mddev, rdev);
|
||||
}
|
||||
else
|
||||
rdev_set_badblocks(rdev,
|
||||
r10_bio->devs[m].addr,
|
||||
r10_bio->sectors, 0);
|
||||
rdev = conf->mirrors[dev].replacement;
|
||||
if (r10_bio->devs[m].repl_bio == NULL ||
|
||||
r10_bio->devs[m].repl_bio->bi_end_io == NULL)
|
||||
continue;
|
||||
|
||||
if (!r10_bio->devs[m].repl_bio->bi_status) {
|
||||
if (!r10_bio->devs[m].repl_bio->bi_status)
|
||||
rdev_clear_badblocks(
|
||||
rdev,
|
||||
r10_bio->devs[m].addr,
|
||||
r10_bio->sectors, 0);
|
||||
} else {
|
||||
if (!rdev_set_badblocks(
|
||||
rdev,
|
||||
r10_bio->devs[m].addr,
|
||||
r10_bio->sectors, 0))
|
||||
md_error(conf->mddev, rdev);
|
||||
}
|
||||
else
|
||||
rdev_set_badblocks(rdev,
|
||||
r10_bio->devs[m].addr,
|
||||
r10_bio->sectors, 0);
|
||||
}
|
||||
put_buf(r10_bio);
|
||||
} else {
|
||||
|
||||
@@ -2821,11 +2821,9 @@ static void raid5_end_read_request(struct bio * bi)
|
||||
else {
|
||||
clear_bit(R5_ReadError, &sh->dev[i].flags);
|
||||
clear_bit(R5_ReWrite, &sh->dev[i].flags);
|
||||
if (!(set_bad
|
||||
&& test_bit(In_sync, &rdev->flags)
|
||||
&& rdev_set_badblocks(
|
||||
rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0)))
|
||||
md_error(conf->mddev, rdev);
|
||||
if (!(set_bad && test_bit(In_sync, &rdev->flags)))
|
||||
rdev_set_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0);
|
||||
}
|
||||
}
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
@@ -3603,11 +3601,10 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
else
|
||||
rdev = NULL;
|
||||
if (rdev) {
|
||||
if (!rdev_set_badblocks(
|
||||
rdev,
|
||||
sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0))
|
||||
md_error(conf->mddev, rdev);
|
||||
rdev_set_badblocks(rdev,
|
||||
sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf),
|
||||
0);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
}
|
||||
@@ -5259,9 +5256,8 @@ static void handle_stripe(struct stripe_head *sh)
|
||||
if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
|
||||
/* We own a safe reference to the rdev */
|
||||
rdev = conf->disks[i].rdev;
|
||||
if (!rdev_set_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0))
|
||||
md_error(conf->mddev, rdev);
|
||||
rdev_set_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
|
||||
|
||||
Reference in New Issue
Block a user