From af140f806ae2679f9dba48ea0f5811da83854eb6 Mon Sep 17 00:00:00 2001 From: Gou Hao Date: Thu, 14 Dec 2023 23:14:58 +0800 Subject: [PATCH 1/7] md/raid1: remove unnecessary null checking If %__GFP_DIRECT_RECLAIM is set then bio_alloc_bioset will always be able to allocate a bio. See comment of bio_alloc_bioset. Signed-off-by: Gou Hao Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231214151458.28970-1-gouhao@uniontech.com --- drivers/md/raid1.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9348f1709512..19c9bf0060ae 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1124,8 +1124,6 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio, behind_bio = bio_alloc_bioset(NULL, vcnt, 0, GFP_NOIO, &r1_bio->mddev->bio_set); - if (!behind_bio) - return; /* discard op, we don't support writezero/writesame yet */ if (!bio_has_data(bio)) { From dc1cc22ed58f11d58d8553c5ec5f11cbfc3e3039 Mon Sep 17 00:00:00 2001 From: Alex Lyakas Date: Wed, 13 Dec 2023 14:24:31 +0200 Subject: [PATCH 2/7] md: Whenassemble the array, consult the superblock of the freshest device Upon assembling the array, both kernel and mdadm allow the devices to have event counter difference of 1, and still consider them as up-to-date. However, a device whose event count is behind by 1, may in fact not be up-to-date, and array resync with such a device may cause data corruption. To avoid this, consult the superblock of the freshest device about the status of a device, whose event counter is behind by 1. Signed-off-by: Alex Lyakas Signed-off-by: Song Liu Link: https://lore.kernel.org/r/1702470271-16073-1-git-send-email-alex.lyakas@zadara.com --- drivers/md/md.c | 54 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 4e9fe5cbeedc..66b9e60b15c6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1206,6 +1206,7 @@ struct super_type { struct md_rdev *refdev, int minor_version); int (*validate_super)(struct mddev *mddev, + struct md_rdev *freshest, struct md_rdev *rdev); void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); @@ -1343,8 +1344,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor /* * validate_super for 0.90.0 + * note: we are not using "freshest" for 0.9 superblock */ -static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) +static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev) { mdp_disk_t *desc; mdp_super_t *sb = page_address(rdev->sb_page); @@ -1856,7 +1858,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ return ret; } -static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) +static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev) { struct mdp_superblock_1 *sb = page_address(rdev->sb_page); __u64 ev1 = le64_to_cpu(sb->events); @@ -1952,13 +1954,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling, except for - * spares (which don't need an event count) */ - ++ev1; + * spares (which don't need an event count). + * Similar to mdadm, we allow event counter difference of 1 + * from the freshest device. + */ if (rdev->desc_nr >= 0 && rdev->desc_nr < le32_to_cpu(sb->max_dev) && (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX || le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)) - if (ev1 < mddev->events) + if (ev1 + 1 < mddev->events) return -EINVAL; } else if (mddev->bitmap) { /* If adding to array with a bitmap, then we can accept an @@ -1979,8 +1983,38 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { role = MD_DISK_ROLE_SPARE; rdev->desc_nr = -1; - } else + } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) { + /* + * If we are assembling, and our event counter is smaller than the + * highest event counter, we cannot trust our superblock about the role. + * It could happen that our rdev was marked as Faulty, and all other + * superblocks were updated with +1 event counter. + * Then, before the next superblock update, which typically happens when + * remove_and_add_spares() removes the device from the array, there was + * a crash or reboot. + * If we allow current rdev without consulting the freshest superblock, + * we could cause data corruption. + * Note that in this case our event counter is smaller by 1 than the + * highest, otherwise, this rdev would not be allowed into array; + * both kernel and mdadm allow event counter difference of 1. + */ + struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page); + u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev); + + if (rdev->desc_nr >= freshest_max_dev) { + /* this is unexpected, better not proceed */ + pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n", + mdname(mddev), rdev->bdev, rdev->desc_nr, + freshest->bdev, freshest_max_dev); + return -EUCLEAN; + } + + role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]); + pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n", + mdname(mddev), rdev->bdev, role, role, freshest->bdev); + } else { role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); + } switch(role) { case MD_DISK_ROLE_SPARE: /* spare */ break; @@ -2887,7 +2921,7 @@ static int add_bound_rdev(struct md_rdev *rdev) * and should be added immediately. */ super_types[mddev->major_version]. - validate_super(mddev, rdev); + validate_super(mddev, NULL/*freshest*/, rdev); err = mddev->pers->hot_add_disk(mddev, rdev); if (err) { md_kick_rdev_from_array(rdev); @@ -3824,7 +3858,7 @@ static int analyze_sbs(struct mddev *mddev) } super_types[mddev->major_version]. - validate_super(mddev, freshest); + validate_super(mddev, NULL/*freshest*/, freshest); i = 0; rdev_for_each_safe(rdev, tmp, mddev) { @@ -3839,7 +3873,7 @@ static int analyze_sbs(struct mddev *mddev) } if (rdev != freshest) { if (super_types[mddev->major_version]. - validate_super(mddev, rdev)) { + validate_super(mddev, freshest, rdev)) { pr_warn("md: kicking non-fresh %pg from array!\n", rdev->bdev); md_kick_rdev_from_array(rdev); @@ -6847,7 +6881,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) rdev->saved_raid_disk = rdev->raid_disk; } else super_types[mddev->major_version]. - validate_super(mddev, rdev); + validate_super(mddev, NULL/*freshest*/, rdev); if ((info->state & (1<raid_disk != info->raid_disk) { /* This was a hot-add request, but events doesn't From 1979dbbe328ca4e1d0f061c94381cfa03388088d Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 15 Dec 2023 10:38:51 +0800 Subject: [PATCH 3/7] md: factor out a helper exceed_read_errors() to check read_errors Move check_decay_read_errors() to raid1-10.c and factor out a helper exceed_read_errors() to check if read_errors exceeds the limit, so that raid1 can also use it. There are no functional changes. Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231215023852.3478228-2-linan666@huaweicloud.com --- drivers/md/raid1-10.c | 54 +++++++++++++++++++++++++++++++++++++++++++ drivers/md/raid1.c | 1 + drivers/md/raid10.c | 49 +++------------------------------------ 3 files changed, 58 insertions(+), 46 deletions(-) diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index 3f22edec70e7..512746551f36 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -173,3 +173,57 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap) else md_bitmap_unplug(bitmap); } + +/* + * Used by fix_read_error() to decay the per rdev read_errors. + * We halve the read error count for every hour that has elapsed + * since the last recorded read error. + */ +static inline void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev) +{ + long cur_time_mon; + unsigned long hours_since_last; + unsigned int read_errors = atomic_read(&rdev->read_errors); + + cur_time_mon = ktime_get_seconds(); + + if (rdev->last_read_error == 0) { + /* first time we've seen a read error */ + rdev->last_read_error = cur_time_mon; + return; + } + + hours_since_last = (long)(cur_time_mon - + rdev->last_read_error) / 3600; + + rdev->last_read_error = cur_time_mon; + + /* + * if hours_since_last is > the number of bits in read_errors + * just set read errors to 0. We do this to avoid + * overflowing the shift of read_errors by hours_since_last. + */ + if (hours_since_last >= 8 * sizeof(read_errors)) + atomic_set(&rdev->read_errors, 0); + else + atomic_set(&rdev->read_errors, read_errors >> hours_since_last); +} + +static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev) +{ + int max_read_errors = atomic_read(&mddev->max_corr_read_errors); + int read_errors; + + check_decay_read_errors(mddev, rdev); + read_errors = atomic_inc_return(&rdev->read_errors); + if (read_errors > max_read_errors) { + pr_notice("md/"RAID_1_10_NAME":%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n", + mdname(mddev), rdev->bdev, read_errors, max_read_errors); + pr_notice("md/"RAID_1_10_NAME":%s: %pg: Failing raid device\n", + mdname(mddev), rdev->bdev); + md_error(mddev, rdev); + return true; + } + + return false; +} diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 19c9bf0060ae..8c65ee0c4445 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -49,6 +49,7 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr); #define raid1_log(md, fmt, args...) \ do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) +#define RAID_1_10_NAME "raid1" #include "raid1-10.c" #define START(node) ((node)->start) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 375c11d6159f..7412066ea22c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -19,6 +19,8 @@ #include #include #include "md.h" + +#define RAID_1_10_NAME "raid10" #include "raid10.h" #include "raid0.h" #include "md-bitmap.h" @@ -2592,42 +2594,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) } } -/* - * Used by fix_read_error() to decay the per rdev read_errors. - * We halve the read error count for every hour that has elapsed - * since the last recorded read error. - * - */ -static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev) -{ - long cur_time_mon; - unsigned long hours_since_last; - unsigned int read_errors = atomic_read(&rdev->read_errors); - - cur_time_mon = ktime_get_seconds(); - - if (rdev->last_read_error == 0) { - /* first time we've seen a read error */ - rdev->last_read_error = cur_time_mon; - return; - } - - hours_since_last = (long)(cur_time_mon - - rdev->last_read_error) / 3600; - - rdev->last_read_error = cur_time_mon; - - /* - * if hours_since_last is > the number of bits in read_errors - * just set read errors to 0. We do this to avoid - * overflowing the shift of read_errors by hours_since_last. - */ - if (hours_since_last >= 8 * sizeof(read_errors)) - atomic_set(&rdev->read_errors, 0); - else - atomic_set(&rdev->read_errors, read_errors >> hours_since_last); -} - static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, int sectors, struct page *page, enum req_op op) { @@ -2665,7 +2631,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 int sect = 0; /* Offset from r10_bio->sector */ int sectors = r10_bio->sectors, slot = r10_bio->read_slot; struct md_rdev *rdev; - int max_read_errors = atomic_read(&mddev->max_corr_read_errors); int d = r10_bio->devs[slot].devnum; /* still own a reference to this rdev, so it cannot @@ -2678,15 +2643,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 more fix_read_error() attempts */ return; - check_decay_read_errors(mddev, rdev); - atomic_inc(&rdev->read_errors); - if (atomic_read(&rdev->read_errors) > max_read_errors) { - pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n", - mdname(mddev), rdev->bdev, - atomic_read(&rdev->read_errors), max_read_errors); - pr_notice("md/raid10:%s: %pg: Failing raid device\n", - mdname(mddev), rdev->bdev); - md_error(mddev, rdev); + if (exceed_read_errors(mddev, rdev)) { r10_bio->devs[slot].bio = IO_BLOCKED; return; } From ca294b34aaf3a417fe9069b174e52508ac918ec8 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 15 Dec 2023 10:38:52 +0800 Subject: [PATCH 4/7] md/raid1: support read error check After commit 1e50915fe0bb ("raid: improve MD/raid10 handling of correctable read errors."), rdev will be set to faulty if it reads data error to many times in raid10. Add this mechanism to raid1 now. Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231215023852.3478228-3-linan666@huaweicloud.com --- drivers/md/raid1.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 8c65ee0c4445..aaa434f0c175 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2256,16 +2256,24 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) * 3. Performs writes following reads for array synchronising. */ -static void fix_read_error(struct r1conf *conf, int read_disk, - sector_t sect, int sectors) +static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio) { + sector_t sect = r1_bio->sector; + int sectors = r1_bio->sectors; + int read_disk = r1_bio->read_disk; struct mddev *mddev = conf->mddev; + struct md_rdev *rdev = rcu_dereference(conf->mirrors[read_disk].rdev); + + if (exceed_read_errors(mddev, rdev)) { + r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED; + return; + } + while(sectors) { int s = sectors; int d = read_disk; int success = 0; int start; - struct md_rdev *rdev; if (s > (PAGE_SIZE>>9)) s = PAGE_SIZE >> 9; @@ -2506,8 +2514,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) if (mddev->ro == 0 && !test_bit(FailFast, &rdev->flags)) { freeze_array(conf, 1); - fix_read_error(conf, r1_bio->read_disk, - r1_bio->sector, r1_bio->sectors); + fix_read_error(conf, r1_bio); unfreeze_array(conf); } else if (mddev->ro == 0 && test_bit(FailFast, &rdev->flags)) { md_error(mddev, rdev); From 849d18e27be9a1253f2318cb4549cc857219d991 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 14 Dec 2023 14:21:05 -0800 Subject: [PATCH 5/7] md: Remove deprecated CONFIG_MD_LINEAR md-linear has been marked as deprecated for 2.5 years. Remove it. Cc: Christoph Hellwig Cc: Jens Axboe Cc: Neil Brown Cc: Guoqing Jiang Cc: Mateusz Grzonka Cc: Jes Sorensen Signed-off-by: Song Liu Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20231214222107.2016042-2-song@kernel.org --- drivers/md/Kconfig | 13 -- drivers/md/Makefile | 6 +- drivers/md/md-autodetect.c | 8 +- drivers/md/md-linear.c | 318 --------------------------------- drivers/md/md.c | 2 +- include/uapi/linux/raid/md_p.h | 8 +- include/uapi/linux/raid/md_u.h | 7 +- 7 files changed, 8 insertions(+), 354 deletions(-) delete mode 100644 drivers/md/md-linear.c diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 2a8b081bce7d..0c721e0e5921 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -61,19 +61,6 @@ config MD_BITMAP_FILE various kernel APIs and can only work with files on a file system not actually sitting on the MD device. -config MD_LINEAR - tristate "Linear (append) mode (deprecated)" - depends on BLK_DEV_MD - help - If you say Y here, then your multiple devices driver will be able to - use the so-called linear mode, i.e. it will combine the hard disk - partitions by simply appending one to the other. - - To compile this as a module, choose M here: the module - will be called linear. - - If unsure, say Y. - config MD_RAID0 tristate "RAID-0 (striping) mode" depends on BLK_DEV_MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 84291e38dca8..c72f76cf7b63 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -29,16 +29,14 @@ dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o md-mod-y += md.o md-bitmap.o raid456-y += raid5.o raid5-cache.o raid5-ppl.o -linear-y += md-linear.o multipath-y += md-multipath.o faulty-y += md-faulty.o # Note: link order is important. All raid personalities -# and must come before md.o, as they each initialise -# themselves, and md.o may use the personalities when it +# and must come before md.o, as they each initialise +# themselves, and md.o may use the personalities when it # auto-initialised. -obj-$(CONFIG_MD_LINEAR) += linear.o obj-$(CONFIG_MD_RAID0) += raid0.o obj-$(CONFIG_MD_RAID1) += raid1.o obj-$(CONFIG_MD_RAID10) += raid10.o diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c index 4b80165afd23..b2a00f213c2c 100644 --- a/drivers/md/md-autodetect.c +++ b/drivers/md/md-autodetect.c @@ -49,7 +49,6 @@ static int md_setup_ents __initdata; * instead of just one. -- KTK * 18May2000: Added support for persistent-superblock arrays: * md=n,0,factor,fault,device-list uses RAID0 for device n - * md=n,-1,factor,fault,device-list uses LINEAR for device n * md=n,device-list reads a RAID superblock from the devices * elements in device-list are read by name_to_kdev_t so can be * a hex number or something like /dev/hda1 /dev/sdb @@ -88,7 +87,7 @@ static int __init md_setup(char *str) md_setup_ents++; switch (get_option(&str, &level)) { /* RAID level */ case 2: /* could be 0 or -1.. */ - if (level == 0 || level == LEVEL_LINEAR) { + if (level == 0) { if (get_option(&str, &factor) != 2 || /* Chunk Size */ get_option(&str, &fault) != 2) { printk(KERN_WARNING "md: Too few arguments supplied to md=.\n"); @@ -96,10 +95,7 @@ static int __init md_setup(char *str) } md_setup_args[ent].level = level; md_setup_args[ent].chunk = 1 << (factor+12); - if (level == LEVEL_LINEAR) - pername = "linear"; - else - pername = "raid0"; + pername = "raid0"; break; } fallthrough; diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c deleted file mode 100644 index 8eca7693b793..000000000000 --- a/drivers/md/md-linear.c +++ /dev/null @@ -1,318 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - linear.c : Multiple Devices driver for Linux - Copyright (C) 1994-96 Marc ZYNGIER - or - - - Linear mode management functions. - -*/ - -#include -#include -#include -#include -#include -#include -#include "md.h" -#include "md-linear.h" - -/* - * find which device holds a particular offset - */ -static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) -{ - int lo, mid, hi; - struct linear_conf *conf; - - lo = 0; - hi = mddev->raid_disks - 1; - conf = mddev->private; - - /* - * Binary Search - */ - - while (hi > lo) { - - mid = (hi + lo) / 2; - if (sector < conf->disks[mid].end_sector) - hi = mid; - else - lo = mid + 1; - } - - return conf->disks + lo; -} - -static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks) -{ - struct linear_conf *conf; - sector_t array_sectors; - - conf = mddev->private; - WARN_ONCE(sectors || raid_disks, - "%s does not support generic reshape\n", __func__); - array_sectors = conf->array_sectors; - - return array_sectors; -} - -static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) -{ - struct linear_conf *conf; - struct md_rdev *rdev; - int i, cnt; - - conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL); - if (!conf) - return NULL; - - /* - * conf->raid_disks is copy of mddev->raid_disks. The reason to - * keep a copy of mddev->raid_disks in struct linear_conf is, - * mddev->raid_disks may not be consistent with pointers number of - * conf->disks[] when it is updated in linear_add() and used to - * iterate old conf->disks[] earray in linear_congested(). - * Here conf->raid_disks is always consitent with number of - * pointers in conf->disks[] array, and mddev->private is updated - * with rcu_assign_pointer() in linear_addr(), such race can be - * avoided. - */ - conf->raid_disks = raid_disks; - - cnt = 0; - conf->array_sectors = 0; - - rdev_for_each(rdev, mddev) { - int j = rdev->raid_disk; - struct dev_info *disk = conf->disks + j; - sector_t sectors; - - if (j < 0 || j >= raid_disks || disk->rdev) { - pr_warn("md/linear:%s: disk numbering problem. Aborting!\n", - mdname(mddev)); - goto out; - } - - disk->rdev = rdev; - if (mddev->chunk_sectors) { - sectors = rdev->sectors; - sector_div(sectors, mddev->chunk_sectors); - rdev->sectors = sectors * mddev->chunk_sectors; - } - - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - - conf->array_sectors += rdev->sectors; - cnt++; - } - if (cnt != raid_disks) { - pr_warn("md/linear:%s: not enough drives present. Aborting!\n", - mdname(mddev)); - goto out; - } - - /* - * Here we calculate the device offsets. - */ - conf->disks[0].end_sector = conf->disks[0].rdev->sectors; - - for (i = 1; i < raid_disks; i++) - conf->disks[i].end_sector = - conf->disks[i-1].end_sector + - conf->disks[i].rdev->sectors; - - return conf; - -out: - kfree(conf); - return NULL; -} - -static int linear_run (struct mddev *mddev) -{ - struct linear_conf *conf; - int ret; - - if (md_check_no_bitmap(mddev)) - return -EINVAL; - conf = linear_conf(mddev, mddev->raid_disks); - - if (!conf) - return 1; - mddev->private = conf; - md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); - - ret = md_integrity_register(mddev); - if (ret) { - kfree(conf); - mddev->private = NULL; - } - return ret; -} - -static int linear_add(struct mddev *mddev, struct md_rdev *rdev) -{ - /* Adding a drive to a linear array allows the array to grow. - * It is permitted if the new drive has a matching superblock - * already on it, with raid_disk equal to raid_disks. - * It is achieved by creating a new linear_private_data structure - * and swapping it in in-place of the current one. - * The current one is never freed until the array is stopped. - * This avoids races. - */ - struct linear_conf *newconf, *oldconf; - - if (rdev->saved_raid_disk != mddev->raid_disks) - return -EINVAL; - - rdev->raid_disk = rdev->saved_raid_disk; - rdev->saved_raid_disk = -1; - - newconf = linear_conf(mddev,mddev->raid_disks+1); - - if (!newconf) - return -ENOMEM; - - /* newconf->raid_disks already keeps a copy of * the increased - * value of mddev->raid_disks, WARN_ONCE() is just used to make - * sure of this. It is possible that oldconf is still referenced - * in linear_congested(), therefore kfree_rcu() is used to free - * oldconf until no one uses it anymore. - */ - oldconf = rcu_dereference_protected(mddev->private, - lockdep_is_held(&mddev->reconfig_mutex)); - mddev->raid_disks++; - WARN_ONCE(mddev->raid_disks != newconf->raid_disks, - "copied raid_disks doesn't match mddev->raid_disks"); - rcu_assign_pointer(mddev->private, newconf); - md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); - set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); - kfree_rcu(oldconf, rcu); - return 0; -} - -static void linear_free(struct mddev *mddev, void *priv) -{ - struct linear_conf *conf = priv; - - kfree(conf); -} - -static bool linear_make_request(struct mddev *mddev, struct bio *bio) -{ - struct dev_info *tmp_dev; - sector_t start_sector, end_sector, data_offset; - sector_t bio_sector = bio->bi_iter.bi_sector; - - if (unlikely(bio->bi_opf & REQ_PREFLUSH) - && md_flush_request(mddev, bio)) - return true; - - tmp_dev = which_dev(mddev, bio_sector); - start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; - end_sector = tmp_dev->end_sector; - data_offset = tmp_dev->rdev->data_offset; - - if (unlikely(bio_sector >= end_sector || - bio_sector < start_sector)) - goto out_of_bounds; - - if (unlikely(is_rdev_broken(tmp_dev->rdev))) { - md_error(mddev, tmp_dev->rdev); - bio_io_error(bio); - return true; - } - - if (unlikely(bio_end_sector(bio) > end_sector)) { - /* This bio crosses a device boundary, so we have to split it */ - struct bio *split = bio_split(bio, end_sector - bio_sector, - GFP_NOIO, &mddev->bio_set); - bio_chain(split, bio); - submit_bio_noacct(bio); - bio = split; - } - - md_account_bio(mddev, &bio); - bio_set_dev(bio, tmp_dev->rdev->bdev); - bio->bi_iter.bi_sector = bio->bi_iter.bi_sector - - start_sector + data_offset; - - if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !bdev_max_discard_sectors(bio->bi_bdev))) { - /* Just ignore it */ - bio_endio(bio); - } else { - if (mddev->gendisk) - trace_block_bio_remap(bio, disk_devt(mddev->gendisk), - bio_sector); - mddev_check_write_zeroes(mddev, bio); - submit_bio_noacct(bio); - } - return true; - -out_of_bounds: - pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %pg: %llu sectors, offset %llu\n", - mdname(mddev), - (unsigned long long)bio->bi_iter.bi_sector, - tmp_dev->rdev->bdev, - (unsigned long long)tmp_dev->rdev->sectors, - (unsigned long long)start_sector); - bio_io_error(bio); - return true; -} - -static void linear_status (struct seq_file *seq, struct mddev *mddev) -{ - seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); -} - -static void linear_error(struct mddev *mddev, struct md_rdev *rdev) -{ - if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) { - char *md_name = mdname(mddev); - - pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n", - md_name, rdev->bdev); - } -} - -static void linear_quiesce(struct mddev *mddev, int state) -{ -} - -static struct md_personality linear_personality = -{ - .name = "linear", - .level = LEVEL_LINEAR, - .owner = THIS_MODULE, - .make_request = linear_make_request, - .run = linear_run, - .free = linear_free, - .status = linear_status, - .hot_add_disk = linear_add, - .size = linear_size, - .quiesce = linear_quiesce, - .error_handler = linear_error, -}; - -static int __init linear_init (void) -{ - return register_md_personality (&linear_personality); -} - -static void linear_exit (void) -{ - unregister_md_personality (&linear_personality); -} - -module_init(linear_init); -module_exit(linear_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)"); -MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ -MODULE_ALIAS("md-linear"); -MODULE_ALIAS("md-level--1"); diff --git a/drivers/md/md.c b/drivers/md/md.c index 66b9e60b15c6..83f5a785c782 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8124,7 +8124,7 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev) return; mddev->pers->error_handler(mddev, rdev); - if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR) + if (mddev->pers->level == 0) return; if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags)) diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 6c0aa577730f..b36e282a413d 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -2,15 +2,11 @@ /* md_p.h : physical layout of Linux RAID devices Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _MD_P_H @@ -237,7 +233,7 @@ struct mdp_superblock_1 { char set_name[32]; /* set and interpreted by user-space */ __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ + __le32 level; /* -4 (multipath), 0,1,4,5 */ __le32 layout; /* only for raid5 and raid10 currently */ __le64 size; /* used size of component devices, in 512byte sectors */ diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index 105307244961..c285f76e5d8d 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h @@ -2,15 +2,11 @@ /* md_u.h : user <=> kernel API between Linux raidtools and RAID drivers Copyright (C) 1998 Ingo Molnar - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _UAPI_MD_U_H @@ -109,7 +105,6 @@ typedef struct mdu_array_info_s { /* non-obvious values for 'level' */ #define LEVEL_MULTIPATH (-4) -#define LEVEL_LINEAR (-1) #define LEVEL_FAULTY (-5) /* we need a value for 'no level specified' and 0 From d8730f0cf4effa015bc5e8840d8f8fb3cdb01aab Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 14 Dec 2023 14:21:06 -0800 Subject: [PATCH 6/7] md: Remove deprecated CONFIG_MD_MULTIPATH md-multipath has been marked as deprecated for 2.5 years. Remove it. Cc: Christoph Hellwig Cc: Jens Axboe Cc: Neil Brown Cc: Guoqing Jiang Cc: Mateusz Grzonka Cc: Jes Sorensen Signed-off-by: Song Liu Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20231214222107.2016042-3-song@kernel.org --- drivers/md/Kconfig | 11 - drivers/md/Makefile | 2 - drivers/md/md-multipath.c | 463 --------------------------------- drivers/md/md.c | 239 ++++++++--------- include/uapi/linux/raid/md_p.h | 2 +- include/uapi/linux/raid/md_u.h | 1 - 6 files changed, 108 insertions(+), 610 deletions(-) delete mode 100644 drivers/md/md-multipath.c diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 0c721e0e5921..de4f47fe5a03 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -159,17 +159,6 @@ config MD_RAID456 If unsure, say Y. -config MD_MULTIPATH - tristate "Multipath I/O support (deprecated)" - depends on BLK_DEV_MD - help - MD_MULTIPATH provides a simple multi-path personality for use - the MD framework. It is not under active development. New - projects should consider using DM_MULTIPATH which has more - features and more testing. - - If unsure, say N. - config MD_FAULTY tristate "Faulty test module for MD (deprecated)" depends on BLK_DEV_MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index c72f76cf7b63..6287c73399e7 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -29,7 +29,6 @@ dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o md-mod-y += md.o md-bitmap.o raid456-y += raid5.o raid5-cache.o raid5-ppl.o -multipath-y += md-multipath.o faulty-y += md-faulty.o # Note: link order is important. All raid personalities @@ -41,7 +40,6 @@ obj-$(CONFIG_MD_RAID0) += raid0.o obj-$(CONFIG_MD_RAID1) += raid1.o obj-$(CONFIG_MD_RAID10) += raid10.o obj-$(CONFIG_MD_RAID456) += raid456.o -obj-$(CONFIG_MD_MULTIPATH) += multipath.o obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_MD_CLUSTER) += md-cluster.o obj-$(CONFIG_BCACHE) += bcache/ diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c deleted file mode 100644 index 19c8625ea642..000000000000 --- a/drivers/md/md-multipath.c +++ /dev/null @@ -1,463 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * multipath.c : Multiple Devices driver for Linux - * - * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat - * - * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman - * - * MULTIPATH management functions. - * - * derived from raid1.c. - */ - -#include -#include -#include -#include -#include -#include "md.h" -#include "md-multipath.h" - -#define MAX_WORK_PER_DISK 128 - -#define NR_RESERVED_BUFS 32 - -static int multipath_map (struct mpconf *conf) -{ - int i, disks = conf->raid_disks; - - /* - * Later we do read balancing on the read side - * now we use the first available disk. - */ - - for (i = 0; i < disks; i++) { - struct md_rdev *rdev = conf->multipaths[i].rdev; - - if (rdev && test_bit(In_sync, &rdev->flags) && - !test_bit(Faulty, &rdev->flags)) { - atomic_inc(&rdev->nr_pending); - return i; - } - } - - pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n"); - return (-1); -} - -static void multipath_reschedule_retry (struct multipath_bh *mp_bh) -{ - unsigned long flags; - struct mddev *mddev = mp_bh->mddev; - struct mpconf *conf = mddev->private; - - spin_lock_irqsave(&conf->device_lock, flags); - list_add(&mp_bh->retry_list, &conf->retry_list); - spin_unlock_irqrestore(&conf->device_lock, flags); - md_wakeup_thread(mddev->thread); -} - -/* - * multipath_end_bh_io() is called when we have finished servicing a multipathed - * operation and are ready to return a success/failure code to the buffer - * cache layer. - */ -static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status) -{ - struct bio *bio = mp_bh->master_bio; - struct mpconf *conf = mp_bh->mddev->private; - - bio->bi_status = status; - bio_endio(bio); - mempool_free(mp_bh, &conf->pool); -} - -static void multipath_end_request(struct bio *bio) -{ - struct multipath_bh *mp_bh = bio->bi_private; - struct mpconf *conf = mp_bh->mddev->private; - struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev; - - if (!bio->bi_status) - multipath_end_bh_io(mp_bh, 0); - else if (!(bio->bi_opf & REQ_RAHEAD)) { - /* - * oops, IO error: - */ - md_error (mp_bh->mddev, rdev); - pr_info("multipath: %pg: rescheduling sector %llu\n", - rdev->bdev, - (unsigned long long)bio->bi_iter.bi_sector); - multipath_reschedule_retry(mp_bh); - } else - multipath_end_bh_io(mp_bh, bio->bi_status); - rdev_dec_pending(rdev, conf->mddev); -} - -static bool multipath_make_request(struct mddev *mddev, struct bio * bio) -{ - struct mpconf *conf = mddev->private; - struct multipath_bh * mp_bh; - struct multipath_info *multipath; - - if (unlikely(bio->bi_opf & REQ_PREFLUSH) - && md_flush_request(mddev, bio)) - return true; - - md_account_bio(mddev, &bio); - mp_bh = mempool_alloc(&conf->pool, GFP_NOIO); - - mp_bh->master_bio = bio; - mp_bh->mddev = mddev; - - mp_bh->path = multipath_map(conf); - if (mp_bh->path < 0) { - bio_io_error(bio); - mempool_free(mp_bh, &conf->pool); - return true; - } - multipath = conf->multipaths + mp_bh->path; - - bio_init_clone(multipath->rdev->bdev, &mp_bh->bio, bio, GFP_NOIO); - - mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; - mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT; - mp_bh->bio.bi_end_io = multipath_end_request; - mp_bh->bio.bi_private = mp_bh; - mddev_check_write_zeroes(mddev, &mp_bh->bio); - submit_bio_noacct(&mp_bh->bio); - return true; -} - -static void multipath_status(struct seq_file *seq, struct mddev *mddev) -{ - struct mpconf *conf = mddev->private; - int i; - - lockdep_assert_held(&mddev->lock); - - seq_printf (seq, " [%d/%d] [", conf->raid_disks, - conf->raid_disks - mddev->degraded); - for (i = 0; i < conf->raid_disks; i++) { - struct md_rdev *rdev = READ_ONCE(conf->multipaths[i].rdev); - - seq_printf(seq, "%s", - rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); - } - seq_putc(seq, ']'); -} - -/* - * Careful, this can execute in IRQ contexts as well! - */ -static void multipath_error (struct mddev *mddev, struct md_rdev *rdev) -{ - struct mpconf *conf = mddev->private; - - if (conf->raid_disks - mddev->degraded <= 1) { - /* - * Uh oh, we can do nothing if this is our last path, but - * first check if this is a queued request for a device - * which has just failed. - */ - pr_warn("multipath: only one IO path left and IO error.\n"); - /* leave it active... it's all we have */ - return; - } - /* - * Mark disk as unusable - */ - if (test_and_clear_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded++; - spin_unlock_irqrestore(&conf->device_lock, flags); - } - set_bit(Faulty, &rdev->flags); - set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); - pr_err("multipath: IO failure on %pg, disabling IO path.\n" - "multipath: Operation continuing on %d IO paths.\n", - rdev->bdev, - conf->raid_disks - mddev->degraded); -} - -static void print_multipath_conf(struct mpconf *conf) -{ - int i; - struct multipath_info *tmp; - - pr_debug("MULTIPATH conf printout:\n"); - if (!conf) { - pr_debug("(conf==NULL)\n"); - return; - } - pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, - conf->raid_disks); - - lockdep_assert_held(&conf->mddev->reconfig_mutex); - for (i = 0; i < conf->raid_disks; i++) { - tmp = conf->multipaths + i; - if (tmp->rdev) - pr_debug(" disk%d, o:%d, dev:%pg\n", - i,!test_bit(Faulty, &tmp->rdev->flags), - tmp->rdev->bdev); - } -} - -static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev) -{ - struct mpconf *conf = mddev->private; - int err = -EEXIST; - int path; - struct multipath_info *p; - int first = 0; - int last = mddev->raid_disks - 1; - - if (rdev->raid_disk >= 0) - first = last = rdev->raid_disk; - - print_multipath_conf(conf); - - for (path = first; path <= last; path++) - if ((p=conf->multipaths+path)->rdev == NULL) { - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - - err = md_integrity_add_rdev(rdev, mddev); - if (err) - break; - spin_lock_irq(&conf->device_lock); - mddev->degraded--; - rdev->raid_disk = path; - set_bit(In_sync, &rdev->flags); - spin_unlock_irq(&conf->device_lock); - WRITE_ONCE(p->rdev, rdev); - err = 0; - break; - } - - print_multipath_conf(conf); - - return err; -} - -static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev) -{ - struct mpconf *conf = mddev->private; - int err = 0; - int number = rdev->raid_disk; - struct multipath_info *p = conf->multipaths + number; - - print_multipath_conf(conf); - - if (rdev == p->rdev) { - if (test_bit(In_sync, &rdev->flags) || - atomic_read(&rdev->nr_pending)) { - pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number); - err = -EBUSY; - goto abort; - } - WRITE_ONCE(p->rdev, NULL); - err = md_integrity_register(mddev); - } -abort: - - print_multipath_conf(conf); - return err; -} - -/* - * This is a kernel thread which: - * - * 1. Retries failed read operations on working multipaths. - * 2. Updates the raid superblock when problems encounter. - * 3. Performs writes following reads for array syncronising. - */ - -static void multipathd(struct md_thread *thread) -{ - struct mddev *mddev = thread->mddev; - struct multipath_bh *mp_bh; - struct bio *bio; - unsigned long flags; - struct mpconf *conf = mddev->private; - struct list_head *head = &conf->retry_list; - - md_check_recovery(mddev); - for (;;) { - spin_lock_irqsave(&conf->device_lock, flags); - if (list_empty(head)) - break; - mp_bh = list_entry(head->prev, struct multipath_bh, retry_list); - list_del(head->prev); - spin_unlock_irqrestore(&conf->device_lock, flags); - - bio = &mp_bh->bio; - bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector; - - if ((mp_bh->path = multipath_map (conf))<0) { - pr_err("multipath: %pg: unrecoverable IO read error for block %llu\n", - bio->bi_bdev, - (unsigned long long)bio->bi_iter.bi_sector); - multipath_end_bh_io(mp_bh, BLK_STS_IOERR); - } else { - pr_err("multipath: %pg: redirecting sector %llu to another IO path\n", - bio->bi_bdev, - (unsigned long long)bio->bi_iter.bi_sector); - *bio = *(mp_bh->master_bio); - bio->bi_iter.bi_sector += - conf->multipaths[mp_bh->path].rdev->data_offset; - bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev); - bio->bi_opf |= REQ_FAILFAST_TRANSPORT; - bio->bi_end_io = multipath_end_request; - bio->bi_private = mp_bh; - submit_bio_noacct(bio); - } - } - spin_unlock_irqrestore(&conf->device_lock, flags); -} - -static sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks) -{ - WARN_ONCE(sectors || raid_disks, - "%s does not support generic reshape\n", __func__); - - return mddev->dev_sectors; -} - -static int multipath_run (struct mddev *mddev) -{ - struct mpconf *conf; - int disk_idx; - struct multipath_info *disk; - struct md_rdev *rdev; - int working_disks; - int ret; - - if (md_check_no_bitmap(mddev)) - return -EINVAL; - - if (mddev->level != LEVEL_MULTIPATH) { - pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n", - mdname(mddev), mddev->level); - goto out; - } - /* - * copy the already verified devices into our private MULTIPATH - * bookkeeping area. [whatever we allocate in multipath_run(), - * should be freed in multipath_free()] - */ - - conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL); - mddev->private = conf; - if (!conf) - goto out; - - conf->multipaths = kcalloc(mddev->raid_disks, - sizeof(struct multipath_info), - GFP_KERNEL); - if (!conf->multipaths) - goto out_free_conf; - - working_disks = 0; - rdev_for_each(rdev, mddev) { - disk_idx = rdev->raid_disk; - if (disk_idx < 0 || - disk_idx >= mddev->raid_disks) - continue; - - disk = conf->multipaths + disk_idx; - disk->rdev = rdev; - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - - if (!test_bit(Faulty, &rdev->flags)) - working_disks++; - } - - conf->raid_disks = mddev->raid_disks; - conf->mddev = mddev; - spin_lock_init(&conf->device_lock); - INIT_LIST_HEAD(&conf->retry_list); - - if (!working_disks) { - pr_warn("multipath: no operational IO paths for %s\n", - mdname(mddev)); - goto out_free_conf; - } - mddev->degraded = conf->raid_disks - working_disks; - - ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS, - sizeof(struct multipath_bh)); - if (ret) - goto out_free_conf; - - rcu_assign_pointer(mddev->thread, - md_register_thread(multipathd, mddev, "multipath")); - if (!mddev->thread) - goto out_free_conf; - - pr_info("multipath: array %s active with %d out of %d IO paths\n", - mdname(mddev), conf->raid_disks - mddev->degraded, - mddev->raid_disks); - /* - * Ok, everything is just fine now - */ - md_set_array_sectors(mddev, multipath_size(mddev, 0, 0)); - - if (md_integrity_register(mddev)) - goto out_free_conf; - - return 0; - -out_free_conf: - mempool_exit(&conf->pool); - kfree(conf->multipaths); - kfree(conf); - mddev->private = NULL; -out: - return -EIO; -} - -static void multipath_free(struct mddev *mddev, void *priv) -{ - struct mpconf *conf = priv; - - mempool_exit(&conf->pool); - kfree(conf->multipaths); - kfree(conf); -} - -static struct md_personality multipath_personality = -{ - .name = "multipath", - .level = LEVEL_MULTIPATH, - .owner = THIS_MODULE, - .make_request = multipath_make_request, - .run = multipath_run, - .free = multipath_free, - .status = multipath_status, - .error_handler = multipath_error, - .hot_add_disk = multipath_add_disk, - .hot_remove_disk= multipath_remove_disk, - .size = multipath_size, -}; - -static int __init multipath_init (void) -{ - return register_md_personality (&multipath_personality); -} - -static void __exit multipath_exit (void) -{ - unregister_md_personality (&multipath_personality); -} - -module_init(multipath_init); -module_exit(multipath_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)"); -MODULE_ALIAS("md-personality-7"); /* MULTIPATH */ -MODULE_ALIAS("md-multipath"); -MODULE_ALIAS("md-level--4"); diff --git a/drivers/md/md.c b/drivers/md/md.c index 83f5a785c782..e351e6c51cc7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1287,17 +1287,11 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor rdev->sb_size = MD_SB_BYTES; rdev->badblocks.shift = -1; - if (sb->level == LEVEL_MULTIPATH) - rdev->desc_nr = -1; - else - rdev->desc_nr = sb->this_disk.number; + rdev->desc_nr = sb->this_disk.number; - /* not spare disk, or LEVEL_MULTIPATH */ - if (sb->level == LEVEL_MULTIPATH || - (rdev->desc_nr >= 0 && - rdev->desc_nr < MD_SB_DISKS && - sb->disks[rdev->desc_nr].state & - ((1<desc_nr >= 0 && rdev->desc_nr < MD_SB_DISKS && + sb->disks[rdev->desc_nr].state & ((1<level != LEVEL_MULTIPATH) { - desc = sb->disks + rdev->desc_nr; + desc = sb->disks + rdev->desc_nr; - if (desc->state & (1<flags); - else if (desc->state & (1<raid_disk < mddev->raid_disks */) { - set_bit(In_sync, &rdev->flags); - rdev->raid_disk = desc->raid_disk; - rdev->saved_raid_disk = desc->raid_disk; - } else if (desc->state & (1<minor_version >= 91) { - rdev->recovery_offset = 0; - rdev->raid_disk = desc->raid_disk; - } - } - if (desc->state & (1<flags); - if (desc->state & (1<flags); - } else /* MULTIPATH are always insync */ + if (desc->state & (1<flags); + else if (desc->state & (1<flags); + rdev->raid_disk = desc->raid_disk; + rdev->saved_raid_disk = desc->raid_disk; + } else if (desc->state & (1<minor_version >= 91) { + rdev->recovery_offset = 0; + rdev->raid_disk = desc->raid_disk; + } + } + if (desc->state & (1<flags); + if (desc->state & (1<flags); return 0; } @@ -1758,10 +1749,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ && rdev->new_data_offset < sb_start + (rdev->sb_size/512)) return -EINVAL; - if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) - rdev->desc_nr = -1; - else - rdev->desc_nr = le32_to_cpu(sb->dev_number); + rdev->desc_nr = le32_to_cpu(sb->dev_number); if (!rdev->bb_page) { rdev->bb_page = alloc_page(GFP_KERNEL); @@ -1814,12 +1802,10 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ sb->level != 0) return -EINVAL; - /* not spare disk, or LEVEL_MULTIPATH */ - if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) || - (rdev->desc_nr >= 0 && - rdev->desc_nr < le32_to_cpu(sb->max_dev) && - (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX || - le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))) + /* not spare disk */ + if (rdev->desc_nr >= 0 && rdev->desc_nr < le32_to_cpu(sb->max_dev) && + (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX || + le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)) spare_disk = false; if (!refdev) { @@ -1862,6 +1848,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc { struct mdp_superblock_1 *sb = page_address(rdev->sb_page); __u64 ev1 = le64_to_cpu(sb->events); + int role; rdev->raid_disk = -1; clear_bit(Faulty, &rdev->flags); @@ -1977,88 +1964,85 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc /* just a hot-add of a new device, leave raid_disk at -1 */ return 0; } - if (mddev->level != LEVEL_MULTIPATH) { - int role; - if (rdev->desc_nr < 0 || - rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { - role = MD_DISK_ROLE_SPARE; - rdev->desc_nr = -1; - } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) { - /* - * If we are assembling, and our event counter is smaller than the - * highest event counter, we cannot trust our superblock about the role. - * It could happen that our rdev was marked as Faulty, and all other - * superblocks were updated with +1 event counter. - * Then, before the next superblock update, which typically happens when - * remove_and_add_spares() removes the device from the array, there was - * a crash or reboot. - * If we allow current rdev without consulting the freshest superblock, - * we could cause data corruption. - * Note that in this case our event counter is smaller by 1 than the - * highest, otherwise, this rdev would not be allowed into array; - * both kernel and mdadm allow event counter difference of 1. - */ - struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page); - u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev); - if (rdev->desc_nr >= freshest_max_dev) { - /* this is unexpected, better not proceed */ - pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n", - mdname(mddev), rdev->bdev, rdev->desc_nr, - freshest->bdev, freshest_max_dev); - return -EUCLEAN; - } + if (rdev->desc_nr < 0 || + rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { + role = MD_DISK_ROLE_SPARE; + rdev->desc_nr = -1; + } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) { + /* + * If we are assembling, and our event counter is smaller than the + * highest event counter, we cannot trust our superblock about the role. + * It could happen that our rdev was marked as Faulty, and all other + * superblocks were updated with +1 event counter. + * Then, before the next superblock update, which typically happens when + * remove_and_add_spares() removes the device from the array, there was + * a crash or reboot. + * If we allow current rdev without consulting the freshest superblock, + * we could cause data corruption. + * Note that in this case our event counter is smaller by 1 than the + * highest, otherwise, this rdev would not be allowed into array; + * both kernel and mdadm allow event counter difference of 1. + */ + struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page); + u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev); - role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]); - pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n", - mdname(mddev), rdev->bdev, role, role, freshest->bdev); + if (rdev->desc_nr >= freshest_max_dev) { + /* this is unexpected, better not proceed */ + pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n", + mdname(mddev), rdev->bdev, rdev->desc_nr, + freshest->bdev, freshest_max_dev); + return -EUCLEAN; + } + + role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]); + pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n", + mdname(mddev), rdev->bdev, role, role, freshest->bdev); + } else { + role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); + } + switch (role) { + case MD_DISK_ROLE_SPARE: /* spare */ + break; + case MD_DISK_ROLE_FAULTY: /* faulty */ + set_bit(Faulty, &rdev->flags); + break; + case MD_DISK_ROLE_JOURNAL: /* journal device */ + if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) { + /* journal device without journal feature */ + pr_warn("md: journal device provided without journal feature, ignoring the device\n"); + return -EINVAL; + } + set_bit(Journal, &rdev->flags); + rdev->journal_tail = le64_to_cpu(sb->journal_tail); + rdev->raid_disk = 0; + break; + default: + rdev->saved_raid_disk = role; + if ((le32_to_cpu(sb->feature_map) & + MD_FEATURE_RECOVERY_OFFSET)) { + rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); + if (!(le32_to_cpu(sb->feature_map) & + MD_FEATURE_RECOVERY_BITMAP)) + rdev->saved_raid_disk = -1; } else { - role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); + /* + * If the array is FROZEN, then the device can't + * be in_sync with rest of array. + */ + if (!test_bit(MD_RECOVERY_FROZEN, + &mddev->recovery)) + set_bit(In_sync, &rdev->flags); } - switch(role) { - case MD_DISK_ROLE_SPARE: /* spare */ - break; - case MD_DISK_ROLE_FAULTY: /* faulty */ - set_bit(Faulty, &rdev->flags); - break; - case MD_DISK_ROLE_JOURNAL: /* journal device */ - if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) { - /* journal device without journal feature */ - pr_warn("md: journal device provided without journal feature, ignoring the device\n"); - return -EINVAL; - } - set_bit(Journal, &rdev->flags); - rdev->journal_tail = le64_to_cpu(sb->journal_tail); - rdev->raid_disk = 0; - break; - default: - rdev->saved_raid_disk = role; - if ((le32_to_cpu(sb->feature_map) & - MD_FEATURE_RECOVERY_OFFSET)) { - rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); - if (!(le32_to_cpu(sb->feature_map) & - MD_FEATURE_RECOVERY_BITMAP)) - rdev->saved_raid_disk = -1; - } else { - /* - * If the array is FROZEN, then the device can't - * be in_sync with rest of array. - */ - if (!test_bit(MD_RECOVERY_FROZEN, - &mddev->recovery)) - set_bit(In_sync, &rdev->flags); - } - rdev->raid_disk = role; - break; - } - if (sb->devflags & WriteMostly1) - set_bit(WriteMostly, &rdev->flags); - if (sb->devflags & FailFast1) - set_bit(FailFast, &rdev->flags); - if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT) - set_bit(Replacement, &rdev->flags); - } else /* MULTIPATH are always insync */ - set_bit(In_sync, &rdev->flags); + rdev->raid_disk = role; + break; + } + if (sb->devflags & WriteMostly1) + set_bit(WriteMostly, &rdev->flags); + if (sb->devflags & FailFast1) + set_bit(FailFast, &rdev->flags); + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT) + set_bit(Replacement, &rdev->flags); return 0; } @@ -2876,10 +2860,6 @@ void md_update_sb(struct mddev *mddev, int force_change) } else pr_debug("md: %pg (skipping faulty)\n", rdev->bdev); - - if (mddev->level == LEVEL_MULTIPATH) - /* only need to write one superblock... */ - break; } if (md_super_wait(mddev) < 0) goto rewrite; @@ -3880,13 +3860,8 @@ static int analyze_sbs(struct mddev *mddev) continue; } } - if (mddev->level == LEVEL_MULTIPATH) { - rdev->desc_nr = i++; - rdev->raid_disk = rdev->desc_nr; - set_bit(In_sync, &rdev->flags); - } else if (rdev->raid_disk >= - (mddev->raid_disks - min(0, mddev->delta_disks)) && - !test_bit(Journal, &rdev->flags)) { + if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks)) && + !test_bit(Journal, &rdev->flags)) { rdev->raid_disk = -1; clear_bit(In_sync, &rdev->flags); } diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index b36e282a413d..5a43c23f53bf 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -233,7 +233,7 @@ struct mdp_superblock_1 { char set_name[32]; /* set and interpreted by user-space */ __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __le32 level; /* -4 (multipath), 0,1,4,5 */ + __le32 level; /* 0,1,4,5 */ __le32 layout; /* only for raid5 and raid10 currently */ __le64 size; /* used size of component devices, in 512byte sectors */ diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index c285f76e5d8d..b44bbc356643 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h @@ -104,7 +104,6 @@ typedef struct mdu_array_info_s { } mdu_array_info_t; /* non-obvious values for 'level' */ -#define LEVEL_MULTIPATH (-4) #define LEVEL_FAULTY (-5) /* we need a value for 'no level specified' and 0 From 415c7451872b0d037760795edd3961eaa63276ea Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 14 Dec 2023 14:21:07 -0800 Subject: [PATCH 7/7] md: Remove deprecated CONFIG_MD_FAULTY md-faulty has been marked as deprecated for 2.5 years. Remove it. Cc: Christoph Hellwig Cc: Jens Axboe Cc: Neil Brown Cc: Guoqing Jiang Cc: Mateusz Grzonka Cc: Jes Sorensen Signed-off-by: Song Liu Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20231214222107.2016042-4-song@kernel.org --- drivers/md/Kconfig | 10 - drivers/md/Makefile | 2 - drivers/md/md-faulty.c | 365 --------------------------------- include/uapi/linux/raid/md_u.h | 3 - 4 files changed, 380 deletions(-) delete mode 100644 drivers/md/md-faulty.c diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index de4f47fe5a03..f6dc2acdf1ed 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -159,16 +159,6 @@ config MD_RAID456 If unsure, say Y. -config MD_FAULTY - tristate "Faulty test module for MD (deprecated)" - depends on BLK_DEV_MD - help - The "faulty" module allows for a block device that occasionally returns - read or write errors. It is useful for testing. - - In unsure, say N. - - config MD_CLUSTER tristate "Cluster Support for MD" depends on BLK_DEV_MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 6287c73399e7..027d7cfeca3f 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -29,7 +29,6 @@ dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o md-mod-y += md.o md-bitmap.o raid456-y += raid5.o raid5-cache.o raid5-ppl.o -faulty-y += md-faulty.o # Note: link order is important. All raid personalities # and must come before md.o, as they each initialise @@ -40,7 +39,6 @@ obj-$(CONFIG_MD_RAID0) += raid0.o obj-$(CONFIG_MD_RAID1) += raid1.o obj-$(CONFIG_MD_RAID10) += raid10.o obj-$(CONFIG_MD_RAID456) += raid456.o -obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_MD_CLUSTER) += md-cluster.o obj-$(CONFIG_BCACHE) += bcache/ obj-$(CONFIG_BLK_DEV_MD) += md-mod.o diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c deleted file mode 100644 index a039e8e20f55..000000000000 --- a/drivers/md/md-faulty.c +++ /dev/null @@ -1,365 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * faulty.c : Multiple Devices driver for Linux - * - * Copyright (C) 2004 Neil Brown - * - * fautly-device-simulator personality for md - */ - - -/* - * The "faulty" personality causes some requests to fail. - * - * Possible failure modes are: - * reads fail "randomly" but succeed on retry - * writes fail "randomly" but succeed on retry - * reads for some address fail and then persist until a write - * reads for some address fail and then persist irrespective of write - * writes for some address fail and persist - * all writes fail - * - * Different modes can be active at a time, but only - * one can be set at array creation. Others can be added later. - * A mode can be one-shot or recurrent with the recurrence being - * once in every N requests. - * The bottom 5 bits of the "layout" indicate the mode. The - * remainder indicate a period, or 0 for one-shot. - * - * There is an implementation limit on the number of concurrently - * persisting-faulty blocks. When a new fault is requested that would - * exceed the limit, it is ignored. - * All current faults can be clear using a layout of "0". - * - * Requests are always sent to the device. If they are to fail, - * we clone the bio and insert a new b_end_io into the chain. - */ - -#define WriteTransient 0 -#define ReadTransient 1 -#define WritePersistent 2 -#define ReadPersistent 3 -#define WriteAll 4 /* doesn't go to device */ -#define ReadFixable 5 -#define Modes 6 - -#define ClearErrors 31 -#define ClearFaults 30 - -#define AllPersist 100 /* internal use only */ -#define NoPersist 101 - -#define ModeMask 0x1f -#define ModeShift 5 - -#define MaxFault 50 -#include -#include -#include -#include -#include "md.h" -#include - - -static void faulty_fail(struct bio *bio) -{ - struct bio *b = bio->bi_private; - - b->bi_iter.bi_size = bio->bi_iter.bi_size; - b->bi_iter.bi_sector = bio->bi_iter.bi_sector; - - bio_put(bio); - - bio_io_error(b); -} - -struct faulty_conf { - int period[Modes]; - atomic_t counters[Modes]; - sector_t faults[MaxFault]; - int modes[MaxFault]; - int nfaults; - struct md_rdev *rdev; -}; - -static int check_mode(struct faulty_conf *conf, int mode) -{ - if (conf->period[mode] == 0 && - atomic_read(&conf->counters[mode]) <= 0) - return 0; /* no failure, no decrement */ - - - if (atomic_dec_and_test(&conf->counters[mode])) { - if (conf->period[mode]) - atomic_set(&conf->counters[mode], conf->period[mode]); - return 1; - } - return 0; -} - -static int check_sector(struct faulty_conf *conf, sector_t start, sector_t end, int dir) -{ - /* If we find a ReadFixable sector, we fix it ... */ - int i; - for (i=0; infaults; i++) - if (conf->faults[i] >= start && - conf->faults[i] < end) { - /* found it ... */ - switch (conf->modes[i] * 2 + dir) { - case WritePersistent*2+WRITE: return 1; - case ReadPersistent*2+READ: return 1; - case ReadFixable*2+READ: return 1; - case ReadFixable*2+WRITE: - conf->modes[i] = NoPersist; - return 0; - case AllPersist*2+READ: - case AllPersist*2+WRITE: return 1; - default: - return 0; - } - } - return 0; -} - -static void add_sector(struct faulty_conf *conf, sector_t start, int mode) -{ - int i; - int n = conf->nfaults; - for (i=0; infaults; i++) - if (conf->faults[i] == start) { - switch(mode) { - case NoPersist: conf->modes[i] = mode; return; - case WritePersistent: - if (conf->modes[i] == ReadPersistent || - conf->modes[i] == ReadFixable) - conf->modes[i] = AllPersist; - else - conf->modes[i] = WritePersistent; - return; - case ReadPersistent: - if (conf->modes[i] == WritePersistent) - conf->modes[i] = AllPersist; - else - conf->modes[i] = ReadPersistent; - return; - case ReadFixable: - if (conf->modes[i] == WritePersistent || - conf->modes[i] == ReadPersistent) - conf->modes[i] = AllPersist; - else - conf->modes[i] = ReadFixable; - return; - } - } else if (conf->modes[i] == NoPersist) - n = i; - - if (n >= MaxFault) - return; - conf->faults[n] = start; - conf->modes[n] = mode; - if (conf->nfaults == n) - conf->nfaults = n+1; -} - -static bool faulty_make_request(struct mddev *mddev, struct bio *bio) -{ - struct faulty_conf *conf = mddev->private; - int failit = 0; - - if (bio_data_dir(bio) == WRITE) { - /* write request */ - if (atomic_read(&conf->counters[WriteAll])) { - /* special case - don't decrement, don't submit_bio_noacct, - * just fail immediately - */ - bio_io_error(bio); - return true; - } - - if (check_sector(conf, bio->bi_iter.bi_sector, - bio_end_sector(bio), WRITE)) - failit = 1; - if (check_mode(conf, WritePersistent)) { - add_sector(conf, bio->bi_iter.bi_sector, - WritePersistent); - failit = 1; - } - if (check_mode(conf, WriteTransient)) - failit = 1; - } else { - /* read request */ - if (check_sector(conf, bio->bi_iter.bi_sector, - bio_end_sector(bio), READ)) - failit = 1; - if (check_mode(conf, ReadTransient)) - failit = 1; - if (check_mode(conf, ReadPersistent)) { - add_sector(conf, bio->bi_iter.bi_sector, - ReadPersistent); - failit = 1; - } - if (check_mode(conf, ReadFixable)) { - add_sector(conf, bio->bi_iter.bi_sector, - ReadFixable); - failit = 1; - } - } - - md_account_bio(mddev, &bio); - if (failit) { - struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO, - &mddev->bio_set); - - b->bi_private = bio; - b->bi_end_io = faulty_fail; - bio = b; - } else - bio_set_dev(bio, conf->rdev->bdev); - - submit_bio_noacct(bio); - return true; -} - -static void faulty_status(struct seq_file *seq, struct mddev *mddev) -{ - struct faulty_conf *conf = mddev->private; - int n; - - if ((n=atomic_read(&conf->counters[WriteTransient])) != 0) - seq_printf(seq, " WriteTransient=%d(%d)", - n, conf->period[WriteTransient]); - - if ((n=atomic_read(&conf->counters[ReadTransient])) != 0) - seq_printf(seq, " ReadTransient=%d(%d)", - n, conf->period[ReadTransient]); - - if ((n=atomic_read(&conf->counters[WritePersistent])) != 0) - seq_printf(seq, " WritePersistent=%d(%d)", - n, conf->period[WritePersistent]); - - if ((n=atomic_read(&conf->counters[ReadPersistent])) != 0) - seq_printf(seq, " ReadPersistent=%d(%d)", - n, conf->period[ReadPersistent]); - - - if ((n=atomic_read(&conf->counters[ReadFixable])) != 0) - seq_printf(seq, " ReadFixable=%d(%d)", - n, conf->period[ReadFixable]); - - if ((n=atomic_read(&conf->counters[WriteAll])) != 0) - seq_printf(seq, " WriteAll"); - - seq_printf(seq, " nfaults=%d", conf->nfaults); -} - - -static int faulty_reshape(struct mddev *mddev) -{ - int mode = mddev->new_layout & ModeMask; - int count = mddev->new_layout >> ModeShift; - struct faulty_conf *conf = mddev->private; - - if (mddev->new_layout < 0) - return 0; - - /* new layout */ - if (mode == ClearFaults) - conf->nfaults = 0; - else if (mode == ClearErrors) { - int i; - for (i=0 ; i < Modes ; i++) { - conf->period[i] = 0; - atomic_set(&conf->counters[i], 0); - } - } else if (mode < Modes) { - conf->period[mode] = count; - if (!count) count++; - atomic_set(&conf->counters[mode], count); - } else - return -EINVAL; - mddev->new_layout = -1; - mddev->layout = -1; /* makes sure further changes come through */ - return 0; -} - -static sector_t faulty_size(struct mddev *mddev, sector_t sectors, int raid_disks) -{ - WARN_ONCE(raid_disks, - "%s does not support generic reshape\n", __func__); - - if (sectors == 0) - return mddev->dev_sectors; - - return sectors; -} - -static int faulty_run(struct mddev *mddev) -{ - struct md_rdev *rdev; - int i; - struct faulty_conf *conf; - - if (md_check_no_bitmap(mddev)) - return -EINVAL; - - conf = kmalloc(sizeof(*conf), GFP_KERNEL); - if (!conf) - return -ENOMEM; - - for (i=0; icounters[i], 0); - conf->period[i] = 0; - } - conf->nfaults = 0; - - rdev_for_each(rdev, mddev) { - conf->rdev = rdev; - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - } - - md_set_array_sectors(mddev, faulty_size(mddev, 0, 0)); - mddev->private = conf; - - faulty_reshape(mddev); - - return 0; -} - -static void faulty_free(struct mddev *mddev, void *priv) -{ - struct faulty_conf *conf = priv; - - kfree(conf); -} - -static struct md_personality faulty_personality = -{ - .name = "faulty", - .level = LEVEL_FAULTY, - .owner = THIS_MODULE, - .make_request = faulty_make_request, - .run = faulty_run, - .free = faulty_free, - .status = faulty_status, - .check_reshape = faulty_reshape, - .size = faulty_size, -}; - -static int __init raid_init(void) -{ - return register_md_personality(&faulty_personality); -} - -static void raid_exit(void) -{ - unregister_md_personality(&faulty_personality); -} - -module_init(raid_init); -module_exit(raid_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Fault injection personality for MD (deprecated)"); -MODULE_ALIAS("md-personality-10"); /* faulty */ -MODULE_ALIAS("md-faulty"); -MODULE_ALIAS("md-level--5"); diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index b44bbc356643..7be89a4906e7 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h @@ -103,9 +103,6 @@ typedef struct mdu_array_info_s { } mdu_array_info_t; -/* non-obvious values for 'level' */ -#define LEVEL_FAULTY (-5) - /* we need a value for 'no level specified' and 0 * means 'raid0', so we need something else. This is * for internal use only