mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Merge tag 'md-next-20230613' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.5/block
Pull MD updates from Song:

 "The major changes are:

  1. Protect md_thread with rcu, by Yu Kuai;
  2. Various non-urgent raid5 and raid1/10 fixes, by Yu Kuai;
  3. Non-urgent raid10 fixes, by Li Nan."

* tag 'md-next-20230613' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: (29 commits)
  md/raid1-10: limit the number of plugged bio
  md/raid1-10: don't handle pluged bio by daemon thread
  md/md-bitmap: add a new helper to unplug bitmap asynchrously
  md/raid1-10: submit write io directly if bitmap is not enabled
  md/raid1-10: factor out a helper to submit normal write
  md/raid1-10: factor out a helper to add bio to plug
  md/raid10: prevent soft lockup while flush writes
  md/raid10: fix io loss while replacement replace rdev
  md/raid10: Do not add spare disk when recovery fails
  md/raid10: clean up md_add_new_disk()
  md/raid10: prioritize adding disk to 'removed' mirror
  md/raid10: improve code of mrdev in raid10_sync_request
  md/raid10: fix null-ptr-deref of mreplace in raid10_sync_request
  md/raid5: don't start reshape when recovery or replace is in progress
  md: protect md_thread with rcu
  md/bitmap: factor out a helper to set timeout
  md/bitmap: always wake up md_thread in timeout_store
  dm-raid: remove useless checking in raid_message()
  md: factor out a helper to wake up md_thread directly
  md: fix duplicate filename for rdev
  ...
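The headline change in this pull is the md_thread RCU conversion, which the hunks below apply to mddev->thread, mddev->sync_thread and the per-personality conf->thread pointers. The sketch that follows is a distilled illustration of that access pattern as it appears in those hunks; it is not code from the series, and my_conf, my_wakeup and my_unregister are placeholder names used only for this example.

/*
 * Distilled sketch of the RCU pattern this series applies to md_thread
 * pointers (compare the md_wakeup_thread() and md_unregister_thread()
 * hunks below).  "my_conf"/"my_wakeup"/"my_unregister" are placeholder
 * names for this illustration, not identifiers from the series.
 */
struct my_conf {
	struct md_thread __rcu *thread;
};

/* Reader: wake the thread if it still exists, without holding pers_lock. */
static void my_wakeup(struct my_conf *conf)
{
	struct md_thread *t;

	rcu_read_lock();
	t = rcu_dereference(conf->thread);
	if (t) {
		set_bit(THREAD_WAKEUP, &t->flags);
		wake_up(&t->wqueue);
	}
	rcu_read_unlock();
}

/* Writer: unpublish the thread, wait out readers, then stop and free it. */
static void my_unregister(struct my_conf *conf)
{
	struct md_thread *t = rcu_dereference_protected(conf->thread, true);

	if (!t)
		return;

	rcu_assign_pointer(conf->thread, NULL);
	synchronize_rcu();
	kthread_stop(t->tsk);
	kfree(t);
}

The reader side replaces the old pers_lock dance: that is why the mddev_unlock() and md_unregister_thread() hunks below drop spin_lock(&pers_lock).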
@@ -3750,11 +3750,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
* canceling read-auto mode
*/
mddev->ro = 0;
if (!mddev->suspended && mddev->sync_thread)
if (!mddev->suspended)
md_wakeup_thread(mddev->sync_thread);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
if (!mddev->suspended && mddev->thread)
if (!mddev->suspended)
md_wakeup_thread(mddev->thread);

return 0;

@@ -54,14 +54,7 @@ __acquires(bitmap->lock)
{
unsigned char *mappage;

if (page >= bitmap->pages) {
/* This can happen if bitmap_start_sync goes beyond
* End-of-device while looking for a whole page.
* It is harmless.
*/
return -EINVAL;
}

WARN_ON_ONCE(page >= bitmap->pages);
if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
return 0;

@@ -1023,7 +1016,6 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
return set;
}


/* this gets called when the md device is ready to unplug its underlying
* (slave) device queues -- before we let any writes go down, we need to
* sync the dirty pages of the bitmap file to disk */
@@ -1033,8 +1025,7 @@ void md_bitmap_unplug(struct bitmap *bitmap)
int dirty, need_write;
int writing = 0;

if (!bitmap || !bitmap->storage.filemap ||
test_bit(BITMAP_STALE, &bitmap->flags))
if (!md_bitmap_enabled(bitmap))
return;

/* look at each page to see if there are any set bits that need to be
@@ -1063,6 +1054,35 @@ void md_bitmap_unplug(struct bitmap *bitmap)
}
EXPORT_SYMBOL(md_bitmap_unplug);

struct bitmap_unplug_work {
struct work_struct work;
struct bitmap *bitmap;
struct completion *done;
};

static void md_bitmap_unplug_fn(struct work_struct *work)
{
struct bitmap_unplug_work *unplug_work =
container_of(work, struct bitmap_unplug_work, work);

md_bitmap_unplug(unplug_work->bitmap);
complete(unplug_work->done);
}

void md_bitmap_unplug_async(struct bitmap *bitmap)
{
DECLARE_COMPLETION_ONSTACK(done);
struct bitmap_unplug_work unplug_work;

INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
unplug_work.bitmap = bitmap;
unplug_work.done = &done;

queue_work(md_bitmap_wq, &unplug_work.work);
wait_for_completion(&done);
}
EXPORT_SYMBOL(md_bitmap_unplug_async);
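A reading note, not wording from the patch itself: the reason the unplug above is bounced through md_bitmap_wq becomes clear in the raid1-10 hunks later in this merge. raid1_prepare_flush_writes() calls md_bitmap_unplug_async() only when current->bio_list is set, because in that submit_bio() context the bitmap write would be queued behind the very submission that is waiting on it, the deadlock described in the comment added there.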
|
||||
|
||||
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
|
||||
/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
|
||||
* the in-memory bitmap from the on-disk bitmap -- also, sets up the
|
||||
@@ -1241,11 +1261,28 @@ static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
|
||||
sector_t offset, sector_t *blocks,
|
||||
int create);
|
||||
|
||||
static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
|
||||
bool force)
|
||||
{
|
||||
struct md_thread *thread;
|
||||
|
||||
rcu_read_lock();
|
||||
thread = rcu_dereference(mddev->thread);
|
||||
|
||||
if (!thread)
|
||||
goto out;
|
||||
|
||||
if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT)
|
||||
thread->timeout = timeout;
|
||||
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
* bitmap daemon -- periodically wakes up to clean bits and flush pages
|
||||
* out to disk
|
||||
*/
|
||||
|
||||
void md_bitmap_daemon_work(struct mddev *mddev)
|
||||
{
|
||||
struct bitmap *bitmap;
|
||||
@@ -1269,7 +1306,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
|
||||
|
||||
bitmap->daemon_lastrun = jiffies;
|
||||
if (bitmap->allclean) {
|
||||
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
|
||||
mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
|
||||
goto done;
|
||||
}
|
||||
bitmap->allclean = 1;
|
||||
@@ -1366,8 +1403,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
|
||||
|
||||
done:
|
||||
if (bitmap->allclean == 0)
|
||||
mddev->thread->timeout =
|
||||
mddev->bitmap_info.daemon_sleep;
|
||||
mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
|
||||
mutex_unlock(&mddev->bitmap_info.mutex);
|
||||
}
|
||||
|
||||
@@ -1387,6 +1423,14 @@ __acquires(bitmap->lock)
|
||||
sector_t csize;
|
||||
int err;
|
||||
|
||||
if (page >= bitmap->pages) {
|
||||
/*
|
||||
* This can happen if bitmap_start_sync goes beyond
|
||||
* End-of-device while looking for a whole page or
|
||||
* user set a huge number to sysfs bitmap_set_bits.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
err = md_bitmap_checkpage(bitmap, page, create, 0);
|
||||
|
||||
if (bitmap->bp[page].hijacked ||
|
||||
@@ -1820,8 +1864,7 @@ void md_bitmap_destroy(struct mddev *mddev)
|
||||
mddev->bitmap = NULL; /* disconnect from the md device */
|
||||
spin_unlock(&mddev->lock);
|
||||
mutex_unlock(&mddev->bitmap_info.mutex);
|
||||
if (mddev->thread)
|
||||
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
|
||||
mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
|
||||
|
||||
md_bitmap_free(bitmap);
|
||||
}
|
||||
@@ -1964,7 +2007,7 @@ int md_bitmap_load(struct mddev *mddev)
|
||||
/* Kick recovery in case any bits were set */
|
||||
set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
|
||||
|
||||
mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
|
||||
mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
|
||||
md_bitmap_update_sb(bitmap);
|
||||
@@ -2469,17 +2512,11 @@ timeout_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
timeout = MAX_SCHEDULE_TIMEOUT-1;
|
||||
if (timeout < 1)
|
||||
timeout = 1;
|
||||
|
||||
mddev->bitmap_info.daemon_sleep = timeout;
|
||||
if (mddev->thread) {
|
||||
/* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
|
||||
* the bitmap is all clean and we don't need to
|
||||
* adjust the timeout right now
|
||||
*/
|
||||
if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
|
||||
mddev->thread->timeout = timeout;
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
}
|
||||
mddev_set_timeout(mddev, timeout, false);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
@@ -264,6 +264,7 @@ void md_bitmap_sync_with_cluster(struct mddev *mddev,
|
||||
sector_t new_lo, sector_t new_hi);
|
||||
|
||||
void md_bitmap_unplug(struct bitmap *bitmap);
|
||||
void md_bitmap_unplug_async(struct bitmap *bitmap);
|
||||
void md_bitmap_daemon_work(struct mddev *mddev);
|
||||
|
||||
int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
|
||||
@@ -273,6 +274,13 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
|
||||
sector_t *lo, sector_t *hi, bool clear_bits);
|
||||
void md_bitmap_free(struct bitmap *bitmap);
|
||||
void md_bitmap_wait_behind_writes(struct mddev *mddev);
|
||||
|
||||
static inline bool md_bitmap_enabled(struct bitmap *bitmap)
|
||||
{
|
||||
return bitmap && bitmap->storage.filemap &&
|
||||
!test_bit(BITMAP_STALE, &bitmap->flags);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -75,14 +75,14 @@ struct md_cluster_info {
|
||||
sector_t suspend_hi;
|
||||
int suspend_from; /* the slot which broadcast suspend_lo/hi */
|
||||
|
||||
struct md_thread *recovery_thread;
|
||||
struct md_thread __rcu *recovery_thread;
|
||||
unsigned long recovery_map;
|
||||
/* communication loc resources */
|
||||
struct dlm_lock_resource *ack_lockres;
|
||||
struct dlm_lock_resource *message_lockres;
|
||||
struct dlm_lock_resource *token_lockres;
|
||||
struct dlm_lock_resource *no_new_dev_lockres;
|
||||
struct md_thread *recv_thread;
|
||||
struct md_thread __rcu *recv_thread;
|
||||
struct completion newdisk_completion;
|
||||
wait_queue_head_t wait;
|
||||
unsigned long state;
|
||||
@@ -362,8 +362,8 @@ static void __recover_slot(struct mddev *mddev, int slot)
|
||||
|
||||
set_bit(slot, &cinfo->recovery_map);
|
||||
if (!cinfo->recovery_thread) {
|
||||
cinfo->recovery_thread = md_register_thread(recover_bitmaps,
|
||||
mddev, "recover");
|
||||
rcu_assign_pointer(cinfo->recovery_thread,
|
||||
md_register_thread(recover_bitmaps, mddev, "recover"));
|
||||
if (!cinfo->recovery_thread) {
|
||||
pr_warn("md-cluster: Could not create recovery thread\n");
|
||||
return;
|
||||
@@ -526,11 +526,15 @@ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
|
||||
static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
|
||||
{
|
||||
int got_lock = 0;
|
||||
struct md_thread *thread;
|
||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
||||
mddev->good_device_nr = le32_to_cpu(msg->raid_slot);
|
||||
|
||||
dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
|
||||
wait_event(mddev->thread->wqueue,
|
||||
|
||||
/* daemon thread must exist */
|
||||
thread = rcu_dereference_protected(mddev->thread, true);
|
||||
wait_event(thread->wqueue,
|
||||
(got_lock = mddev_trylock(mddev)) ||
|
||||
test_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state));
|
||||
md_reload_sb(mddev, mddev->good_device_nr);
|
||||
@@ -889,7 +893,8 @@ static int join(struct mddev *mddev, int nodes)
|
||||
}
|
||||
/* Initiate the communication resources */
|
||||
ret = -ENOMEM;
|
||||
cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
|
||||
rcu_assign_pointer(cinfo->recv_thread,
|
||||
md_register_thread(recv_daemon, mddev, "cluster_recv"));
|
||||
if (!cinfo->recv_thread) {
|
||||
pr_err("md-cluster: cannot allocate memory for recv_thread!\n");
|
||||
goto err;
|
||||
|
||||
@@ -400,8 +400,8 @@ static int multipath_run (struct mddev *mddev)
|
||||
if (ret)
|
||||
goto out_free_conf;
|
||||
|
||||
mddev->thread = md_register_thread(multipathd, mddev,
|
||||
"multipath");
|
||||
rcu_assign_pointer(mddev->thread,
|
||||
md_register_thread(multipathd, mddev, "multipath"));
|
||||
if (!mddev->thread)
|
||||
goto out_free_conf;
|
||||
|
||||
|
||||
drivers/md/md.c | 226
@@ -70,11 +70,7 @@
|
||||
#include "md-bitmap.h"
|
||||
#include "md-cluster.h"
|
||||
|
||||
/* pers_list is a list of registered personalities protected
|
||||
* by pers_lock.
|
||||
* pers_lock does extra service to protect accesses to
|
||||
* mddev->thread when the mutex cannot be held.
|
||||
*/
|
||||
/* pers_list is a list of registered personalities protected by pers_lock. */
|
||||
static LIST_HEAD(pers_list);
|
||||
static DEFINE_SPINLOCK(pers_lock);
|
||||
|
||||
@@ -87,23 +83,13 @@ static struct module *md_cluster_mod;
|
||||
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
|
||||
static struct workqueue_struct *md_wq;
|
||||
static struct workqueue_struct *md_misc_wq;
|
||||
static struct workqueue_struct *md_rdev_misc_wq;
|
||||
struct workqueue_struct *md_bitmap_wq;
|
||||
|
||||
static int remove_and_add_spares(struct mddev *mddev,
|
||||
struct md_rdev *this);
|
||||
static void mddev_detach(struct mddev *mddev);
|
||||
|
||||
enum md_ro_state {
|
||||
MD_RDWR,
|
||||
MD_RDONLY,
|
||||
MD_AUTO_READ,
|
||||
MD_MAX_STATE
|
||||
};
|
||||
|
||||
static bool md_is_rdwr(struct mddev *mddev)
|
||||
{
|
||||
return (mddev->ro == MD_RDWR);
|
||||
}
|
||||
static void export_rdev(struct md_rdev *rdev, struct mddev *mddev);
|
||||
static void md_wakeup_thread_directly(struct md_thread __rcu *thread);
|
||||
|
||||
/*
|
||||
* Default number of read corrections we'll attempt on an rdev
|
||||
@@ -360,10 +346,6 @@ EXPORT_SYMBOL_GPL(md_new_event);
|
||||
static LIST_HEAD(all_mddevs);
|
||||
static DEFINE_SPINLOCK(all_mddevs_lock);
|
||||
|
||||
static bool is_md_suspended(struct mddev *mddev)
|
||||
{
|
||||
return percpu_ref_is_dying(&mddev->active_io);
|
||||
}
|
||||
/* Rather than calling directly into the personality make_request function,
|
||||
* IO requests come here first so that we can check if the device is
|
||||
* being suspended pending a reconfiguration.
|
||||
@@ -457,13 +439,19 @@ static void md_submit_bio(struct bio *bio)
|
||||
*/
|
||||
void mddev_suspend(struct mddev *mddev)
|
||||
{
|
||||
WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
struct md_thread *thread = rcu_dereference_protected(mddev->thread,
|
||||
lockdep_is_held(&mddev->reconfig_mutex));
|
||||
|
||||
WARN_ON_ONCE(thread && current == thread->tsk);
|
||||
if (mddev->suspended++)
|
||||
return;
|
||||
wake_up(&mddev->sb_wait);
|
||||
set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
|
||||
percpu_ref_kill(&mddev->active_io);
|
||||
|
||||
if (mddev->pers->prepare_suspend)
|
||||
mddev->pers->prepare_suspend(mddev);
|
||||
|
||||
wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
|
||||
@@ -655,9 +643,11 @@ void mddev_init(struct mddev *mddev)
|
||||
{
|
||||
mutex_init(&mddev->open_mutex);
|
||||
mutex_init(&mddev->reconfig_mutex);
|
||||
mutex_init(&mddev->delete_mutex);
|
||||
mutex_init(&mddev->bitmap_info.mutex);
|
||||
INIT_LIST_HEAD(&mddev->disks);
|
||||
INIT_LIST_HEAD(&mddev->all_mddevs);
|
||||
INIT_LIST_HEAD(&mddev->deleting);
|
||||
timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
|
||||
atomic_set(&mddev->active, 1);
|
||||
atomic_set(&mddev->openers, 0);
|
||||
@@ -759,6 +749,24 @@ static void mddev_free(struct mddev *mddev)
|
||||
|
||||
static const struct attribute_group md_redundancy_group;
|
||||
|
||||
static void md_free_rdev(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
struct md_rdev *tmp;
|
||||
|
||||
mutex_lock(&mddev->delete_mutex);
|
||||
if (list_empty(&mddev->deleting))
|
||||
goto out;
|
||||
|
||||
list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
|
||||
list_del_init(&rdev->same_set);
|
||||
kobject_del(&rdev->kobj);
|
||||
export_rdev(rdev, mddev);
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&mddev->delete_mutex);
|
||||
}
|
||||
|
||||
void mddev_unlock(struct mddev *mddev)
|
||||
{
|
||||
if (mddev->to_remove) {
|
||||
@@ -800,13 +808,10 @@ void mddev_unlock(struct mddev *mddev)
|
||||
} else
|
||||
mutex_unlock(&mddev->reconfig_mutex);
|
||||
|
||||
/* As we've dropped the mutex we need a spinlock to
|
||||
* make sure the thread doesn't disappear
|
||||
*/
|
||||
spin_lock(&pers_lock);
|
||||
md_free_rdev(mddev);
|
||||
|
||||
md_wakeup_thread(mddev->thread);
|
||||
wake_up(&mddev->sb_wait);
|
||||
spin_unlock(&pers_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mddev_unlock);
|
||||
|
||||
@@ -2440,13 +2445,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
|
||||
return err;
|
||||
}
|
||||
|
||||
static void rdev_delayed_delete(struct work_struct *ws)
|
||||
{
|
||||
struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
|
||||
kobject_del(&rdev->kobj);
|
||||
kobject_put(&rdev->kobj);
|
||||
}
|
||||
|
||||
void md_autodetect_dev(dev_t dev);
|
||||
|
||||
/* just for claiming the bdev */
|
||||
@@ -2467,6 +2465,8 @@ static void export_rdev(struct md_rdev *rdev, struct mddev *mddev)
|
||||
|
||||
static void md_kick_rdev_from_array(struct md_rdev *rdev)
|
||||
{
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
|
||||
bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
|
||||
list_del_rcu(&rdev->same_set);
|
||||
pr_debug("md: unbind<%pg>\n", rdev->bdev);
|
||||
@@ -2480,15 +2480,17 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
|
||||
rdev->sysfs_unack_badblocks = NULL;
|
||||
rdev->sysfs_badblocks = NULL;
|
||||
rdev->badblocks.count = 0;
|
||||
/* We need to delay this, otherwise we can deadlock when
|
||||
* writing to 'remove' to "dev/state". We also need
|
||||
* to delay it due to rcu usage.
|
||||
*/
|
||||
|
||||
synchronize_rcu();
|
||||
INIT_WORK(&rdev->del_work, rdev_delayed_delete);
|
||||
kobject_get(&rdev->kobj);
|
||||
queue_work(md_rdev_misc_wq, &rdev->del_work);
|
||||
export_rdev(rdev, rdev->mddev);
|
||||
|
||||
/*
|
||||
* kobject_del() will wait for all in progress writers to be done, where
|
||||
* reconfig_mutex is held, hence it can't be called under
|
||||
* reconfig_mutex and it's delayed to mddev_unlock().
|
||||
*/
|
||||
mutex_lock(&mddev->delete_mutex);
|
||||
list_add(&rdev->same_set, &mddev->deleting);
|
||||
mutex_unlock(&mddev->delete_mutex);
|
||||
}
|
||||
|
||||
static void export_array(struct mddev *mddev)
|
||||
@@ -3556,6 +3558,7 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
{
|
||||
struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
|
||||
struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
|
||||
struct kernfs_node *kn = NULL;
|
||||
ssize_t rv;
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
|
||||
@@ -3563,6 +3566,10 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
return -EIO;
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
|
||||
if (entry->store == state_store && cmd_match(page, "remove"))
|
||||
kn = sysfs_break_active_protection(kobj, attr);
|
||||
|
||||
rv = mddev ? mddev_lock(mddev) : -ENODEV;
|
||||
if (!rv) {
|
||||
if (rdev->mddev == NULL)
|
||||
@@ -3571,6 +3578,10 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
rv = entry->store(rdev, page, length);
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
|
||||
if (kn)
|
||||
sysfs_unbreak_active_protection(kn);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
@@ -3796,8 +3807,9 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
|
||||
static ssize_t
|
||||
safe_delay_show(struct mddev *mddev, char *page)
|
||||
{
|
||||
int msec = (mddev->safemode_delay*1000)/HZ;
|
||||
return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
|
||||
unsigned int msec = ((unsigned long)mddev->safemode_delay*1000)/HZ;
|
||||
|
||||
return sprintf(page, "%u.%03u\n", msec/1000, msec%1000);
|
||||
}
|
||||
static ssize_t
|
||||
safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
|
||||
@@ -3809,7 +3821,7 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
|
||||
if (strict_strtoul_scaled(cbuf, &msec, 3) < 0 || msec > UINT_MAX / HZ)
|
||||
return -EINVAL;
|
||||
if (msec == 0)
|
||||
mddev->safemode_delay = 0;
|
||||
@@ -4479,6 +4491,8 @@ max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len
|
||||
rv = kstrtouint(buf, 10, &n);
|
||||
if (rv < 0)
|
||||
return rv;
|
||||
if (n > INT_MAX)
|
||||
return -EINVAL;
|
||||
atomic_set(&mddev->max_corr_read_errors, n);
|
||||
return len;
|
||||
}
|
||||
@@ -4493,20 +4507,6 @@ null_show(struct mddev *mddev, char *page)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* need to ensure rdev_delayed_delete() has completed */
|
||||
static void flush_rdev_wq(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
|
||||
rcu_read_lock();
|
||||
rdev_for_each_rcu(rdev, mddev)
|
||||
if (work_pending(&rdev->del_work)) {
|
||||
flush_workqueue(md_rdev_misc_wq);
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
new_dev_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
{
|
||||
@@ -4534,7 +4534,6 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
minor != MINOR(dev))
|
||||
return -EOVERFLOW;
|
||||
|
||||
flush_rdev_wq(mddev);
|
||||
err = mddev_lock(mddev);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -4806,11 +4805,21 @@ action_store(struct mddev *mddev, const char *page, size_t len)
|
||||
return -EINVAL;
|
||||
err = mddev_lock(mddev);
|
||||
if (!err) {
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
err = -EBUSY;
|
||||
else {
|
||||
} else if (mddev->reshape_position == MaxSector ||
|
||||
mddev->pers->check_reshape == NULL ||
|
||||
mddev->pers->check_reshape(mddev)) {
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
err = mddev->pers->start_reshape(mddev);
|
||||
} else {
|
||||
/*
|
||||
* If reshape is still in progress, and
|
||||
* md_check_recovery() can continue to reshape,
|
||||
* don't restart reshape because data can be
|
||||
* corrupted for raid456.
|
||||
*/
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
}
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
@@ -5594,7 +5603,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
|
||||
* removed (mddev_delayed_delete).
|
||||
*/
|
||||
flush_workqueue(md_misc_wq);
|
||||
flush_workqueue(md_rdev_misc_wq);
|
||||
|
||||
mutex_lock(&disks_mutex);
|
||||
mddev = mddev_alloc(dev);
|
||||
@@ -6271,10 +6279,12 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
|
||||
}
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
if (mddev->sync_thread)
|
||||
/* Thread might be blocked waiting for metadata update
|
||||
* which will now never happen */
|
||||
wake_up_process(mddev->sync_thread->tsk);
|
||||
|
||||
/*
|
||||
* Thread might be blocked waiting for metadata update which will now
|
||||
* never happen
|
||||
*/
|
||||
md_wakeup_thread_directly(mddev->sync_thread);
|
||||
|
||||
if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
|
||||
return -EBUSY;
|
||||
@@ -6335,10 +6345,12 @@ static int do_md_stop(struct mddev *mddev, int mode,
|
||||
}
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
if (mddev->sync_thread)
|
||||
/* Thread might be blocked waiting for metadata update
|
||||
* which will now never happen */
|
||||
wake_up_process(mddev->sync_thread->tsk);
|
||||
|
||||
/*
|
||||
* Thread might be blocked waiting for metadata update which will now
|
||||
* never happen
|
||||
*/
|
||||
md_wakeup_thread_directly(mddev->sync_thread);
|
||||
|
||||
mddev_unlock(mddev);
|
||||
wait_event(resync_wait, (mddev->sync_thread == NULL &&
|
||||
@@ -6735,7 +6747,6 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
||||
if (info->state & (1<<MD_DISK_SYNC) &&
|
||||
info->raid_disk < mddev->raid_disks) {
|
||||
rdev->raid_disk = info->raid_disk;
|
||||
set_bit(In_sync, &rdev->flags);
|
||||
clear_bit(Bitmap_sync, &rdev->flags);
|
||||
} else
|
||||
rdev->raid_disk = -1;
|
||||
@@ -7557,9 +7568,6 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
|
||||
}
|
||||
|
||||
if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
|
||||
flush_rdev_wq(mddev);
|
||||
|
||||
if (cmd == HOT_REMOVE_DISK)
|
||||
/* need to ensure recovery thread has run */
|
||||
wait_event_interruptible_timeout(mddev->sb_wait,
|
||||
@@ -7888,13 +7896,29 @@ static int md_thread(void *arg)
return 0;
}

void md_wakeup_thread(struct md_thread *thread)
static void md_wakeup_thread_directly(struct md_thread __rcu *thread)
{
if (thread) {
pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
set_bit(THREAD_WAKEUP, &thread->flags);
wake_up(&thread->wqueue);
struct md_thread *t;

rcu_read_lock();
t = rcu_dereference(thread);
if (t)
wake_up_process(t->tsk);
rcu_read_unlock();
}

void md_wakeup_thread(struct md_thread __rcu *thread)
{
struct md_thread *t;

rcu_read_lock();
t = rcu_dereference(thread);
if (t) {
pr_debug("md: waking up MD thread %s.\n", t->tsk->comm);
set_bit(THREAD_WAKEUP, &t->flags);
wake_up(&t->wqueue);
}
rcu_read_unlock();
}
EXPORT_SYMBOL(md_wakeup_thread);
|
||||
|
||||
@@ -7924,22 +7948,15 @@ struct md_thread *md_register_thread(void (*run) (struct md_thread *),
|
||||
}
|
||||
EXPORT_SYMBOL(md_register_thread);
|
||||
|
||||
void md_unregister_thread(struct md_thread **threadp)
|
||||
void md_unregister_thread(struct md_thread __rcu **threadp)
|
||||
{
|
||||
struct md_thread *thread;
|
||||
struct md_thread *thread = rcu_dereference_protected(*threadp, true);
|
||||
|
||||
/*
|
||||
* Locking ensures that mddev_unlock does not wake_up a
|
||||
* non-existent thread
|
||||
*/
|
||||
spin_lock(&pers_lock);
|
||||
thread = *threadp;
|
||||
if (!thread) {
|
||||
spin_unlock(&pers_lock);
|
||||
if (!thread)
|
||||
return;
|
||||
}
|
||||
*threadp = NULL;
|
||||
spin_unlock(&pers_lock);
|
||||
|
||||
rcu_assign_pointer(*threadp, NULL);
|
||||
synchronize_rcu();
|
||||
|
||||
pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
|
||||
kthread_stop(thread->tsk);
|
||||
@@ -9102,6 +9119,7 @@ void md_do_sync(struct md_thread *thread)
|
||||
spin_unlock(&mddev->lock);
|
||||
|
||||
wake_up(&resync_wait);
|
||||
wake_up(&mddev->sb_wait);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
return;
|
||||
}
|
||||
@@ -9204,9 +9222,8 @@ static void md_start_sync(struct work_struct *ws)
|
||||
{
|
||||
struct mddev *mddev = container_of(ws, struct mddev, del_work);
|
||||
|
||||
mddev->sync_thread = md_register_thread(md_do_sync,
|
||||
mddev,
|
||||
"resync");
|
||||
rcu_assign_pointer(mddev->sync_thread,
|
||||
md_register_thread(md_do_sync, mddev, "resync"));
|
||||
if (!mddev->sync_thread) {
|
||||
pr_warn("%s: could not start resync thread...\n",
|
||||
mdname(mddev));
|
||||
@@ -9621,9 +9638,10 @@ static int __init md_init(void)
|
||||
if (!md_misc_wq)
|
||||
goto err_misc_wq;
|
||||
|
||||
md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
|
||||
if (!md_rdev_misc_wq)
|
||||
goto err_rdev_misc_wq;
|
||||
md_bitmap_wq = alloc_workqueue("md_bitmap", WQ_MEM_RECLAIM | WQ_UNBOUND,
|
||||
0);
|
||||
if (!md_bitmap_wq)
|
||||
goto err_bitmap_wq;
|
||||
|
||||
ret = __register_blkdev(MD_MAJOR, "md", md_probe);
|
||||
if (ret < 0)
|
||||
@@ -9643,8 +9661,8 @@ static int __init md_init(void)
|
||||
err_mdp:
|
||||
unregister_blkdev(MD_MAJOR, "md");
|
||||
err_md:
|
||||
destroy_workqueue(md_rdev_misc_wq);
|
||||
err_rdev_misc_wq:
|
||||
destroy_workqueue(md_bitmap_wq);
|
||||
err_bitmap_wq:
|
||||
destroy_workqueue(md_misc_wq);
|
||||
err_misc_wq:
|
||||
destroy_workqueue(md_wq);
|
||||
@@ -9940,8 +9958,8 @@ static __exit void md_exit(void)
|
||||
}
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
destroy_workqueue(md_rdev_misc_wq);
|
||||
destroy_workqueue(md_misc_wq);
|
||||
destroy_workqueue(md_bitmap_wq);
|
||||
destroy_workqueue(md_wq);
|
||||
}
|
||||
|
||||
|
||||
@@ -122,8 +122,6 @@ struct md_rdev {
|
||||
|
||||
struct serial_in_rdev *serial; /* used for raid1 io serialization */
|
||||
|
||||
struct work_struct del_work; /* used for delayed sysfs removal */
|
||||
|
||||
struct kernfs_node *sysfs_state; /* handle for 'state'
|
||||
* sysfs entry */
|
||||
/* handle for 'unacknowledged_bad_blocks' sysfs dentry */
|
||||
@@ -367,8 +365,8 @@ struct mddev {
|
||||
int new_chunk_sectors;
|
||||
int reshape_backwards;
|
||||
|
||||
struct md_thread *thread; /* management thread */
|
||||
struct md_thread *sync_thread; /* doing resync or reconstruct */
|
||||
struct md_thread __rcu *thread; /* management thread */
|
||||
struct md_thread __rcu *sync_thread; /* doing resync or reconstruct */
|
||||
|
||||
/* 'last_sync_action' is initialized to "none". It is set when a
|
||||
* sync operation (i.e "data-check", "requested-resync", "resync",
|
||||
@@ -531,6 +529,14 @@ struct mddev {
|
||||
unsigned int good_device_nr; /* good device num within cluster raid */
|
||||
unsigned int noio_flag; /* for memalloc scope API */
|
||||
|
||||
/*
|
||||
* Temporarily store rdev that will be finally removed when
|
||||
* reconfig_mutex is unlocked.
|
||||
*/
|
||||
struct list_head deleting;
|
||||
/* Protect the deleting list */
|
||||
struct mutex delete_mutex;
|
||||
|
||||
bool has_superblocks:1;
|
||||
bool fail_last_dev:1;
|
||||
bool serialize_policy:1;
|
||||
@@ -555,6 +561,23 @@ enum recovery_flags {
|
||||
MD_RESYNCING_REMOTE, /* remote node is running resync thread */
|
||||
};
|
||||
|
||||
enum md_ro_state {
|
||||
MD_RDWR,
|
||||
MD_RDONLY,
|
||||
MD_AUTO_READ,
|
||||
MD_MAX_STATE
|
||||
};
|
||||
|
||||
static inline bool md_is_rdwr(struct mddev *mddev)
|
||||
{
|
||||
return (mddev->ro == MD_RDWR);
|
||||
}
|
||||
|
||||
static inline bool is_md_suspended(struct mddev *mddev)
|
||||
{
|
||||
return percpu_ref_is_dying(&mddev->active_io);
|
||||
}
|
||||
|
||||
static inline int __must_check mddev_lock(struct mddev *mddev)
|
||||
{
|
||||
return mutex_lock_interruptible(&mddev->reconfig_mutex);
|
||||
@@ -614,6 +637,7 @@ struct md_personality
|
||||
int (*start_reshape) (struct mddev *mddev);
|
||||
void (*finish_reshape) (struct mddev *mddev);
|
||||
void (*update_reshape_pos) (struct mddev *mddev);
|
||||
void (*prepare_suspend) (struct mddev *mddev);
|
||||
/* quiesce suspends or resumes internal processing.
|
||||
* 1 - stop new actions and wait for action io to complete
|
||||
* 0 - return to normal behaviour
|
||||
@@ -734,8 +758,8 @@ extern struct md_thread *md_register_thread(
|
||||
void (*run)(struct md_thread *thread),
|
||||
struct mddev *mddev,
|
||||
const char *name);
|
||||
extern void md_unregister_thread(struct md_thread **threadp);
|
||||
extern void md_wakeup_thread(struct md_thread *thread);
|
||||
extern void md_unregister_thread(struct md_thread __rcu **threadp);
|
||||
extern void md_wakeup_thread(struct md_thread __rcu *thread);
|
||||
extern void md_check_recovery(struct mddev *mddev);
|
||||
extern void md_reap_sync_thread(struct mddev *mddev);
|
||||
extern int mddev_init_writes_pending(struct mddev *mddev);
|
||||
@@ -828,6 +852,7 @@ struct mdu_array_info_s;
|
||||
struct mdu_disk_info_s;
|
||||
|
||||
extern int mdp_major;
|
||||
extern struct workqueue_struct *md_bitmap_wq;
|
||||
void md_autostart_arrays(int part);
|
||||
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
|
||||
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
|
||||
|
||||
@@ -21,6 +21,7 @@
#define IO_MADE_GOOD ((struct bio *)2)

#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
#define MAX_PLUG_BIO 32

/* for managing resync I/O pages */
struct resync_pages {
@@ -31,6 +32,7 @@ struct resync_pages {
struct raid1_plug_cb {
struct blk_plug_cb cb;
struct bio_list pending;
unsigned int count;
};

static void rbio_pool_free(void *rbio, void *data)
@@ -110,3 +112,64 @@ static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp,
size -= len;
} while (idx++ < RESYNC_PAGES && size > 0);
}


static inline void raid1_submit_write(struct bio *bio)
{
struct md_rdev *rdev = (struct md_rdev *)bio->bi_bdev;

bio->bi_next = NULL;
bio_set_dev(bio, rdev->bdev);
if (test_bit(Faulty, &rdev->flags))
bio_io_error(bio);
else if (unlikely(bio_op(bio) == REQ_OP_DISCARD &&
!bdev_max_discard_sectors(bio->bi_bdev)))
/* Just ignore it */
bio_endio(bio);
else
submit_bio_noacct(bio);
}

static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio,
blk_plug_cb_fn unplug, int copies)
{
struct raid1_plug_cb *plug = NULL;
struct blk_plug_cb *cb;

/*
* If bitmap is not enabled, it's safe to submit the io directly, and
* this can get optimal performance.
*/
if (!md_bitmap_enabled(mddev->bitmap)) {
raid1_submit_write(bio);
return true;
}

cb = blk_check_plugged(unplug, mddev, sizeof(*plug));
if (!cb)
return false;

plug = container_of(cb, struct raid1_plug_cb, cb);
bio_list_add(&plug->pending, bio);
if (++plug->count / MAX_PLUG_BIO >= copies) {
list_del(&cb->list);
cb->callback(cb, false);
}

return true;
}
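A note on the threshold above, from my reading of this hunk rather than the patch description: plug->count counts every bio added to the plug across all copies of a write, so the plug is flushed once roughly MAX_PLUG_BIO (32) bios per copy have accumulated, about 64 for a two-mirror raid1. This is the limit referred to by the "limit the number of plugged bio" commit in this merge.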
/*
* current->bio_list will be set under submit_bio() context, in this case bitmap
* io will be added to the list and wait for current io submission to finish,
* while current io submission must wait for bitmap io to be done. In order to
* avoid such deadlock, submit bitmap io asynchronously.
*/
static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
{
if (current->bio_list)
md_bitmap_unplug_async(bitmap);
else
md_bitmap_unplug(bitmap);
}
|
||||
|
||||
@@ -794,22 +794,13 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||
static void flush_bio_list(struct r1conf *conf, struct bio *bio)
|
||||
{
|
||||
/* flush any pending bitmap writes to disk before proceeding w/ I/O */
|
||||
md_bitmap_unplug(conf->mddev->bitmap);
|
||||
raid1_prepare_flush_writes(conf->mddev->bitmap);
|
||||
wake_up(&conf->wait_barrier);
|
||||
|
||||
while (bio) { /* submit pending writes */
|
||||
struct bio *next = bio->bi_next;
|
||||
struct md_rdev *rdev = (void *)bio->bi_bdev;
|
||||
bio->bi_next = NULL;
|
||||
bio_set_dev(bio, rdev->bdev);
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
bio_io_error(bio);
|
||||
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!bdev_max_discard_sectors(bio->bi_bdev)))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
raid1_submit_write(bio);
|
||||
bio = next;
|
||||
cond_resched();
|
||||
}
|
||||
@@ -1178,7 +1169,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||
struct r1conf *conf = mddev->private;
|
||||
struct bio *bio;
|
||||
|
||||
if (from_schedule || current->bio_list) {
|
||||
if (from_schedule) {
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
bio_list_merge(&conf->pending_bio_list, &plug->pending);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
@@ -1346,8 +1337,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
struct bitmap *bitmap = mddev->bitmap;
|
||||
unsigned long flags;
|
||||
struct md_rdev *blocked_rdev;
|
||||
struct blk_plug_cb *cb;
|
||||
struct raid1_plug_cb *plug = NULL;
|
||||
int first_clone;
|
||||
int max_sectors;
|
||||
bool write_behind = false;
|
||||
@@ -1576,15 +1565,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r1_bio->sector);
|
||||
/* flush_pending_writes() needs access to the rdev so...*/
|
||||
mbio->bi_bdev = (void *)rdev;
|
||||
|
||||
cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
|
||||
if (cb)
|
||||
plug = container_of(cb, struct raid1_plug_cb, cb);
|
||||
else
|
||||
plug = NULL;
|
||||
if (plug) {
|
||||
bio_list_add(&plug->pending, mbio);
|
||||
} else {
|
||||
if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug, disks)) {
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
bio_list_add(&conf->pending_bio_list, mbio);
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
@@ -3087,7 +3068,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
conf->thread = md_register_thread(raid1d, mddev, "raid1");
|
||||
rcu_assign_pointer(conf->thread,
|
||||
md_register_thread(raid1d, mddev, "raid1"));
|
||||
if (!conf->thread)
|
||||
goto abort;
|
||||
|
||||
@@ -3180,8 +3162,8 @@ static int raid1_run(struct mddev *mddev)
|
||||
/*
|
||||
* Ok, everything is just fine now
|
||||
*/
|
||||
mddev->thread = conf->thread;
|
||||
conf->thread = NULL;
|
||||
rcu_assign_pointer(mddev->thread, conf->thread);
|
||||
rcu_assign_pointer(conf->thread, NULL);
|
||||
mddev->private = conf;
|
||||
set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
|
||||
|
||||
|
||||
@@ -130,7 +130,7 @@ struct r1conf {
|
||||
/* When taking over an array from a different personality, we store
|
||||
* the new thread here until we fully activate the array.
|
||||
*/
|
||||
struct md_thread *thread;
|
||||
struct md_thread __rcu *thread;
|
||||
|
||||
/* Keep track of cluster resync window to send to other
|
||||
* nodes.
|
||||
|
||||
@@ -779,8 +779,16 @@ static struct md_rdev *read_balance(struct r10conf *conf,
|
||||
disk = r10_bio->devs[slot].devnum;
|
||||
rdev = rcu_dereference(conf->mirrors[disk].replacement);
|
||||
if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
|
||||
r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
|
||||
r10_bio->devs[slot].addr + sectors >
|
||||
rdev->recovery_offset) {
|
||||
/*
|
||||
* Read replacement first to prevent reading both rdev
|
||||
* and replacement as NULL during replacement replace
|
||||
* rdev.
|
||||
*/
|
||||
smp_mb();
|
||||
rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
||||
}
|
||||
if (rdev == NULL ||
|
||||
test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
@@ -902,25 +910,15 @@ static void flush_pending_writes(struct r10conf *conf)
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
blk_start_plug(&plug);
|
||||
/* flush any pending bitmap writes to disk
|
||||
* before proceeding w/ I/O */
|
||||
md_bitmap_unplug(conf->mddev->bitmap);
|
||||
raid1_prepare_flush_writes(conf->mddev->bitmap);
|
||||
wake_up(&conf->wait_barrier);
|
||||
|
||||
while (bio) { /* submit pending writes */
|
||||
struct bio *next = bio->bi_next;
|
||||
struct md_rdev *rdev = (void*)bio->bi_bdev;
|
||||
bio->bi_next = NULL;
|
||||
bio_set_dev(bio, rdev->bdev);
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
bio_io_error(bio);
|
||||
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!bdev_max_discard_sectors(bio->bi_bdev)))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
raid1_submit_write(bio);
|
||||
bio = next;
|
||||
cond_resched();
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
} else
|
||||
@@ -982,6 +980,7 @@ static void lower_barrier(struct r10conf *conf)
|
||||
static bool stop_waiting_barrier(struct r10conf *conf)
|
||||
{
|
||||
struct bio_list *bio_list = current->bio_list;
|
||||
struct md_thread *thread;
|
||||
|
||||
/* barrier is dropped */
|
||||
if (!conf->barrier)
|
||||
@@ -997,12 +996,14 @@ static bool stop_waiting_barrier(struct r10conf *conf)
|
||||
(!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
|
||||
return true;
|
||||
|
||||
/* daemon thread must exist while handling io */
|
||||
thread = rcu_dereference_protected(conf->mddev->thread, true);
|
||||
/*
|
||||
* move on if io is issued from raid10d(), nr_pending is not released
|
||||
* from original io(see handle_read_error()). All raise barrier is
|
||||
* blocked until this io is done.
|
||||
*/
|
||||
if (conf->mddev->thread->tsk == current) {
|
||||
if (thread->tsk == current) {
|
||||
WARN_ON_ONCE(atomic_read(&conf->nr_pending) == 0);
|
||||
return true;
|
||||
}
|
||||
@@ -1113,7 +1114,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct bio *bio;
|
||||
|
||||
if (from_schedule || current->bio_list) {
|
||||
if (from_schedule) {
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
bio_list_merge(&conf->pending_bio_list, &plug->pending);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
@@ -1125,23 +1126,15 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||
|
||||
/* we aren't scheduling, so we can do the write-out directly. */
|
||||
bio = bio_list_get(&plug->pending);
|
||||
md_bitmap_unplug(mddev->bitmap);
|
||||
raid1_prepare_flush_writes(mddev->bitmap);
|
||||
wake_up(&conf->wait_barrier);
|
||||
|
||||
while (bio) { /* submit pending writes */
|
||||
struct bio *next = bio->bi_next;
|
||||
struct md_rdev *rdev = (void*)bio->bi_bdev;
|
||||
bio->bi_next = NULL;
|
||||
bio_set_dev(bio, rdev->bdev);
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
bio_io_error(bio);
|
||||
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!bdev_max_discard_sectors(bio->bi_bdev)))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
raid1_submit_write(bio);
|
||||
bio = next;
|
||||
cond_resched();
|
||||
}
|
||||
kfree(plug);
|
||||
}
|
||||
@@ -1282,8 +1275,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
|
||||
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
|
||||
const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
|
||||
unsigned long flags;
|
||||
struct blk_plug_cb *cb;
|
||||
struct raid1_plug_cb *plug = NULL;
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct md_rdev *rdev;
|
||||
int devnum = r10_bio->devs[n_copy].devnum;
|
||||
@@ -1323,14 +1314,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
|
||||
|
||||
atomic_inc(&r10_bio->remaining);
|
||||
|
||||
cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
|
||||
if (cb)
|
||||
plug = container_of(cb, struct raid1_plug_cb, cb);
|
||||
else
|
||||
plug = NULL;
|
||||
if (plug) {
|
||||
bio_list_add(&plug->pending, mbio);
|
||||
} else {
|
||||
if (!raid1_add_bio_to_plug(mddev, mbio, raid10_unplug, conf->copies)) {
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
bio_list_add(&conf->pending_bio_list, mbio);
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
@@ -1479,9 +1463,15 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
|
||||
for (i = 0; i < conf->copies; i++) {
|
||||
int d = r10_bio->devs[i].devnum;
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
|
||||
struct md_rdev *rrdev = rcu_dereference(
|
||||
conf->mirrors[d].replacement);
|
||||
struct md_rdev *rdev, *rrdev;
|
||||
|
||||
rrdev = rcu_dereference(conf->mirrors[d].replacement);
|
||||
/*
|
||||
* Read replacement first to prevent reading both rdev and
|
||||
* replacement as NULL during replacement replace rdev.
|
||||
*/
|
||||
smp_mb();
|
||||
rdev = rcu_dereference(conf->mirrors[d].rdev);
|
||||
if (rdev == rrdev)
|
||||
rrdev = NULL;
|
||||
if (rdev && (test_bit(Faulty, &rdev->flags)))
|
||||
@@ -2148,9 +2138,10 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
int err = -EEXIST;
|
||||
int mirror;
|
||||
int mirror, repl_slot = -1;
|
||||
int first = 0;
|
||||
int last = conf->geo.raid_disks - 1;
|
||||
struct raid10_info *p;
|
||||
|
||||
if (mddev->recovery_cp < MaxSector)
|
||||
/* only hot-add to in-sync arrays, as recovery is
|
||||
@@ -2173,23 +2164,14 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
else
|
||||
mirror = first;
|
||||
for ( ; mirror <= last ; mirror++) {
|
||||
struct raid10_info *p = &conf->mirrors[mirror];
|
||||
p = &conf->mirrors[mirror];
|
||||
if (p->recovery_disabled == mddev->recovery_disabled)
|
||||
continue;
|
||||
if (p->rdev) {
|
||||
if (!test_bit(WantReplacement, &p->rdev->flags) ||
|
||||
p->replacement != NULL)
|
||||
continue;
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
rdev->raid_disk = mirror;
|
||||
err = 0;
|
||||
if (mddev->gendisk)
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
conf->fullsync = 1;
|
||||
rcu_assign_pointer(p->replacement, rdev);
|
||||
break;
|
||||
if (test_bit(WantReplacement, &p->rdev->flags) &&
|
||||
p->replacement == NULL && repl_slot < 0)
|
||||
repl_slot = mirror;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (mddev->gendisk)
|
||||
@@ -2206,6 +2188,19 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
break;
|
||||
}
|
||||
|
||||
if (err && repl_slot >= 0) {
|
||||
p = &conf->mirrors[repl_slot];
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
rdev->raid_disk = repl_slot;
|
||||
err = 0;
|
||||
if (mddev->gendisk)
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
conf->fullsync = 1;
|
||||
rcu_assign_pointer(p->replacement, rdev);
|
||||
}
|
||||
|
||||
print_conf(conf);
|
||||
return err;
|
||||
}
|
||||
@@ -3303,6 +3298,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
int chunks_skipped = 0;
|
||||
sector_t chunk_mask = conf->geo.chunk_mask;
|
||||
int page_idx = 0;
|
||||
int error_disk = -1;
|
||||
|
||||
/*
|
||||
* Allow skipping a full rebuild for incremental assembly
|
||||
@@ -3386,8 +3382,21 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
return reshape_request(mddev, sector_nr, skipped);
|
||||
|
||||
if (chunks_skipped >= conf->geo.raid_disks) {
|
||||
/* if there has been nothing to do on any drive,
|
||||
* then there is nothing to do at all..
|
||||
pr_err("md/raid10:%s: %s fails\n", mdname(mddev),
|
||||
test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? "resync" : "recovery");
|
||||
if (error_disk >= 0 &&
|
||||
!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
|
||||
/*
|
||||
* recovery fails, set mirrors.recovery_disabled,
|
||||
* device shouldn't be added to there.
|
||||
*/
|
||||
conf->mirrors[error_disk].recovery_disabled =
|
||||
mddev->recovery_disabled;
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* if there has been nothing to do on any drive,
|
||||
* then there is nothing to do at all.
|
||||
*/
|
||||
*skipped = 1;
|
||||
return (max_sector - sector_nr) + sectors_skipped;
|
||||
@@ -3437,8 +3446,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
sector_t sect;
|
||||
int must_sync;
|
||||
int any_working;
|
||||
int need_recover = 0;
|
||||
int need_replace = 0;
|
||||
struct raid10_info *mirror = &conf->mirrors[i];
|
||||
struct md_rdev *mrdev, *mreplace;
|
||||
|
||||
@@ -3446,15 +3453,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
mrdev = rcu_dereference(mirror->rdev);
|
||||
mreplace = rcu_dereference(mirror->replacement);
|
||||
|
||||
if (mrdev != NULL &&
|
||||
!test_bit(Faulty, &mrdev->flags) &&
|
||||
!test_bit(In_sync, &mrdev->flags))
|
||||
need_recover = 1;
|
||||
if (mreplace != NULL &&
|
||||
!test_bit(Faulty, &mreplace->flags))
|
||||
need_replace = 1;
|
||||
if (mrdev && (test_bit(Faulty, &mrdev->flags) ||
|
||||
test_bit(In_sync, &mrdev->flags)))
|
||||
mrdev = NULL;
|
||||
if (mreplace && test_bit(Faulty, &mreplace->flags))
|
||||
mreplace = NULL;
|
||||
|
||||
if (!need_recover && !need_replace) {
|
||||
if (!mrdev && !mreplace) {
|
||||
rcu_read_unlock();
|
||||
continue;
|
||||
}
|
||||
@@ -3470,8 +3475,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
rcu_read_unlock();
|
||||
continue;
|
||||
}
|
||||
if (mreplace && test_bit(Faulty, &mreplace->flags))
|
||||
mreplace = NULL;
|
||||
/* Unless we are doing a full sync, or a replacement
|
||||
* we only need to recover the block if it is set in
|
||||
* the bitmap
|
||||
@@ -3490,7 +3493,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
rcu_read_unlock();
|
||||
continue;
|
||||
}
|
||||
atomic_inc(&mrdev->nr_pending);
|
||||
if (mrdev)
|
||||
atomic_inc(&mrdev->nr_pending);
|
||||
if (mreplace)
|
||||
atomic_inc(&mreplace->nr_pending);
|
||||
rcu_read_unlock();
|
||||
@@ -3577,7 +3581,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
r10_bio->devs[1].devnum = i;
|
||||
r10_bio->devs[1].addr = to_addr;
|
||||
|
||||
if (need_recover) {
|
||||
if (mrdev) {
|
||||
bio = r10_bio->devs[1].bio;
|
||||
bio->bi_next = biolist;
|
||||
biolist = bio;
|
||||
@@ -3594,11 +3598,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
bio = r10_bio->devs[1].repl_bio;
|
||||
if (bio)
|
||||
bio->bi_end_io = NULL;
|
||||
/* Note: if need_replace, then bio
|
||||
/* Note: if replace is not NULL, then bio
|
||||
* cannot be NULL as r10buf_pool_alloc will
|
||||
* have allocated it.
|
||||
*/
|
||||
if (!need_replace)
|
||||
if (!mreplace)
|
||||
break;
|
||||
bio->bi_next = biolist;
|
||||
biolist = bio;
|
||||
@@ -3622,7 +3626,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
for (k = 0; k < conf->copies; k++)
|
||||
if (r10_bio->devs[k].devnum == i)
|
||||
break;
|
||||
if (!test_bit(In_sync,
|
||||
if (mrdev && !test_bit(In_sync,
|
||||
&mrdev->flags)
|
||||
&& !rdev_set_badblocks(
|
||||
mrdev,
|
||||
@@ -3643,17 +3647,21 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
mdname(mddev));
|
||||
mirror->recovery_disabled
|
||||
= mddev->recovery_disabled;
|
||||
} else {
|
||||
error_disk = i;
|
||||
}
|
||||
put_buf(r10_bio);
|
||||
if (rb2)
|
||||
atomic_dec(&rb2->remaining);
|
||||
r10_bio = rb2;
|
||||
rdev_dec_pending(mrdev, mddev);
|
||||
if (mrdev)
|
||||
rdev_dec_pending(mrdev, mddev);
|
||||
if (mreplace)
|
||||
rdev_dec_pending(mreplace, mddev);
|
||||
break;
|
||||
}
|
||||
rdev_dec_pending(mrdev, mddev);
|
||||
if (mrdev)
|
||||
rdev_dec_pending(mrdev, mddev);
|
||||
if (mreplace)
|
||||
rdev_dec_pending(mreplace, mddev);
|
||||
if (r10_bio->devs[0].bio->bi_opf & MD_FAILFAST) {
|
||||
@@ -4107,7 +4115,8 @@ static struct r10conf *setup_conf(struct mddev *mddev)
|
||||
atomic_set(&conf->nr_pending, 0);
|
||||
|
||||
err = -ENOMEM;
|
||||
conf->thread = md_register_thread(raid10d, mddev, "raid10");
|
||||
rcu_assign_pointer(conf->thread,
|
||||
md_register_thread(raid10d, mddev, "raid10"));
|
||||
if (!conf->thread)
|
||||
goto out;
|
||||
|
||||
@@ -4152,8 +4161,8 @@ static int raid10_run(struct mddev *mddev)
|
||||
if (!conf)
|
||||
goto out;
|
||||
|
||||
mddev->thread = conf->thread;
|
||||
conf->thread = NULL;
|
||||
rcu_assign_pointer(mddev->thread, conf->thread);
|
||||
rcu_assign_pointer(conf->thread, NULL);
|
||||
|
||||
if (mddev_is_clustered(conf->mddev)) {
|
||||
int fc, fo;
|
||||
@@ -4296,8 +4305,8 @@ static int raid10_run(struct mddev *mddev)
|
||||
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
|
||||
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
|
||||
"reshape");
|
||||
rcu_assign_pointer(mddev->sync_thread,
|
||||
md_register_thread(md_do_sync, mddev, "reshape"));
|
||||
if (!mddev->sync_thread)
|
||||
goto out_free_conf;
|
||||
}
|
||||
@@ -4698,8 +4707,8 @@ static int raid10_start_reshape(struct mddev *mddev)
|
||||
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
|
||||
|
||||
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
|
||||
"reshape");
|
||||
rcu_assign_pointer(mddev->sync_thread,
|
||||
md_register_thread(md_do_sync, mddev, "reshape"));
|
||||
if (!mddev->sync_thread) {
|
||||
ret = -EAGAIN;
|
||||
goto abort;
|
||||
|
||||
@@ -100,7 +100,7 @@ struct r10conf {
|
||||
/* When taking over an array from a different personality, we store
|
||||
* the new thread here until we fully activate the array.
|
||||
*/
|
||||
struct md_thread *thread;
|
||||
struct md_thread __rcu *thread;
|
||||
|
||||
/*
|
||||
* Keep track of cluster resync window to send to other nodes.
|
||||
|
||||
@@ -120,7 +120,7 @@ struct r5l_log {
|
||||
struct bio_set bs;
|
||||
mempool_t meta_pool;
|
||||
|
||||
struct md_thread *reclaim_thread;
|
||||
struct md_thread __rcu *reclaim_thread;
|
||||
unsigned long reclaim_target; /* number of space that need to be
|
||||
* reclaimed. if it's 0, reclaim spaces
|
||||
* used by io_units which are in
|
||||
@@ -1576,17 +1576,18 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
|
||||
|
||||
void r5l_quiesce(struct r5l_log *log, int quiesce)
|
||||
{
|
||||
struct mddev *mddev;
|
||||
struct mddev *mddev = log->rdev->mddev;
|
||||
struct md_thread *thread = rcu_dereference_protected(
|
||||
log->reclaim_thread, lockdep_is_held(&mddev->reconfig_mutex));
|
||||
|
||||
if (quiesce) {
|
||||
/* make sure r5l_write_super_and_discard_space exits */
|
||||
mddev = log->rdev->mddev;
|
||||
wake_up(&mddev->sb_wait);
|
||||
kthread_park(log->reclaim_thread->tsk);
|
||||
kthread_park(thread->tsk);
|
||||
r5l_wake_reclaim(log, MaxSector);
|
||||
r5l_do_reclaim(log);
|
||||
} else
|
||||
kthread_unpark(log->reclaim_thread->tsk);
|
||||
kthread_unpark(thread->tsk);
|
||||
}
|
||||
|
||||
bool r5l_log_disk_error(struct r5conf *conf)
|
||||
@@ -3063,6 +3064,7 @@ void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
|
||||
{
|
||||
struct r5l_log *log;
|
||||
struct md_thread *thread;
|
||||
int ret;
|
||||
|
||||
pr_debug("md/raid:%s: using device %pg as journal\n",
|
||||
@@ -3121,11 +3123,13 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
|
||||
spin_lock_init(&log->tree_lock);
|
||||
INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN);
|
||||
|
||||
log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
|
||||
log->rdev->mddev, "reclaim");
|
||||
if (!log->reclaim_thread)
|
||||
thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev,
|
||||
"reclaim");
|
||||
if (!thread)
|
||||
goto reclaim_thread;
|
||||
log->reclaim_thread->timeout = R5C_RECLAIM_WAKEUP_INTERVAL;
|
||||
|
||||
thread->timeout = R5C_RECLAIM_WAKEUP_INTERVAL;
|
||||
rcu_assign_pointer(log->reclaim_thread, thread);
|
||||
|
||||
init_waitqueue_head(&log->iounit_wait);
|
||||
|
||||
|
||||
@@ -5966,6 +5966,19 @@ static int add_all_stripe_bios(struct r5conf *conf,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool reshape_inprogress(struct mddev *mddev)
|
||||
{
|
||||
return test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
||||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_DONE, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
}
|
||||
|
||||
static bool reshape_disabled(struct mddev *mddev)
|
||||
{
|
||||
return is_md_suspended(mddev) || !md_is_rdwr(mddev);
|
||||
}
|
||||
|
||||
static enum stripe_result make_stripe_request(struct mddev *mddev,
|
||||
struct r5conf *conf, struct stripe_request_ctx *ctx,
|
||||
sector_t logical_sector, struct bio *bi)
|
||||
@@ -5997,7 +6010,8 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
|
||||
if (ahead_of_reshape(mddev, logical_sector,
|
||||
conf->reshape_safe)) {
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
return STRIPE_SCHEDULE_AND_RETRY;
|
||||
ret = STRIPE_SCHEDULE_AND_RETRY;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
@@ -6076,6 +6090,15 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,

out_release:
	raid5_release_stripe(sh);
out:
	if (ret == STRIPE_SCHEDULE_AND_RETRY && !reshape_inprogress(mddev) &&
	    reshape_disabled(mddev)) {
		bi->bi_status = BLK_STS_IOERR;
		ret = STRIPE_FAIL;
		pr_err("md/raid456:%s: io failed across reshape position while reshape can't make progress.\n",
		       mdname(mddev));
	}

	return ret;
}
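The new out: path fails a bio with BLK_STS_IOERR instead of retrying it when the request sits across the reshape position but reshape can no longer make progress (for example an interrupted reshape on a suspended or read-only array). The snippet below is an illustrative userspace model rather than kernel code; the flag names mirror the MD_RECOVERY_* bits and everything else is invented, but it exercises the same predicate logic.

/*
 * Illustrative model of reshape_inprogress()/reshape_disabled(), built on
 * plain bit flags so the failure condition can be run in userspace.
 */
#include <stdbool.h>
#include <stdio.h>

enum {
	RECOVERY_RUNNING = 1 << 0,
	RECOVERY_RESHAPE = 1 << 1,
	RECOVERY_DONE    = 1 << 2,
	RECOVERY_INTR    = 1 << 3,
};

struct fake_mddev {
	unsigned long recovery;
	bool suspended;
	bool read_only;
};

static bool reshape_inprogress(const struct fake_mddev *m)
{
	return (m->recovery & RECOVERY_RESHAPE) &&
	       (m->recovery & RECOVERY_RUNNING) &&
	       !(m->recovery & RECOVERY_DONE) &&
	       !(m->recovery & RECOVERY_INTR);
}

static bool reshape_disabled(const struct fake_mddev *m)
{
	return m->suspended || m->read_only;
}

int main(void)
{
	/* reshape was interrupted and the array is read-only: IO that would
	 * have to wait for reshape progress is failed rather than retried */
	struct fake_mddev m = {
		.recovery  = RECOVERY_RESHAPE | RECOVERY_INTR,
		.read_only = true,
	};

	if (!reshape_inprogress(&m) && reshape_disabled(&m))
		printf("fail the bio instead of STRIPE_SCHEDULE_AND_RETRY\n");
	return 0;
}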
@@ -7708,7 +7731,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
	}

	sprintf(pers_name, "raid%d", mddev->new_level);
	conf->thread = md_register_thread(raid5d, mddev, pers_name);
	rcu_assign_pointer(conf->thread,
			   md_register_thread(raid5d, mddev, pers_name));
	if (!conf->thread) {
		pr_warn("md/raid:%s: couldn't allocate thread.\n",
			mdname(mddev));
@@ -7931,8 +7955,8 @@ static int raid5_run(struct mddev *mddev)
	}

	conf->min_offset_diff = min_offset_diff;
	mddev->thread = conf->thread;
	conf->thread = NULL;
	rcu_assign_pointer(mddev->thread, conf->thread);
	rcu_assign_pointer(conf->thread, NULL);
	mddev->private = conf;

	for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
@@ -8029,8 +8053,8 @@ static int raid5_run(struct mddev *mddev)
		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
		set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
		mddev->sync_thread = md_register_thread(md_do_sync, mddev,
							"reshape");
		rcu_assign_pointer(mddev->sync_thread,
			md_register_thread(md_do_sync, mddev, "reshape"));
		if (!mddev->sync_thread)
			goto abort;
	}
@@ -8377,6 +8401,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
		p = conf->disks + disk;
		tmp = rdev_mdlock_deref(mddev, p->rdev);
		if (test_bit(WantReplacement, &tmp->flags) &&
		    mddev->reshape_position == MaxSector &&
		    p->replacement == NULL) {
			clear_bit(In_sync, &rdev->flags);
			set_bit(Replacement, &rdev->flags);
@@ -8500,6 +8525,7 @@ static int raid5_start_reshape(struct mddev *mddev)
	struct r5conf *conf = mddev->private;
	struct md_rdev *rdev;
	int spares = 0;
	int i;
	unsigned long flags;

	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
@@ -8511,6 +8537,13 @@ static int raid5_start_reshape(struct mddev *mddev)
	if (has_failed(conf))
		return -EINVAL;

	/* raid5 can't handle concurrent reshape and recovery */
	if (mddev->recovery_cp < MaxSector)
		return -EBUSY;
	for (i = 0; i < conf->raid_disks; i++)
		if (rdev_mdlock_deref(mddev, conf->disks[i].replacement))
			return -EBUSY;

	rdev_for_each(rdev, mddev) {
		if (!test_bit(In_sync, &rdev->flags)
		    && !test_bit(Faulty, &rdev->flags))
@@ -8607,8 +8640,8 @@ static int raid5_start_reshape(struct mddev *mddev)
	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	mddev->sync_thread = md_register_thread(md_do_sync, mddev,
						"reshape");
	rcu_assign_pointer(mddev->sync_thread,
		md_register_thread(md_do_sync, mddev, "reshape"));
	if (!mddev->sync_thread) {
		mddev->recovery = 0;
		spin_lock_irq(&conf->device_lock);
@@ -9043,6 +9076,22 @@ static int raid5_start(struct mddev *mddev)
	return r5l_start(conf->log);
}

static void raid5_prepare_suspend(struct mddev *mddev)
{
	struct r5conf *conf = mddev->private;

	wait_event(mddev->sb_wait, !reshape_inprogress(mddev) ||
				   percpu_ref_is_zero(&mddev->active_io));
	if (percpu_ref_is_zero(&mddev->active_io))
		return;

	/*
	 * Reshape is not in progress, and array is suspended, io that is
	 * waiting for reshpape can never be done.
	 */
	wake_up(&conf->wait_for_overlap);
}
static struct md_personality raid6_personality =
{
	.name = "raid6",
@@ -9063,6 +9112,7 @@ static struct md_personality raid6_personality =
	.check_reshape = raid6_check_reshape,
	.start_reshape = raid5_start_reshape,
	.finish_reshape = raid5_finish_reshape,
	.prepare_suspend = raid5_prepare_suspend,
	.quiesce = raid5_quiesce,
	.takeover = raid6_takeover,
	.change_consistency_policy = raid5_change_consistency_policy,
@@ -9087,6 +9137,7 @@ static struct md_personality raid5_personality =
	.check_reshape = raid5_check_reshape,
	.start_reshape = raid5_start_reshape,
	.finish_reshape = raid5_finish_reshape,
	.prepare_suspend = raid5_prepare_suspend,
	.quiesce = raid5_quiesce,
	.takeover = raid5_takeover,
	.change_consistency_policy = raid5_change_consistency_policy,
@@ -9112,6 +9163,7 @@ static struct md_personality raid4_personality =
	.check_reshape = raid5_check_reshape,
	.start_reshape = raid5_start_reshape,
	.finish_reshape = raid5_finish_reshape,
	.prepare_suspend = raid5_prepare_suspend,
	.quiesce = raid5_quiesce,
	.takeover = raid4_takeover,
	.change_consistency_policy = raid5_change_consistency_policy,
@@ -679,7 +679,7 @@ struct r5conf {
	/* When taking over an array from a different personality, we store
	 * the new thread here until we fully activate the array.
	 */
	struct md_thread *thread;
	struct md_thread __rcu *thread;
	struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS];
	struct r5worker_group *worker_groups;
	int group_cnt;
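Annotating r5conf::thread as __rcu obliges writers to publish it through rcu_assign_pointer(), as the setup_conf() and raid5_run() hunks above do, so a reader can never observe the pointer before the md_thread it points to is fully set up. As a rough userspace analogue (illustrative only, with invented structures and names), the same ordering can be expressed with a C11 release store:

/*
 * Illustrative sketch, not kernel code: a release store plays the role of
 * rcu_assign_pointer() by ordering initialisation before publication.
 */
#include <stdatomic.h>
#include <stdio.h>

struct md_thread { const char *name; };

struct fake_conf {
	_Atomic(struct md_thread *) thread;	/* plays the role of __rcu */
};

static void publish_thread(struct fake_conf *conf, struct md_thread *t)
{
	/* readers that load with acquire/consume semantics cannot see a
	 * half-initialised md_thread through this pointer */
	atomic_store_explicit(&conf->thread, t, memory_order_release);
}

int main(void)
{
	static struct md_thread raid5d_thread = { .name = "raid5" };
	struct fake_conf conf = { .thread = NULL };

	publish_thread(&conf, &raid5d_thread);
	printf("published %s\n",
	       atomic_load_explicit(&conf.thread, memory_order_acquire)->name);
	return 0;
}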
lib/raid6/neon.h (new file, 22 lines)
@@ -0,0 +1,22 @@
// SPDX-License-Identifier: GPL-2.0-only

void raid6_neon1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs);
void raid6_neon1_xor_syndrome_real(int disks, int start, int stop,
				   unsigned long bytes, void **ptrs);
void raid6_neon2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs);
void raid6_neon2_xor_syndrome_real(int disks, int start, int stop,
				   unsigned long bytes, void **ptrs);
void raid6_neon4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs);
void raid6_neon4_xor_syndrome_real(int disks, int start, int stop,
				   unsigned long bytes, void **ptrs);
void raid6_neon8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs);
void raid6_neon8_xor_syndrome_real(int disks, int start, int stop,
				   unsigned long bytes, void **ptrs);
void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
			      uint8_t *dq, const uint8_t *pbmul,
			      const uint8_t *qmul);

void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
			      const uint8_t *qmul);
@@ -25,6 +25,7 @@
 */

#include <arm_neon.h>
#include "neon.h"

typedef uint8x16_t unative_t;
@@ -8,6 +8,7 @@

#ifdef __KERNEL__
#include <asm/neon.h>
#include "neon.h"
#else
#define kernel_neon_begin()
#define kernel_neon_end()
@@ -19,13 +20,6 @@ static int raid6_has_neon(void)
	return cpu_has_neon();
}

void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
			      uint8_t *dq, const uint8_t *pbmul,
			      const uint8_t *qmul);

void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
			      const uint8_t *qmul);

static void raid6_2data_recov_neon(int disks, size_t bytes, int faila,
				   int failb, void **ptrs)
{
@@ -5,6 +5,7 @@
 */

#include <arm_neon.h>
#include "neon.h"

#ifdef CONFIG_ARM
/*