mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-07 18:37:58 -04:00
Merge tag 'md-6.14-20250113' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux into for-6.14/block
Pull MD updates from Song: "1. Reintroduce md-linear, by Yu Kuai. 2. md-bitmap refactor and fix, by Yu Kuai. 3. Replace kmap_atomic with kmap_local_page, by David Reaver." * tag 'md-6.14-20250113' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux: md/md-bitmap: move bitmap_{start, end}write to md upper layer md/raid5: implement pers->bitmap_sector() md: add a new callback pers->bitmap_sector() md/md-bitmap: remove the last parameter for bimtap_ops->endwrite() md/md-bitmap: factor behind write counters out from bitmap_{start/end}write() md: Replace deprecated kmap_atomic() with kmap_local_page() md: reintroduce md-linear
This commit is contained in:
@@ -61,6 +61,19 @@ config MD_BITMAP_FILE
|
||||
various kernel APIs and can only work with files on a file system not
|
||||
actually sitting on the MD device.
|
||||
|
||||
config MD_LINEAR
|
||||
tristate "Linear (append) mode"
|
||||
depends on BLK_DEV_MD
|
||||
help
|
||||
If you say Y here, then your multiple devices driver will be able to
|
||||
use the so-called linear mode, i.e. it will combine the hard disk
|
||||
partitions by simply appending one to the other.
|
||||
|
||||
To compile this as a module, choose M here: the module
|
||||
will be called linear.
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config MD_RAID0
|
||||
tristate "RAID-0 (striping) mode"
|
||||
depends on BLK_DEV_MD
|
||||
|
||||
@@ -29,12 +29,14 @@ dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
|
||||
|
||||
md-mod-y += md.o md-bitmap.o
|
||||
raid456-y += raid5.o raid5-cache.o raid5-ppl.o
|
||||
linear-y += md-linear.o
|
||||
|
||||
# Note: link order is important. All raid personalities
|
||||
# must come before md.o, as they each initialise
|
||||
# themselves, and md.o may use the personalities when it
|
||||
# is auto-initialised.
|
||||
|
||||
obj-$(CONFIG_MD_LINEAR) += linear.o
|
||||
obj-$(CONFIG_MD_RAID0) += raid0.o
|
||||
obj-$(CONFIG_MD_RAID1) += raid1.o
|
||||
obj-$(CONFIG_MD_RAID10) += raid10.o
|
||||
|
||||
@@ -49,6 +49,7 @@ static int md_setup_ents __initdata;
|
||||
* instead of just one. -- KTK
|
||||
* 18May2000: Added support for persistent-superblock arrays:
|
||||
* md=n,0,factor,fault,device-list uses RAID0 for device n
|
||||
* md=n,-1,factor,fault,device-list uses LINEAR for device n
|
||||
* md=n,device-list reads a RAID superblock from the devices
|
||||
* elements in device-list are read by name_to_kdev_t so can be
|
||||
* a hex number or something like /dev/hda1 /dev/sdb
|
||||
@@ -87,7 +88,7 @@ static int __init md_setup(char *str)
|
||||
md_setup_ents++;
|
||||
switch (get_option(&str, &level)) { /* RAID level */
|
||||
case 2: /* could be 0 or -1.. */
|
||||
if (level == 0) {
|
||||
if (level == 0 || level == LEVEL_LINEAR) {
|
||||
if (get_option(&str, &factor) != 2 || /* Chunk Size */
|
||||
get_option(&str, &fault) != 2) {
|
||||
printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
|
||||
@@ -95,7 +96,10 @@ static int __init md_setup(char *str)
|
||||
}
|
||||
md_setup_args[ent].level = level;
|
||||
md_setup_args[ent].chunk = 1 << (factor+12);
|
||||
pername = "raid0";
|
||||
if (level == LEVEL_LINEAR)
|
||||
pername = "linear";
|
||||
else
|
||||
pername = "raid0";
|
||||
break;
|
||||
}
|
||||
fallthrough;
|
||||
|
||||
@@ -682,7 +682,7 @@ static void bitmap_update_sb(void *data)
|
||||
return;
|
||||
if (!bitmap->storage.sb_page) /* no superblock */
|
||||
return;
|
||||
sb = kmap_atomic(bitmap->storage.sb_page);
|
||||
sb = kmap_local_page(bitmap->storage.sb_page);
|
||||
sb->events = cpu_to_le64(bitmap->mddev->events);
|
||||
if (bitmap->mddev->events < bitmap->events_cleared)
|
||||
/* rocking back to read-only */
|
||||
@@ -702,7 +702,7 @@ static void bitmap_update_sb(void *data)
|
||||
sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
|
||||
sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
|
||||
bitmap_info.space);
|
||||
kunmap_atomic(sb);
|
||||
kunmap_local(sb);
|
||||
|
||||
if (bitmap->storage.file)
|
||||
write_file_page(bitmap, bitmap->storage.sb_page, 1);
|
||||
@@ -717,7 +717,7 @@ static void bitmap_print_sb(struct bitmap *bitmap)
|
||||
|
||||
if (!bitmap || !bitmap->storage.sb_page)
|
||||
return;
|
||||
sb = kmap_atomic(bitmap->storage.sb_page);
|
||||
sb = kmap_local_page(bitmap->storage.sb_page);
|
||||
pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
|
||||
pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic));
|
||||
pr_debug(" version: %u\n", le32_to_cpu(sb->version));
|
||||
@@ -736,7 +736,7 @@ static void bitmap_print_sb(struct bitmap *bitmap)
|
||||
pr_debug(" sync size: %llu KB\n",
|
||||
(unsigned long long)le64_to_cpu(sb->sync_size)/2);
|
||||
pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
|
||||
kunmap_atomic(sb);
|
||||
kunmap_local(sb);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -760,7 +760,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
|
||||
return -ENOMEM;
|
||||
bitmap->storage.sb_index = 0;
|
||||
|
||||
sb = kmap_atomic(bitmap->storage.sb_page);
|
||||
sb = kmap_local_page(bitmap->storage.sb_page);
|
||||
|
||||
sb->magic = cpu_to_le32(BITMAP_MAGIC);
|
||||
sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
|
||||
@@ -768,7 +768,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
|
||||
chunksize = bitmap->mddev->bitmap_info.chunksize;
|
||||
BUG_ON(!chunksize);
|
||||
if (!is_power_of_2(chunksize)) {
|
||||
kunmap_atomic(sb);
|
||||
kunmap_local(sb);
|
||||
pr_warn("bitmap chunksize not a power of 2\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -803,7 +803,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
|
||||
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
|
||||
bitmap->mddev->bitmap_info.nodes = 0;
|
||||
|
||||
kunmap_atomic(sb);
|
||||
kunmap_local(sb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -865,7 +865,7 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
|
||||
return err;
|
||||
|
||||
err = -EINVAL;
|
||||
sb = kmap_atomic(sb_page);
|
||||
sb = kmap_local_page(sb_page);
|
||||
|
||||
chunksize = le32_to_cpu(sb->chunksize);
|
||||
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
|
||||
@@ -932,7 +932,7 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
|
||||
err = 0;
|
||||
|
||||
out:
|
||||
kunmap_atomic(sb);
|
||||
kunmap_local(sb);
|
||||
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
|
||||
/* Assigning chunksize is required for "re_read" */
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
@@ -1161,12 +1161,12 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
|
||||
bit = file_page_offset(&bitmap->storage, chunk);
|
||||
|
||||
/* set the bit */
|
||||
kaddr = kmap_atomic(page);
|
||||
kaddr = kmap_local_page(page);
|
||||
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
||||
set_bit(bit, kaddr);
|
||||
else
|
||||
set_bit_le(bit, kaddr);
|
||||
kunmap_atomic(kaddr);
|
||||
kunmap_local(kaddr);
|
||||
pr_debug("set file bit %lu page %lu\n", bit, index);
|
||||
/* record page number so it gets flushed to disk when unplug occurs */
|
||||
set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY);
|
||||
@@ -1190,12 +1190,12 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
|
||||
if (!page)
|
||||
return;
|
||||
bit = file_page_offset(&bitmap->storage, chunk);
|
||||
paddr = kmap_atomic(page);
|
||||
paddr = kmap_local_page(page);
|
||||
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
||||
clear_bit(bit, paddr);
|
||||
else
|
||||
clear_bit_le(bit, paddr);
|
||||
kunmap_atomic(paddr);
|
||||
kunmap_local(paddr);
|
||||
if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
|
||||
set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING);
|
||||
bitmap->allclean = 0;
|
||||
@@ -1214,12 +1214,12 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
|
||||
if (!page)
|
||||
return -EINVAL;
|
||||
bit = file_page_offset(&bitmap->storage, chunk);
|
||||
paddr = kmap_atomic(page);
|
||||
paddr = kmap_local_page(page);
|
||||
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
||||
set = test_bit(bit, paddr);
|
||||
else
|
||||
set = test_bit_le(bit, paddr);
|
||||
kunmap_atomic(paddr);
|
||||
kunmap_local(paddr);
|
||||
return set;
|
||||
}
|
||||
|
||||
@@ -1388,9 +1388,9 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
||||
* If the bitmap is out of date, dirty the whole page
|
||||
* and write it out
|
||||
*/
|
||||
paddr = kmap_atomic(page);
|
||||
paddr = kmap_local_page(page);
|
||||
memset(paddr + offset, 0xff, PAGE_SIZE - offset);
|
||||
kunmap_atomic(paddr);
|
||||
kunmap_local(paddr);
|
||||
|
||||
filemap_write_page(bitmap, i, true);
|
||||
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
|
||||
@@ -1406,12 +1406,12 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
||||
void *paddr;
|
||||
bool was_set;
|
||||
|
||||
paddr = kmap_atomic(page);
|
||||
paddr = kmap_local_page(page);
|
||||
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
||||
was_set = test_bit(bit, paddr);
|
||||
else
|
||||
was_set = test_bit_le(bit, paddr);
|
||||
kunmap_atomic(paddr);
|
||||
kunmap_local(paddr);
|
||||
|
||||
if (was_set) {
|
||||
/* if the disk bit is set, set the memory bit */
|
||||
@@ -1546,10 +1546,10 @@ static void bitmap_daemon_work(struct mddev *mddev)
|
||||
bitmap_super_t *sb;
|
||||
bitmap->need_sync = 0;
|
||||
if (bitmap->storage.filemap) {
|
||||
sb = kmap_atomic(bitmap->storage.sb_page);
|
||||
sb = kmap_local_page(bitmap->storage.sb_page);
|
||||
sb->events_cleared =
|
||||
cpu_to_le64(bitmap->events_cleared);
|
||||
kunmap_atomic(sb);
|
||||
kunmap_local(sb);
|
||||
set_page_attr(bitmap, 0,
|
||||
BITMAP_PAGE_NEEDWRITE);
|
||||
}
|
||||
@@ -1671,24 +1671,13 @@ __acquires(bitmap->lock)
|
||||
}
|
||||
|
||||
static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
|
||||
unsigned long sectors, bool behind)
|
||||
unsigned long sectors)
|
||||
{
|
||||
struct bitmap *bitmap = mddev->bitmap;
|
||||
|
||||
if (!bitmap)
|
||||
return 0;
|
||||
|
||||
if (behind) {
|
||||
int bw;
|
||||
atomic_inc(&bitmap->behind_writes);
|
||||
bw = atomic_read(&bitmap->behind_writes);
|
||||
if (bw > bitmap->behind_writes_used)
|
||||
bitmap->behind_writes_used = bw;
|
||||
|
||||
pr_debug("inc write-behind count %d/%lu\n",
|
||||
bw, bitmap->mddev->bitmap_info.max_write_behind);
|
||||
}
|
||||
|
||||
while (sectors) {
|
||||
sector_t blocks;
|
||||
bitmap_counter_t *bmc;
|
||||
@@ -1737,21 +1726,13 @@ static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
|
||||
}
|
||||
|
||||
static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
|
||||
unsigned long sectors, bool success, bool behind)
|
||||
unsigned long sectors)
|
||||
{
|
||||
struct bitmap *bitmap = mddev->bitmap;
|
||||
|
||||
if (!bitmap)
|
||||
return;
|
||||
|
||||
if (behind) {
|
||||
if (atomic_dec_and_test(&bitmap->behind_writes))
|
||||
wake_up(&bitmap->behind_wait);
|
||||
pr_debug("dec write-behind count %d/%lu\n",
|
||||
atomic_read(&bitmap->behind_writes),
|
||||
bitmap->mddev->bitmap_info.max_write_behind);
|
||||
}
|
||||
|
||||
while (sectors) {
|
||||
sector_t blocks;
|
||||
unsigned long flags;
|
||||
@@ -1764,15 +1745,16 @@ static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
|
||||
return;
|
||||
}
|
||||
|
||||
if (success && !bitmap->mddev->degraded &&
|
||||
bitmap->events_cleared < bitmap->mddev->events) {
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
bitmap->need_sync = 1;
|
||||
sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
|
||||
}
|
||||
|
||||
if (!success && !NEEDED(*bmc))
|
||||
if (!bitmap->mddev->degraded) {
|
||||
if (bitmap->events_cleared < bitmap->mddev->events) {
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
bitmap->need_sync = 1;
|
||||
sysfs_notify_dirent_safe(
|
||||
bitmap->sysfs_can_clear);
|
||||
}
|
||||
} else if (!NEEDED(*bmc)) {
|
||||
*bmc |= NEEDED_MASK;
|
||||
}
|
||||
|
||||
if (COUNTER(*bmc) == COUNTER_MAX)
|
||||
wake_up(&bitmap->overflow_wait);
|
||||
@@ -2062,6 +2044,37 @@ static void md_bitmap_free(void *data)
|
||||
kfree(bitmap);
|
||||
}
|
||||
|
||||
static void bitmap_start_behind_write(struct mddev *mddev)
|
||||
{
|
||||
struct bitmap *bitmap = mddev->bitmap;
|
||||
int bw;
|
||||
|
||||
if (!bitmap)
|
||||
return;
|
||||
|
||||
atomic_inc(&bitmap->behind_writes);
|
||||
bw = atomic_read(&bitmap->behind_writes);
|
||||
if (bw > bitmap->behind_writes_used)
|
||||
bitmap->behind_writes_used = bw;
|
||||
|
||||
pr_debug("inc write-behind count %d/%lu\n",
|
||||
bw, bitmap->mddev->bitmap_info.max_write_behind);
|
||||
}
|
||||
|
||||
static void bitmap_end_behind_write(struct mddev *mddev)
|
||||
{
|
||||
struct bitmap *bitmap = mddev->bitmap;
|
||||
|
||||
if (!bitmap)
|
||||
return;
|
||||
|
||||
if (atomic_dec_and_test(&bitmap->behind_writes))
|
||||
wake_up(&bitmap->behind_wait);
|
||||
pr_debug("dec write-behind count %d/%lu\n",
|
||||
atomic_read(&bitmap->behind_writes),
|
||||
bitmap->mddev->bitmap_info.max_write_behind);
|
||||
}
|
||||
|
||||
static void bitmap_wait_behind_writes(struct mddev *mddev)
|
||||
{
|
||||
struct bitmap *bitmap = mddev->bitmap;
|
||||
@@ -2981,6 +2994,9 @@ static struct bitmap_operations bitmap_ops = {
|
||||
.dirty_bits = bitmap_dirty_bits,
|
||||
.unplug = bitmap_unplug,
|
||||
.daemon_work = bitmap_daemon_work,
|
||||
|
||||
.start_behind_write = bitmap_start_behind_write,
|
||||
.end_behind_write = bitmap_end_behind_write,
|
||||
.wait_behind_writes = bitmap_wait_behind_writes,
|
||||
|
||||
.startwrite = bitmap_startwrite,
|
||||
|
||||
@@ -84,12 +84,15 @@ struct bitmap_operations {
|
||||
unsigned long e);
|
||||
void (*unplug)(struct mddev *mddev, bool sync);
|
||||
void (*daemon_work)(struct mddev *mddev);
|
||||
|
||||
void (*start_behind_write)(struct mddev *mddev);
|
||||
void (*end_behind_write)(struct mddev *mddev);
|
||||
void (*wait_behind_writes)(struct mddev *mddev);
|
||||
|
||||
int (*startwrite)(struct mddev *mddev, sector_t offset,
|
||||
unsigned long sectors, bool behind);
|
||||
unsigned long sectors);
|
||||
void (*endwrite)(struct mddev *mddev, sector_t offset,
|
||||
unsigned long sectors, bool success, bool behind);
|
||||
unsigned long sectors);
|
||||
bool (*start_sync)(struct mddev *mddev, sector_t offset,
|
||||
sector_t *blocks, bool degraded);
|
||||
void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks);
|
||||
|
||||
354
drivers/md/md-linear.c
Normal file
354
drivers/md/md-linear.c
Normal file
@@ -0,0 +1,354 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* linear.c : Multiple Devices driver for Linux Copyright (C) 1994-96 Marc
|
||||
* ZYNGIER <zyngier@ufr-info-p7.ibp.fr> or <maz@gloups.fdn.fr>
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/raid/md_u.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <trace/events/block.h>
|
||||
#include "md.h"
|
||||
|
||||
struct dev_info {
|
||||
struct md_rdev *rdev;
|
||||
sector_t end_sector;
|
||||
};
|
||||
|
||||
struct linear_conf {
|
||||
struct rcu_head rcu;
|
||||
sector_t array_sectors;
|
||||
/* a copy of mddev->raid_disks */
|
||||
int raid_disks;
|
||||
struct dev_info disks[] __counted_by(raid_disks);
|
||||
};
|
||||
|
||||
/*
|
||||
* find which device holds a particular offset
|
||||
*/
|
||||
static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
|
||||
{
|
||||
int lo, mid, hi;
|
||||
struct linear_conf *conf;
|
||||
|
||||
lo = 0;
|
||||
hi = mddev->raid_disks - 1;
|
||||
conf = mddev->private;
|
||||
|
||||
/*
|
||||
* Binary Search
|
||||
*/
|
||||
|
||||
while (hi > lo) {
|
||||
|
||||
mid = (hi + lo) / 2;
|
||||
if (sector < conf->disks[mid].end_sector)
|
||||
hi = mid;
|
||||
else
|
||||
lo = mid + 1;
|
||||
}
|
||||
|
||||
return conf->disks + lo;
|
||||
}
|
||||
|
||||
/*
 * Report the size of the array in sectors.
 *
 * The value is always the total recorded in the private conf; a non-zero
 * @sectors or @raid_disks only triggers a one-time warning.
 */
static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
	struct linear_conf *conf = mddev->private;

	WARN_ONCE(sectors || raid_disks,
		  "%s does not support generic reshape\n", __func__);

	return conf->array_sectors;
}
|
||||
|
||||
static int linear_set_limits(struct mddev *mddev)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
int err;
|
||||
|
||||
md_init_stacking_limits(&lim);
|
||||
lim.max_hw_sectors = mddev->chunk_sectors;
|
||||
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
|
||||
lim.io_min = mddev->chunk_sectors << 9;
|
||||
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
|
||||
if (err) {
|
||||
queue_limits_cancel_update(mddev->gendisk->queue);
|
||||
return err;
|
||||
}
|
||||
|
||||
return queue_limits_set(mddev->gendisk->queue, &lim);
|
||||
}
|
||||
|
||||
static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
|
||||
{
|
||||
struct linear_conf *conf;
|
||||
struct md_rdev *rdev;
|
||||
int ret = -EINVAL;
|
||||
int cnt;
|
||||
int i;
|
||||
|
||||
conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
|
||||
if (!conf)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/*
|
||||
* conf->raid_disks is copy of mddev->raid_disks. The reason to
|
||||
* keep a copy of mddev->raid_disks in struct linear_conf is,
|
||||
* mddev->raid_disks may not be consistent with pointers number of
|
||||
* conf->disks[] when it is updated in linear_add() and used to
|
||||
* iterate old conf->disks[] earray in linear_congested().
|
||||
* Here conf->raid_disks is always consitent with number of
|
||||
* pointers in conf->disks[] array, and mddev->private is updated
|
||||
* with rcu_assign_pointer() in linear_addr(), such race can be
|
||||
* avoided.
|
||||
*/
|
||||
conf->raid_disks = raid_disks;
|
||||
|
||||
cnt = 0;
|
||||
conf->array_sectors = 0;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
int j = rdev->raid_disk;
|
||||
struct dev_info *disk = conf->disks + j;
|
||||
sector_t sectors;
|
||||
|
||||
if (j < 0 || j >= raid_disks || disk->rdev) {
|
||||
pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
|
||||
mdname(mddev));
|
||||
goto out;
|
||||
}
|
||||
|
||||
disk->rdev = rdev;
|
||||
if (mddev->chunk_sectors) {
|
||||
sectors = rdev->sectors;
|
||||
sector_div(sectors, mddev->chunk_sectors);
|
||||
rdev->sectors = sectors * mddev->chunk_sectors;
|
||||
}
|
||||
|
||||
conf->array_sectors += rdev->sectors;
|
||||
cnt++;
|
||||
}
|
||||
if (cnt != raid_disks) {
|
||||
pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
|
||||
mdname(mddev));
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we calculate the device offsets.
|
||||
*/
|
||||
conf->disks[0].end_sector = conf->disks[0].rdev->sectors;
|
||||
|
||||
for (i = 1; i < raid_disks; i++)
|
||||
conf->disks[i].end_sector =
|
||||
conf->disks[i-1].end_sector +
|
||||
conf->disks[i].rdev->sectors;
|
||||
|
||||
if (!mddev_is_dm(mddev)) {
|
||||
ret = linear_set_limits(mddev);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
return conf;
|
||||
|
||||
out:
|
||||
kfree(conf);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int linear_run(struct mddev *mddev)
|
||||
{
|
||||
struct linear_conf *conf;
|
||||
int ret;
|
||||
|
||||
if (md_check_no_bitmap(mddev))
|
||||
return -EINVAL;
|
||||
|
||||
conf = linear_conf(mddev, mddev->raid_disks);
|
||||
if (IS_ERR(conf))
|
||||
return PTR_ERR(conf);
|
||||
|
||||
mddev->private = conf;
|
||||
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
|
||||
|
||||
ret = md_integrity_register(mddev);
|
||||
if (ret) {
|
||||
kfree(conf);
|
||||
mddev->private = NULL;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
/* Adding a drive to a linear array allows the array to grow.
|
||||
* It is permitted if the new drive has a matching superblock
|
||||
* already on it, with raid_disk equal to raid_disks.
|
||||
* It is achieved by creating a new linear_private_data structure
|
||||
* and swapping it in in-place of the current one.
|
||||
* The current one is never freed until the array is stopped.
|
||||
* This avoids races.
|
||||
*/
|
||||
struct linear_conf *newconf, *oldconf;
|
||||
|
||||
if (rdev->saved_raid_disk != mddev->raid_disks)
|
||||
return -EINVAL;
|
||||
|
||||
rdev->raid_disk = rdev->saved_raid_disk;
|
||||
rdev->saved_raid_disk = -1;
|
||||
|
||||
newconf = linear_conf(mddev, mddev->raid_disks + 1);
|
||||
if (!newconf)
|
||||
return -ENOMEM;
|
||||
|
||||
/* newconf->raid_disks already keeps a copy of * the increased
|
||||
* value of mddev->raid_disks, WARN_ONCE() is just used to make
|
||||
* sure of this. It is possible that oldconf is still referenced
|
||||
* in linear_congested(), therefore kfree_rcu() is used to free
|
||||
* oldconf until no one uses it anymore.
|
||||
*/
|
||||
oldconf = rcu_dereference_protected(mddev->private,
|
||||
lockdep_is_held(&mddev->reconfig_mutex));
|
||||
mddev->raid_disks++;
|
||||
WARN_ONCE(mddev->raid_disks != newconf->raid_disks,
|
||||
"copied raid_disks doesn't match mddev->raid_disks");
|
||||
rcu_assign_pointer(mddev->private, newconf);
|
||||
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
|
||||
set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
|
||||
kfree_rcu(oldconf, rcu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Release the private conf (@priv) of a linear array. */
static void linear_free(struct mddev *mddev, void *priv)
{
	kfree(priv);
}
|
||||
|
||||
static bool linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
struct dev_info *tmp_dev;
|
||||
sector_t start_sector, end_sector, data_offset;
|
||||
sector_t bio_sector = bio->bi_iter.bi_sector;
|
||||
|
||||
if (unlikely(bio->bi_opf & REQ_PREFLUSH)
|
||||
&& md_flush_request(mddev, bio))
|
||||
return true;
|
||||
|
||||
tmp_dev = which_dev(mddev, bio_sector);
|
||||
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
|
||||
end_sector = tmp_dev->end_sector;
|
||||
data_offset = tmp_dev->rdev->data_offset;
|
||||
|
||||
if (unlikely(bio_sector >= end_sector ||
|
||||
bio_sector < start_sector))
|
||||
goto out_of_bounds;
|
||||
|
||||
if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
|
||||
md_error(mddev, tmp_dev->rdev);
|
||||
bio_io_error(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (unlikely(bio_end_sector(bio) > end_sector)) {
|
||||
/* This bio crosses a device boundary, so we have to split it */
|
||||
struct bio *split = bio_split(bio, end_sector - bio_sector,
|
||||
GFP_NOIO, &mddev->bio_set);
|
||||
|
||||
if (IS_ERR(split)) {
|
||||
bio->bi_status = errno_to_blk_status(PTR_ERR(split));
|
||||
bio_endio(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
bio_chain(split, bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = split;
|
||||
}
|
||||
|
||||
md_account_bio(mddev, &bio);
|
||||
bio_set_dev(bio, tmp_dev->rdev->bdev);
|
||||
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
|
||||
start_sector + data_offset;
|
||||
|
||||
if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!bdev_max_discard_sectors(bio->bi_bdev))) {
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
} else {
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
|
||||
bio_sector);
|
||||
mddev_check_write_zeroes(mddev, bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
return true;
|
||||
|
||||
out_of_bounds:
|
||||
pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %pg: %llu sectors, offset %llu\n",
|
||||
mdname(mddev),
|
||||
(unsigned long long)bio->bi_iter.bi_sector,
|
||||
tmp_dev->rdev->bdev,
|
||||
(unsigned long long)tmp_dev->rdev->sectors,
|
||||
(unsigned long long)start_sector);
|
||||
bio_io_error(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void linear_status(struct seq_file *seq, struct mddev *mddev)
|
||||
{
|
||||
seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
|
||||
}
|
||||
|
||||
static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
|
||||
char *md_name = mdname(mddev);
|
||||
|
||||
pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n",
|
||||
md_name, rdev->bdev);
|
||||
}
|
||||
}
|
||||
|
||||
/* Quiesce callback of the linear personality; intentionally empty. */
static void linear_quiesce(struct mddev *mddev, int state)
{
	/* nothing to do */
}
|
||||
|
||||
static struct md_personality linear_personality = {
|
||||
.name = "linear",
|
||||
.level = LEVEL_LINEAR,
|
||||
.owner = THIS_MODULE,
|
||||
.make_request = linear_make_request,
|
||||
.run = linear_run,
|
||||
.free = linear_free,
|
||||
.status = linear_status,
|
||||
.hot_add_disk = linear_add,
|
||||
.size = linear_size,
|
||||
.quiesce = linear_quiesce,
|
||||
.error_handler = linear_error,
|
||||
};
|
||||
|
||||
/* Module entry point: register the linear personality with md. */
static int __init linear_init(void)
{
	return register_md_personality(&linear_personality);
}
|
||||
|
||||
static void linear_exit(void)
|
||||
{
|
||||
unregister_md_personality(&linear_personality);
|
||||
}
|
||||
|
||||
module_init(linear_init);
|
||||
module_exit(linear_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)");
|
||||
MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
|
||||
MODULE_ALIAS("md-linear");
|
||||
MODULE_ALIAS("md-level--1");
|
||||
@@ -8124,7 +8124,7 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
return;
|
||||
mddev->pers->error_handler(mddev, rdev);
|
||||
|
||||
if (mddev->pers->level == 0)
|
||||
if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
|
||||
return;
|
||||
|
||||
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
|
||||
@@ -8745,12 +8745,32 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
|
||||
|
||||
static void md_bitmap_start(struct mddev *mddev,
|
||||
struct md_io_clone *md_io_clone)
|
||||
{
|
||||
if (mddev->pers->bitmap_sector)
|
||||
mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
|
||||
&md_io_clone->sectors);
|
||||
|
||||
mddev->bitmap_ops->startwrite(mddev, md_io_clone->offset,
|
||||
md_io_clone->sectors);
|
||||
}
|
||||
|
||||
static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
|
||||
{
|
||||
mddev->bitmap_ops->endwrite(mddev, md_io_clone->offset,
|
||||
md_io_clone->sectors);
|
||||
}
|
||||
|
||||
static void md_end_clone_io(struct bio *bio)
|
||||
{
|
||||
struct md_io_clone *md_io_clone = bio->bi_private;
|
||||
struct bio *orig_bio = md_io_clone->orig_bio;
|
||||
struct mddev *mddev = md_io_clone->mddev;
|
||||
|
||||
if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
|
||||
md_bitmap_end(mddev, md_io_clone);
|
||||
|
||||
if (bio->bi_status && !orig_bio->bi_status)
|
||||
orig_bio->bi_status = bio->bi_status;
|
||||
|
||||
@@ -8775,6 +8795,12 @@ static void md_clone_bio(struct mddev *mddev, struct bio **bio)
|
||||
if (blk_queue_io_stat(bdev->bd_disk->queue))
|
||||
md_io_clone->start_time = bio_start_io_acct(*bio);
|
||||
|
||||
if (bio_data_dir(*bio) == WRITE && mddev->bitmap) {
|
||||
md_io_clone->offset = (*bio)->bi_iter.bi_sector;
|
||||
md_io_clone->sectors = bio_sectors(*bio);
|
||||
md_bitmap_start(mddev, md_io_clone);
|
||||
}
|
||||
|
||||
clone->bi_end_io = md_end_clone_io;
|
||||
clone->bi_private = md_io_clone;
|
||||
*bio = clone;
|
||||
@@ -8793,6 +8819,9 @@ void md_free_cloned_bio(struct bio *bio)
|
||||
struct bio *orig_bio = md_io_clone->orig_bio;
|
||||
struct mddev *mddev = md_io_clone->mddev;
|
||||
|
||||
if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
|
||||
md_bitmap_end(mddev, md_io_clone);
|
||||
|
||||
if (bio->bi_status && !orig_bio->bi_status)
|
||||
orig_bio->bi_status = bio->bi_status;
|
||||
|
||||
|
||||
@@ -746,6 +746,9 @@ struct md_personality
|
||||
void *(*takeover) (struct mddev *mddev);
|
||||
/* Changes the consistency policy of an active array. */
|
||||
int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
|
||||
/* convert io ranges from array to bitmap */
|
||||
void (*bitmap_sector)(struct mddev *mddev, sector_t *offset,
|
||||
unsigned long *sectors);
|
||||
};
|
||||
|
||||
struct md_sysfs_entry {
|
||||
@@ -828,6 +831,8 @@ struct md_io_clone {
|
||||
struct mddev *mddev;
|
||||
struct bio *orig_bio;
|
||||
unsigned long start_time;
|
||||
sector_t offset;
|
||||
unsigned long sectors;
|
||||
struct bio bio_clone;
|
||||
};
|
||||
|
||||
|
||||
@@ -420,10 +420,8 @@ static void close_write(struct r1bio *r1_bio)
|
||||
r1_bio->behind_master_bio = NULL;
|
||||
}
|
||||
|
||||
/* clear the bitmap if all writes complete successfully */
|
||||
mddev->bitmap_ops->endwrite(mddev, r1_bio->sector, r1_bio->sectors,
|
||||
!test_bit(R1BIO_Degraded, &r1_bio->state),
|
||||
test_bit(R1BIO_BehindIO, &r1_bio->state));
|
||||
if (test_bit(R1BIO_BehindIO, &r1_bio->state))
|
||||
mddev->bitmap_ops->end_behind_write(mddev);
|
||||
md_write_end(mddev);
|
||||
}
|
||||
|
||||
@@ -480,8 +478,6 @@ static void raid1_end_write_request(struct bio *bio)
|
||||
if (!test_bit(Faulty, &rdev->flags))
|
||||
set_bit(R1BIO_WriteError, &r1_bio->state);
|
||||
else {
|
||||
/* Fail the request */
|
||||
set_bit(R1BIO_Degraded, &r1_bio->state);
|
||||
/* Finished with this branch */
|
||||
r1_bio->bios[mirror] = NULL;
|
||||
to_put = bio;
|
||||
@@ -1535,11 +1531,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
write_behind = true;
|
||||
|
||||
r1_bio->bios[i] = NULL;
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags)) {
|
||||
if (i < conf->raid_disks)
|
||||
set_bit(R1BIO_Degraded, &r1_bio->state);
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
}
|
||||
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
if (test_bit(WriteErrorSeen, &rdev->flags)) {
|
||||
@@ -1558,16 +1551,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
*/
|
||||
max_sectors = bad_sectors;
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
/* We don't set R1BIO_Degraded as that
|
||||
* only applies if the disk is
|
||||
* missing, so it might be re-added,
|
||||
* and we want to know to recover this
|
||||
* chunk.
|
||||
* In this case the device is here,
|
||||
* and the fact that this chunk is not
|
||||
* in-sync is recorded in the bad
|
||||
* block log
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
if (is_bad) {
|
||||
@@ -1645,9 +1628,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
stats.behind_writes < max_write_behind)
|
||||
alloc_behind_master_bio(r1_bio, bio);
|
||||
|
||||
mddev->bitmap_ops->startwrite(
|
||||
mddev, r1_bio->sector, r1_bio->sectors,
|
||||
test_bit(R1BIO_BehindIO, &r1_bio->state));
|
||||
if (test_bit(R1BIO_BehindIO, &r1_bio->state))
|
||||
mddev->bitmap_ops->start_behind_write(mddev);
|
||||
first_clone = 0;
|
||||
}
|
||||
|
||||
@@ -2614,12 +2596,10 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
* errors.
|
||||
*/
|
||||
fail = true;
|
||||
if (!narrow_write_error(r1_bio, m)) {
|
||||
if (!narrow_write_error(r1_bio, m))
|
||||
md_error(conf->mddev,
|
||||
conf->mirrors[m].rdev);
|
||||
/* an I/O failed, we can't clear the bitmap */
|
||||
set_bit(R1BIO_Degraded, &r1_bio->state);
|
||||
}
|
||||
rdev_dec_pending(conf->mirrors[m].rdev,
|
||||
conf->mddev);
|
||||
}
|
||||
@@ -2710,8 +2690,6 @@ static void raid1d(struct md_thread *thread)
|
||||
list_del(&r1_bio->retry_list);
|
||||
idx = sector_to_idx(r1_bio->sector);
|
||||
atomic_dec(&conf->nr_queued[idx]);
|
||||
if (mddev->degraded)
|
||||
set_bit(R1BIO_Degraded, &r1_bio->state);
|
||||
if (test_bit(R1BIO_WriteError, &r1_bio->state))
|
||||
close_write(r1_bio);
|
||||
raid_end_bio_io(r1_bio);
|
||||
|
||||
@@ -188,7 +188,6 @@ struct r1bio {
|
||||
enum r1bio_state {
|
||||
R1BIO_Uptodate,
|
||||
R1BIO_IsSync,
|
||||
R1BIO_Degraded,
|
||||
R1BIO_BehindIO,
|
||||
/* Set ReadError on bios that experience a readerror so that
|
||||
* raid1d knows what to do with them.
|
||||
|
||||
@@ -428,10 +428,6 @@ static void close_write(struct r10bio *r10_bio)
|
||||
{
|
||||
struct mddev *mddev = r10_bio->mddev;
|
||||
|
||||
/* clear the bitmap if all writes complete successfully */
|
||||
mddev->bitmap_ops->endwrite(mddev, r10_bio->sector, r10_bio->sectors,
|
||||
!test_bit(R10BIO_Degraded, &r10_bio->state),
|
||||
false);
|
||||
md_write_end(mddev);
|
||||
}
|
||||
|
||||
@@ -501,7 +497,6 @@ static void raid10_end_write_request(struct bio *bio)
|
||||
set_bit(R10BIO_WriteError, &r10_bio->state);
|
||||
else {
|
||||
/* Fail the request */
|
||||
set_bit(R10BIO_Degraded, &r10_bio->state);
|
||||
r10_bio->devs[slot].bio = NULL;
|
||||
to_put = bio;
|
||||
dec_rdev = 1;
|
||||
@@ -1438,10 +1433,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r10_bio->devs[i].bio = NULL;
|
||||
r10_bio->devs[i].repl_bio = NULL;
|
||||
|
||||
if (!rdev && !rrdev) {
|
||||
set_bit(R10BIO_Degraded, &r10_bio->state);
|
||||
if (!rdev && !rrdev)
|
||||
continue;
|
||||
}
|
||||
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
|
||||
sector_t first_bad;
|
||||
sector_t dev_sector = r10_bio->devs[i].addr;
|
||||
@@ -1458,14 +1451,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
* to other devices yet
|
||||
*/
|
||||
max_sectors = bad_sectors;
|
||||
/* We don't set R10BIO_Degraded as that
|
||||
* only applies if the disk is missing,
|
||||
* so it might be re-added, and we want to
|
||||
* know to recover this chunk.
|
||||
* In this case the device is here, and the
|
||||
* fact that this chunk is not in-sync is
|
||||
* recorded in the bad block log.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
if (is_bad) {
|
||||
@@ -1519,8 +1504,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
md_account_bio(mddev, &bio);
|
||||
r10_bio->master_bio = bio;
|
||||
atomic_set(&r10_bio->remaining, 1);
|
||||
mddev->bitmap_ops->startwrite(mddev, r10_bio->sector, r10_bio->sectors,
|
||||
false);
|
||||
|
||||
for (i = 0; i < conf->copies; i++) {
|
||||
if (r10_bio->devs[i].bio)
|
||||
@@ -2966,11 +2949,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
} else if (bio != NULL && bio->bi_status) {
|
||||
fail = true;
|
||||
if (!narrow_write_error(r10_bio, m)) {
|
||||
if (!narrow_write_error(r10_bio, m))
|
||||
md_error(conf->mddev, rdev);
|
||||
set_bit(R10BIO_Degraded,
|
||||
&r10_bio->state);
|
||||
}
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
bio = r10_bio->devs[m].repl_bio;
|
||||
@@ -3029,8 +3009,6 @@ static void raid10d(struct md_thread *thread)
|
||||
r10_bio = list_first_entry(&tmp, struct r10bio,
|
||||
retry_list);
|
||||
list_del(&r10_bio->retry_list);
|
||||
if (mddev->degraded)
|
||||
set_bit(R10BIO_Degraded, &r10_bio->state);
|
||||
|
||||
if (test_bit(R10BIO_WriteError,
|
||||
&r10_bio->state))
|
||||
|
||||
@@ -161,7 +161,6 @@ enum r10bio_state {
|
||||
R10BIO_IsSync,
|
||||
R10BIO_IsRecover,
|
||||
R10BIO_IsReshape,
|
||||
R10BIO_Degraded,
|
||||
/* Set ReadError on bios that experience a read error
|
||||
* so that raid10d knows what to do with them.
|
||||
*/
|
||||
|
||||
@@ -313,10 +313,6 @@ void r5c_handle_cached_data_endio(struct r5conf *conf,
|
||||
if (sh->dev[i].written) {
|
||||
set_bit(R5_UPTODATE, &sh->dev[i].flags);
|
||||
r5c_return_dev_pending_writes(conf, &sh->dev[i]);
|
||||
conf->mddev->bitmap_ops->endwrite(conf->mddev,
|
||||
sh->sector, RAID5_STRIPE_SECTORS(conf),
|
||||
!test_bit(STRIPE_DEGRADED, &sh->state),
|
||||
false);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1023,10 +1019,10 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
|
||||
/* checksum is already calculated in last run */
|
||||
if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
|
||||
continue;
|
||||
addr = kmap_atomic(sh->dev[i].page);
|
||||
addr = kmap_local_page(sh->dev[i].page);
|
||||
sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
|
||||
addr, PAGE_SIZE);
|
||||
kunmap_atomic(addr);
|
||||
kunmap_local(addr);
|
||||
}
|
||||
parity_pages = 1 + !!(sh->qd_idx >= 0);
|
||||
data_pages = write_disks - parity_pages;
|
||||
@@ -1979,9 +1975,9 @@ r5l_recovery_verify_data_checksum(struct r5l_log *log,
|
||||
u32 checksum;
|
||||
|
||||
r5l_recovery_read_page(log, ctx, page, log_offset);
|
||||
addr = kmap_atomic(page);
|
||||
addr = kmap_local_page(page);
|
||||
checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
|
||||
kunmap_atomic(addr);
|
||||
kunmap_local(addr);
|
||||
return (le32_to_cpu(log_checksum) == checksum) ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
@@ -2381,11 +2377,11 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
|
||||
payload->size = cpu_to_le32(BLOCK_SECTORS);
|
||||
payload->location = cpu_to_le64(
|
||||
raid5_compute_blocknr(sh, i, 0));
|
||||
addr = kmap_atomic(dev->page);
|
||||
addr = kmap_local_page(dev->page);
|
||||
payload->checksum[0] = cpu_to_le32(
|
||||
crc32c_le(log->uuid_checksum, addr,
|
||||
PAGE_SIZE));
|
||||
kunmap_atomic(addr);
|
||||
kunmap_local(addr);
|
||||
sync_page_io(log->rdev, write_pos, PAGE_SIZE,
|
||||
dev->page, REQ_OP_WRITE, false);
|
||||
write_pos = r5l_ring_add(log, write_pos,
|
||||
@@ -2888,10 +2884,10 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
|
||||
|
||||
if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
|
||||
continue;
|
||||
addr = kmap_atomic(sh->dev[i].page);
|
||||
addr = kmap_local_page(sh->dev[i].page);
|
||||
sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
|
||||
addr, PAGE_SIZE);
|
||||
kunmap_atomic(addr);
|
||||
kunmap_local(addr);
|
||||
pages++;
|
||||
}
|
||||
WARN_ON(pages == 0);
|
||||
|
||||
@@ -906,8 +906,7 @@ static bool stripe_can_batch(struct stripe_head *sh)
|
||||
if (raid5_has_log(conf) || raid5_has_ppl(conf))
|
||||
return false;
|
||||
return test_bit(STRIPE_BATCH_READY, &sh->state) &&
|
||||
!test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
|
||||
is_full_stripe_write(sh);
|
||||
is_full_stripe_write(sh);
|
||||
}
|
||||
|
||||
/* we only do back search */
|
||||
@@ -1345,8 +1344,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
submit_bio_noacct(rbi);
|
||||
}
|
||||
if (!rdev && !rrdev) {
|
||||
if (op_is_write(op))
|
||||
set_bit(STRIPE_DEGRADED, &sh->state);
|
||||
pr_debug("skip op %d on disc %d for sector %llu\n",
|
||||
bi->bi_opf, i, (unsigned long long)sh->sector);
|
||||
clear_bit(R5_LOCKED, &sh->dev[i].flags);
|
||||
@@ -2884,7 +2881,6 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
|
||||
} else {
|
||||
if (bi->bi_status) {
|
||||
set_bit(STRIPE_DEGRADED, &sh->state);
|
||||
set_bit(WriteErrorSeen, &rdev->flags);
|
||||
set_bit(R5_WriteError, &sh->dev[i].flags);
|
||||
if (!test_and_set_bit(WantReplacement, &rdev->flags))
|
||||
@@ -3548,29 +3544,9 @@ static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi,
|
||||
(*bip)->bi_iter.bi_sector, sh->sector, dd_idx,
|
||||
sh->dev[dd_idx].sector);
|
||||
|
||||
if (conf->mddev->bitmap && firstwrite) {
|
||||
/* Cannot hold spinlock over bitmap_startwrite,
|
||||
* but must ensure this isn't added to a batch until
|
||||
* we have added to the bitmap and set bm_seq.
|
||||
* So set STRIPE_BITMAP_PENDING to prevent
|
||||
* batching.
|
||||
* If multiple __add_stripe_bio() calls race here they
|
||||
* much all set STRIPE_BITMAP_PENDING. So only the first one
|
||||
* to complete "bitmap_startwrite" gets to set
|
||||
* STRIPE_BIT_DELAY. This is important as once a stripe
|
||||
* is added to a batch, STRIPE_BIT_DELAY cannot be changed
|
||||
* any more.
|
||||
*/
|
||||
set_bit(STRIPE_BITMAP_PENDING, &sh->state);
|
||||
spin_unlock_irq(&sh->stripe_lock);
|
||||
conf->mddev->bitmap_ops->startwrite(conf->mddev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), false);
|
||||
spin_lock_irq(&sh->stripe_lock);
|
||||
clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
|
||||
if (!sh->batch_head) {
|
||||
sh->bm_seq = conf->seq_flush+1;
|
||||
set_bit(STRIPE_BIT_DELAY, &sh->state);
|
||||
}
|
||||
if (conf->mddev->bitmap && firstwrite && !sh->batch_head) {
|
||||
sh->bm_seq = conf->seq_flush+1;
|
||||
set_bit(STRIPE_BIT_DELAY, &sh->state);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3621,7 +3597,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
BUG_ON(sh->batch_head);
|
||||
for (i = disks; i--; ) {
|
||||
struct bio *bi;
|
||||
int bitmap_end = 0;
|
||||
|
||||
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
|
||||
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||
@@ -3646,8 +3621,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
sh->dev[i].towrite = NULL;
|
||||
sh->overwrite_disks = 0;
|
||||
spin_unlock_irq(&sh->stripe_lock);
|
||||
if (bi)
|
||||
bitmap_end = 1;
|
||||
|
||||
log_stripe_write_finished(sh);
|
||||
|
||||
@@ -3662,11 +3635,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
bio_io_error(bi);
|
||||
bi = nextbi;
|
||||
}
|
||||
if (bitmap_end)
|
||||
conf->mddev->bitmap_ops->endwrite(conf->mddev,
|
||||
sh->sector, RAID5_STRIPE_SECTORS(conf),
|
||||
false, false);
|
||||
bitmap_end = 0;
|
||||
/* and fail all 'written' */
|
||||
bi = sh->dev[i].written;
|
||||
sh->dev[i].written = NULL;
|
||||
@@ -3675,7 +3643,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
sh->dev[i].page = sh->dev[i].orig_page;
|
||||
}
|
||||
|
||||
if (bi) bitmap_end = 1;
|
||||
while (bi && bi->bi_iter.bi_sector <
|
||||
sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
|
||||
struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector);
|
||||
@@ -3709,10 +3676,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
bi = nextbi;
|
||||
}
|
||||
}
|
||||
if (bitmap_end)
|
||||
conf->mddev->bitmap_ops->endwrite(conf->mddev,
|
||||
sh->sector, RAID5_STRIPE_SECTORS(conf),
|
||||
false, false);
|
||||
/* If we were in the middle of a write the parity block might
|
||||
* still be locked - so just clear all R5_LOCKED flags
|
||||
*/
|
||||
@@ -4061,10 +4024,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
|
||||
bio_endio(wbi);
|
||||
wbi = wbi2;
|
||||
}
|
||||
conf->mddev->bitmap_ops->endwrite(conf->mddev,
|
||||
sh->sector, RAID5_STRIPE_SECTORS(conf),
|
||||
!test_bit(STRIPE_DEGRADED, &sh->state),
|
||||
false);
|
||||
|
||||
if (head_sh->batch_head) {
|
||||
sh = list_first_entry(&sh->batch_list,
|
||||
struct stripe_head,
|
||||
@@ -4341,7 +4301,6 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
|
||||
s->locked++;
|
||||
set_bit(R5_Wantwrite, &dev->flags);
|
||||
|
||||
clear_bit(STRIPE_DEGRADED, &sh->state);
|
||||
set_bit(STRIPE_INSYNC, &sh->state);
|
||||
break;
|
||||
case check_state_run:
|
||||
@@ -4498,7 +4457,6 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
|
||||
clear_bit(R5_Wantwrite, &dev->flags);
|
||||
s->locked--;
|
||||
}
|
||||
clear_bit(STRIPE_DEGRADED, &sh->state);
|
||||
|
||||
set_bit(STRIPE_INSYNC, &sh->state);
|
||||
break;
|
||||
@@ -4891,8 +4849,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
|
||||
(1 << STRIPE_COMPUTE_RUN) |
|
||||
(1 << STRIPE_DISCARD) |
|
||||
(1 << STRIPE_BATCH_READY) |
|
||||
(1 << STRIPE_BATCH_ERR) |
|
||||
(1 << STRIPE_BITMAP_PENDING)),
|
||||
(1 << STRIPE_BATCH_ERR)),
|
||||
"stripe state: %lx\n", sh->state);
|
||||
WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
|
||||
(1 << STRIPE_REPLACED)),
|
||||
@@ -4900,7 +4857,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
|
||||
|
||||
set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
|
||||
(1 << STRIPE_PREREAD_ACTIVE) |
|
||||
(1 << STRIPE_DEGRADED) |
|
||||
(1 << STRIPE_ON_UNPLUG_LIST)),
|
||||
head_sh->state & (1 << STRIPE_INSYNC));
|
||||
|
||||
@@ -5784,10 +5740,6 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
|
||||
}
|
||||
spin_unlock_irq(&sh->stripe_lock);
|
||||
if (conf->mddev->bitmap) {
|
||||
for (d = 0; d < conf->raid_disks - conf->max_degraded;
|
||||
d++)
|
||||
mddev->bitmap_ops->startwrite(mddev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), false);
|
||||
sh->bm_seq = conf->seq_flush + 1;
|
||||
set_bit(STRIPE_BIT_DELAY, &sh->state);
|
||||
}
|
||||
@@ -5928,6 +5880,54 @@ static enum reshape_loc get_reshape_loc(struct mddev *mddev,
|
||||
return LOC_BEHIND_RESHAPE;
|
||||
}
|
||||
|
||||
static void raid5_bitmap_sector(struct mddev *mddev, sector_t *offset,
|
||||
unsigned long *sectors)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
sector_t start = *offset;
|
||||
sector_t end = start + *sectors;
|
||||
sector_t prev_start = start;
|
||||
sector_t prev_end = end;
|
||||
int sectors_per_chunk;
|
||||
enum reshape_loc loc;
|
||||
int dd_idx;
|
||||
|
||||
sectors_per_chunk = conf->chunk_sectors *
|
||||
(conf->raid_disks - conf->max_degraded);
|
||||
start = round_down(start, sectors_per_chunk);
|
||||
end = round_up(end, sectors_per_chunk);
|
||||
|
||||
start = raid5_compute_sector(conf, start, 0, &dd_idx, NULL);
|
||||
end = raid5_compute_sector(conf, end, 0, &dd_idx, NULL);
|
||||
|
||||
/*
|
||||
* For LOC_INSIDE_RESHAPE, this IO will wait for reshape to make
|
||||
* progress, hence it's the same as LOC_BEHIND_RESHAPE.
|
||||
*/
|
||||
loc = get_reshape_loc(mddev, conf, prev_start);
|
||||
if (likely(loc != LOC_AHEAD_OF_RESHAPE)) {
|
||||
*offset = start;
|
||||
*sectors = end - start;
|
||||
return;
|
||||
}
|
||||
|
||||
sectors_per_chunk = conf->prev_chunk_sectors *
|
||||
(conf->previous_raid_disks - conf->max_degraded);
|
||||
prev_start = round_down(prev_start, sectors_per_chunk);
|
||||
prev_end = round_down(prev_end, sectors_per_chunk);
|
||||
|
||||
prev_start = raid5_compute_sector(conf, prev_start, 1, &dd_idx, NULL);
|
||||
prev_end = raid5_compute_sector(conf, prev_end, 1, &dd_idx, NULL);
|
||||
|
||||
/*
|
||||
* for LOC_AHEAD_OF_RESHAPE, reshape can make progress before this IO
|
||||
* is handled in make_stripe_request(), we can't know this here hence
|
||||
* we set bits for both.
|
||||
*/
|
||||
*offset = min(start, prev_start);
|
||||
*sectors = max(end, prev_end) - *offset;
|
||||
}
|
||||
|
||||
static enum stripe_result make_stripe_request(struct mddev *mddev,
|
||||
struct r5conf *conf, struct stripe_request_ctx *ctx,
|
||||
sector_t logical_sector, struct bio *bi)
|
||||
@@ -8976,6 +8976,7 @@ static struct md_personality raid6_personality =
|
||||
.takeover = raid6_takeover,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
.prepare_suspend = raid5_prepare_suspend,
|
||||
.bitmap_sector = raid5_bitmap_sector,
|
||||
};
|
||||
static struct md_personality raid5_personality =
|
||||
{
|
||||
@@ -9001,6 +9002,7 @@ static struct md_personality raid5_personality =
|
||||
.takeover = raid5_takeover,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
.prepare_suspend = raid5_prepare_suspend,
|
||||
.bitmap_sector = raid5_bitmap_sector,
|
||||
};
|
||||
|
||||
static struct md_personality raid4_personality =
|
||||
@@ -9027,6 +9029,7 @@ static struct md_personality raid4_personality =
|
||||
.takeover = raid4_takeover,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
.prepare_suspend = raid5_prepare_suspend,
|
||||
.bitmap_sector = raid5_bitmap_sector,
|
||||
};
|
||||
|
||||
static int __init raid5_init(void)
|
||||
|
||||
@@ -358,7 +358,6 @@ enum {
|
||||
STRIPE_REPLACED,
|
||||
STRIPE_PREREAD_ACTIVE,
|
||||
STRIPE_DELAYED,
|
||||
STRIPE_DEGRADED,
|
||||
STRIPE_BIT_DELAY,
|
||||
STRIPE_EXPANDING,
|
||||
STRIPE_EXPAND_SOURCE,
|
||||
@@ -372,9 +371,6 @@ enum {
|
||||
STRIPE_ON_RELEASE_LIST,
|
||||
STRIPE_BATCH_READY,
|
||||
STRIPE_BATCH_ERR,
|
||||
STRIPE_BITMAP_PENDING, /* Being added to bitmap, don't add
|
||||
* to batch yet.
|
||||
*/
|
||||
STRIPE_LOG_TRAPPED, /* trapped into log (see raid5-cache.c)
|
||||
* this bit is used in two scenarios:
|
||||
*
|
||||
|
||||
@@ -233,7 +233,7 @@ struct mdp_superblock_1 {
|
||||
char set_name[32]; /* set and interpreted by user-space */
|
||||
|
||||
__le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
|
||||
__le32 level; /* 0,1,4,5 */
|
||||
__le32 level; /* 0,1,4,5, -1 (linear) */
|
||||
__le32 layout; /* only for raid5 and raid10 currently */
|
||||
__le64 size; /* used size of component devices, in 512byte sectors */
|
||||
|
||||
|
||||
@@ -103,6 +103,8 @@ typedef struct mdu_array_info_s {
|
||||
|
||||
} mdu_array_info_t;
|
||||
|
||||
#define LEVEL_LINEAR (-1)
|
||||
|
||||
/* we need a value for 'no level specified' and 0
|
||||
* means 'raid0', so we need something else. This is
|
||||
* for internal use only
|
||||
|
||||
Reference in New Issue
Block a user