mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-02 05:07:46 -04:00
Merge tag 'md-next-20230729' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.6/block
Pull MD updates from Song:

 "1. Deprecate bitmap file support, by Christoph Hellwig;
  2. Fix deadlock with md sync thread, by Yu Kuai;
  3. Refactor md io accounting, by Yu Kuai;
  4. Various non-urgent fixes by Li Nan, Yu Kuai, and Jack Wang."

* tag 'md-next-20230729' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: (36 commits)
  md/md-bitmap: hold 'reconfig_mutex' in backlog_store()
  md/md-bitmap: remove unnecessary local variable in backlog_store()
  md/raid10: use dereference_rdev_and_rrdev() to get devices
  md/raid10: factor out dereference_rdev_and_rrdev()
  md/raid10: check replacement and rdev to prevent submit the same io twice
  md/raid1: Avoid lock contention from wake_up()
  md: restore 'noio_flag' for the last mddev_resume()
  md: don't quiesce in mddev_suspend()
  md: remove redundant check in fix_read_error()
  md/raid10: optimize fix_read_error
  md/raid1: prioritize adding disk to 'removed' mirror
  md/md-faulty: enable io accounting
  md/md-linear: enable io accounting
  md/md-multipath: enable io accounting
  md/raid10: switch to use md_account_bio() for io accounting
  md/raid1: switch to use md_account_bio() for io accounting
  raid5: fix missing io accounting in raid5_align_endio()
  md: also clone new io if io accounting is disabled
  md: move initialization and destruction of 'io_acct_set' to md.c
  md: deprecate bitmap file support
  ...
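To make the io accounting refactor in this pull easier to follow, here is a minimal sketch of how a personality hands its bio to the shared accounting helper after this series; md_account_bio() and struct mddev are taken from the diff below, while example_make_request() and the surrounding driver logic are purely illustrative and not part of the commit:

/*
 * Minimal sketch only: example_make_request() is a made-up stand-in for a
 * personality's make_request hook; md_account_bio() is the helper used in
 * this series (it clones the bio and, when io stats are enabled on the
 * queue, records the start time so the clone's completion can finish the
 * accounting and end the original bio).
 */
static bool example_make_request(struct mddev *mddev, struct bio *bio)
{
	/* clone the bio for accounting before remapping/submitting it */
	md_account_bio(mddev, &bio);

	/* ... personality-specific remapping of 'bio' would go here ... */
	submit_bio_noacct(bio);
	return true;
}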
@@ -50,6 +50,16 @@ config MD_AUTODETECT

	  If unsure, say Y.

config MD_BITMAP_FILE
	bool "MD bitmap file support (deprecated)"
	default y
	help
	  If you say Y here, support for write intent bitmaps in files on an
	  external file system is enabled. This is an alternative to the internal
	  bitmaps near the MD superblock, and very problematic code that abuses
	  various kernel APIs and can only work with files on a file system not
	  actually sitting on the MD device.

config MD_LINEAR
	tristate "Linear (append) mode (deprecated)"
	depends on BLK_DEV_MD

@@ -3725,7 +3725,6 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,

	if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
		if (mddev->sync_thread) {
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			md_unregister_thread(&mddev->sync_thread);
			md_reap_sync_thread(mddev);
		}
	} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)

@@ -139,29 +139,26 @@ static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page
|
||||
*/
|
||||
|
||||
/* IO operations when bitmap is stored near all superblocks */
|
||||
static int read_sb_page(struct mddev *mddev, loff_t offset,
|
||||
struct page *page,
|
||||
unsigned long index, int size)
|
||||
{
|
||||
/* choose a good rdev and read the page from there */
|
||||
|
||||
/* choose a good rdev and read the page from there */
|
||||
static int read_sb_page(struct mddev *mddev, loff_t offset,
|
||||
struct page *page, unsigned long index, int size)
|
||||
{
|
||||
|
||||
sector_t sector = mddev->bitmap_info.offset + offset +
|
||||
index * (PAGE_SIZE / SECTOR_SIZE);
|
||||
struct md_rdev *rdev;
|
||||
sector_t target;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (! test_bit(In_sync, &rdev->flags)
|
||||
|| test_bit(Faulty, &rdev->flags)
|
||||
|| test_bit(Bitmap_sync, &rdev->flags))
|
||||
u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev));
|
||||
|
||||
if (!test_bit(In_sync, &rdev->flags) ||
|
||||
test_bit(Faulty, &rdev->flags) ||
|
||||
test_bit(Bitmap_sync, &rdev->flags))
|
||||
continue;
|
||||
|
||||
target = offset + index * (PAGE_SIZE/512);
|
||||
|
||||
if (sync_page_io(rdev, target,
|
||||
roundup(size, bdev_logical_block_size(rdev->bdev)),
|
||||
page, REQ_OP_READ, true)) {
|
||||
page->index = index;
|
||||
if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true))
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return -EIO;
|
||||
}
|
||||
@@ -225,18 +222,19 @@ static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
|
||||
}
|
||||
|
||||
static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
|
||||
struct page *page)
|
||||
unsigned long pg_index, struct page *page)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct mddev *mddev = bitmap->mddev;
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
loff_t sboff, offset = mddev->bitmap_info.offset;
|
||||
sector_t ps, doff;
|
||||
sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
|
||||
unsigned int size = PAGE_SIZE;
|
||||
unsigned int opt_size = PAGE_SIZE;
|
||||
sector_t doff;
|
||||
|
||||
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
|
||||
if (page->index == store->file_pages - 1) {
|
||||
if (pg_index == store->file_pages - 1) {
|
||||
unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
|
||||
|
||||
if (last_page_size == 0)
|
||||
@@ -245,7 +243,6 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
|
||||
opt_size = optimal_io_size(bdev, last_page_size, size);
|
||||
}
|
||||
|
||||
ps = page->index * PAGE_SIZE / SECTOR_SIZE;
|
||||
sboff = rdev->sb_start + offset;
|
||||
doff = rdev->data_offset;
|
||||
|
||||
@@ -279,55 +276,41 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index,
|
||||
struct page *page, bool wait)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
struct mddev *mddev = bitmap->mddev;
|
||||
int ret;
|
||||
|
||||
do {
|
||||
rdev = NULL;
|
||||
struct md_rdev *rdev = NULL;
|
||||
|
||||
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
|
||||
ret = __write_sb_page(rdev, bitmap, page);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) {
|
||||
set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
|
||||
return;
|
||||
}
|
||||
}
|
||||
} while (wait && md_super_wait(mddev) < 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void md_bitmap_file_kick(struct bitmap *bitmap);
|
||||
/*
|
||||
* write out a page to a file
|
||||
*/
|
||||
static void write_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
|
||||
#ifdef CONFIG_MD_BITMAP_FILE
|
||||
static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
{
|
||||
struct buffer_head *bh;
|
||||
struct buffer_head *bh = page_buffers(page);
|
||||
|
||||
if (bitmap->storage.file == NULL) {
|
||||
switch (write_sb_page(bitmap, page, wait)) {
|
||||
case -EINVAL:
|
||||
set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
|
||||
}
|
||||
} else {
|
||||
|
||||
bh = page_buffers(page);
|
||||
|
||||
while (bh && bh->b_blocknr) {
|
||||
atomic_inc(&bitmap->pending_writes);
|
||||
set_buffer_locked(bh);
|
||||
set_buffer_mapped(bh);
|
||||
submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
|
||||
bh = bh->b_this_page;
|
||||
}
|
||||
|
||||
if (wait)
|
||||
wait_event(bitmap->write_wait,
|
||||
atomic_read(&bitmap->pending_writes)==0);
|
||||
while (bh && bh->b_blocknr) {
|
||||
atomic_inc(&bitmap->pending_writes);
|
||||
set_buffer_locked(bh);
|
||||
set_buffer_mapped(bh);
|
||||
submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
|
||||
bh = bh->b_this_page;
|
||||
}
|
||||
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
|
||||
md_bitmap_file_kick(bitmap);
|
||||
|
||||
if (wait)
|
||||
wait_event(bitmap->write_wait,
|
||||
atomic_read(&bitmap->pending_writes) == 0);
|
||||
}
|
||||
|
||||
static void end_bitmap_write(struct buffer_head *bh, int uptodate)
|
||||
@@ -364,10 +347,8 @@ static void free_buffers(struct page *page)
|
||||
* This usage is similar to how swap files are handled, and allows us
|
||||
* to write to a file with no concerns of memory allocation failing.
|
||||
*/
|
||||
static int read_page(struct file *file, unsigned long index,
|
||||
struct bitmap *bitmap,
|
||||
unsigned long count,
|
||||
struct page *page)
|
||||
static int read_file_page(struct file *file, unsigned long index,
|
||||
struct bitmap *bitmap, unsigned long count, struct page *page)
|
||||
{
|
||||
int ret = 0;
|
||||
struct inode *inode = file_inode(file);
|
||||
@@ -415,7 +396,6 @@ static int read_page(struct file *file, unsigned long index,
|
||||
blk_cur++;
|
||||
bh = bh->b_this_page;
|
||||
}
|
||||
page->index = index;
|
||||
|
||||
wait_event(bitmap->write_wait,
|
||||
atomic_read(&bitmap->pending_writes)==0);
|
||||
@@ -429,11 +409,45 @@ static int read_page(struct file *file, unsigned long index,
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
#else /* CONFIG_MD_BITMAP_FILE */
|
||||
static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
{
|
||||
}
|
||||
static int read_file_page(struct file *file, unsigned long index,
|
||||
struct bitmap *bitmap, unsigned long count, struct page *page)
|
||||
{
|
||||
return -EIO;
|
||||
}
|
||||
static void free_buffers(struct page *page)
|
||||
{
|
||||
put_page(page);
|
||||
}
|
||||
#endif /* CONFIG_MD_BITMAP_FILE */
|
||||
|
||||
/*
|
||||
* bitmap file superblock operations
|
||||
*/
|
||||
|
||||
/*
|
||||
* write out a page to a file
|
||||
*/
|
||||
static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index,
|
||||
bool wait)
|
||||
{
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
struct page *page = store->filemap[pg_index];
|
||||
|
||||
if (mddev_is_clustered(bitmap->mddev)) {
|
||||
pg_index += bitmap->cluster_slot *
|
||||
DIV_ROUND_UP(store->bytes, PAGE_SIZE);
|
||||
}
|
||||
|
||||
if (store->file)
|
||||
write_file_page(bitmap, page, wait);
|
||||
else
|
||||
write_sb_page(bitmap, pg_index, page, wait);
|
||||
}
|
||||
|
||||
/*
|
||||
* md_bitmap_wait_writes() should be called before writing any bitmap
|
||||
* blocks, to ensure previous writes, particularly from
|
||||
@@ -488,7 +502,12 @@ void md_bitmap_update_sb(struct bitmap *bitmap)
|
||||
sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
|
||||
bitmap_info.space);
|
||||
kunmap_atomic(sb);
|
||||
write_page(bitmap, bitmap->storage.sb_page, 1);
|
||||
|
||||
if (bitmap->storage.file)
|
||||
write_file_page(bitmap, bitmap->storage.sb_page, 1);
|
||||
else
|
||||
write_sb_page(bitmap, bitmap->storage.sb_index,
|
||||
bitmap->storage.sb_page, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(md_bitmap_update_sb);
|
||||
|
||||
@@ -540,7 +559,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
|
||||
bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (bitmap->storage.sb_page == NULL)
|
||||
return -ENOMEM;
|
||||
bitmap->storage.sb_page->index = 0;
|
||||
bitmap->storage.sb_index = 0;
|
||||
|
||||
sb = kmap_atomic(bitmap->storage.sb_page);
|
||||
|
||||
@@ -601,7 +620,7 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
|
||||
unsigned long sectors_reserved = 0;
|
||||
int err = -EINVAL;
|
||||
struct page *sb_page;
|
||||
loff_t offset = bitmap->mddev->bitmap_info.offset;
|
||||
loff_t offset = 0;
|
||||
|
||||
if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
|
||||
chunksize = 128 * 1024 * 1024;
|
||||
@@ -628,7 +647,7 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
|
||||
bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
|
||||
/* to 4k blocks */
|
||||
bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
|
||||
offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
|
||||
offset = bitmap->cluster_slot * (bm_blocks << 3);
|
||||
pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
|
||||
bitmap->cluster_slot, offset);
|
||||
}
|
||||
@@ -637,13 +656,11 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
|
||||
loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
|
||||
int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
|
||||
|
||||
err = read_page(bitmap->storage.file, 0,
|
||||
err = read_file_page(bitmap->storage.file, 0,
|
||||
bitmap, bytes, sb_page);
|
||||
} else {
|
||||
err = read_sb_page(bitmap->mddev,
|
||||
offset,
|
||||
sb_page,
|
||||
0, sizeof(bitmap_super_t));
|
||||
err = read_sb_page(bitmap->mddev, offset, sb_page, 0,
|
||||
sizeof(bitmap_super_t));
|
||||
}
|
||||
if (err)
|
||||
return err;
|
||||
@@ -819,7 +836,7 @@ static int md_bitmap_storage_alloc(struct bitmap_storage *store,
|
||||
if (store->sb_page) {
|
||||
store->filemap[0] = store->sb_page;
|
||||
pnum = 1;
|
||||
store->sb_page->index = offset;
|
||||
store->sb_index = offset;
|
||||
}
|
||||
|
||||
for ( ; pnum < num_pages; pnum++) {
|
||||
@@ -828,7 +845,6 @@ static int md_bitmap_storage_alloc(struct bitmap_storage *store,
|
||||
store->file_pages = pnum;
|
||||
return -ENOMEM;
|
||||
}
|
||||
store->filemap[pnum]->index = pnum + offset;
|
||||
}
|
||||
store->file_pages = pnum;
|
||||
|
||||
@@ -847,14 +863,10 @@ static int md_bitmap_storage_alloc(struct bitmap_storage *store,
|
||||
|
||||
static void md_bitmap_file_unmap(struct bitmap_storage *store)
|
||||
{
|
||||
struct page **map, *sb_page;
|
||||
int pages;
|
||||
struct file *file;
|
||||
|
||||
file = store->file;
|
||||
map = store->filemap;
|
||||
pages = store->file_pages;
|
||||
sb_page = store->sb_page;
|
||||
struct file *file = store->file;
|
||||
struct page *sb_page = store->sb_page;
|
||||
struct page **map = store->filemap;
|
||||
int pages = store->file_pages;
|
||||
|
||||
while (pages--)
|
||||
if (map[pages] != sb_page) /* 0 is sb_page, release it below */
|
||||
@@ -879,21 +891,13 @@ static void md_bitmap_file_unmap(struct bitmap_storage *store)
|
||||
*/
|
||||
static void md_bitmap_file_kick(struct bitmap *bitmap)
|
||||
{
|
||||
char *path, *ptr = NULL;
|
||||
|
||||
if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
|
||||
md_bitmap_update_sb(bitmap);
|
||||
|
||||
if (bitmap->storage.file) {
|
||||
path = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
if (path)
|
||||
ptr = file_path(bitmap->storage.file,
|
||||
path, PAGE_SIZE);
|
||||
pr_warn("%s: kicking failed bitmap file %pD4 from array!\n",
|
||||
bmname(bitmap), bitmap->storage.file);
|
||||
|
||||
pr_warn("%s: kicking failed bitmap file %s from array!\n",
|
||||
bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
|
||||
|
||||
kfree(path);
|
||||
} else
|
||||
pr_warn("%s: disabling internal bitmap due to errors\n",
|
||||
bmname(bitmap));
|
||||
@@ -945,6 +949,7 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
|
||||
void *kaddr;
|
||||
unsigned long chunk = block >> bitmap->counts.chunkshift;
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
unsigned long index = file_page_index(store, chunk);
|
||||
unsigned long node_offset = 0;
|
||||
|
||||
if (mddev_is_clustered(bitmap->mddev))
|
||||
@@ -962,9 +967,9 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
|
||||
else
|
||||
set_bit_le(bit, kaddr);
|
||||
kunmap_atomic(kaddr);
|
||||
pr_debug("set file bit %lu page %lu\n", bit, page->index);
|
||||
pr_debug("set file bit %lu page %lu\n", bit, index);
|
||||
/* record page number so it gets flushed to disk when unplug occurs */
|
||||
set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY);
|
||||
set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY);
|
||||
}
|
||||
|
||||
static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
|
||||
@@ -974,6 +979,7 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
|
||||
void *paddr;
|
||||
unsigned long chunk = block >> bitmap->counts.chunkshift;
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
unsigned long index = file_page_index(store, chunk);
|
||||
unsigned long node_offset = 0;
|
||||
|
||||
if (mddev_is_clustered(bitmap->mddev))
|
||||
@@ -989,8 +995,8 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
|
||||
else
|
||||
clear_bit_le(bit, paddr);
|
||||
kunmap_atomic(paddr);
|
||||
if (!test_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
|
||||
set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_PENDING);
|
||||
if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
|
||||
set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING);
|
||||
bitmap->allclean = 0;
|
||||
}
|
||||
}
|
||||
@@ -1042,7 +1048,7 @@ void md_bitmap_unplug(struct bitmap *bitmap)
|
||||
"md bitmap_unplug");
|
||||
}
|
||||
clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
|
||||
write_page(bitmap, bitmap->storage.filemap[i], 0);
|
||||
filemap_write_page(bitmap, i, false);
|
||||
writing = 1;
|
||||
}
|
||||
}
|
||||
@@ -1084,33 +1090,31 @@ void md_bitmap_unplug_async(struct bitmap *bitmap)
|
||||
EXPORT_SYMBOL(md_bitmap_unplug_async);
|
||||
|
||||
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
|
||||
/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
|
||||
* the in-memory bitmap from the on-disk bitmap -- also, sets up the
|
||||
* memory mapping of the bitmap file
|
||||
* Special cases:
|
||||
* if there's no bitmap file, or if the bitmap file had been
|
||||
* previously kicked from the array, we mark all the bits as
|
||||
* 1's in order to cause a full resync.
|
||||
|
||||
/*
|
||||
* Initialize the in-memory bitmap from the on-disk bitmap and set up the memory
|
||||
* mapping of the bitmap file.
|
||||
*
|
||||
* Special case: If there's no bitmap file, or if the bitmap file had been
|
||||
* previously kicked from the array, we mark all the bits as 1's in order to
|
||||
* cause a full resync.
|
||||
*
|
||||
* We ignore all bits for sectors that end earlier than 'start'.
|
||||
* This is used when reading an out-of-date bitmap...
|
||||
* This is used when reading an out-of-date bitmap.
|
||||
*/
|
||||
static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
||||
{
|
||||
unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
|
||||
struct page *page = NULL;
|
||||
unsigned long bit_cnt = 0;
|
||||
struct file *file;
|
||||
unsigned long offset;
|
||||
int outofdate;
|
||||
int ret = -ENOSPC;
|
||||
void *paddr;
|
||||
bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
|
||||
struct mddev *mddev = bitmap->mddev;
|
||||
unsigned long chunks = bitmap->counts.chunks;
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
struct file *file = store->file;
|
||||
unsigned long node_offset = 0;
|
||||
unsigned long bit_cnt = 0;
|
||||
unsigned long i;
|
||||
int ret;
|
||||
|
||||
chunks = bitmap->counts.chunks;
|
||||
file = store->file;
|
||||
|
||||
if (!file && !bitmap->mddev->bitmap_info.offset) {
|
||||
if (!file && !mddev->bitmap_info.offset) {
|
||||
/* No permanent bitmap - fill with '1s'. */
|
||||
store->filemap = NULL;
|
||||
store->file_pages = 0;
|
||||
@@ -1125,77 +1129,79 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
||||
return 0;
|
||||
}
|
||||
|
||||
outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
|
||||
if (outofdate)
|
||||
pr_warn("%s: bitmap file is out of date, doing full recovery\n", bmname(bitmap));
|
||||
|
||||
if (file && i_size_read(file->f_mapping->host) < store->bytes) {
|
||||
pr_warn("%s: bitmap file too short %lu < %lu\n",
|
||||
bmname(bitmap),
|
||||
(unsigned long) i_size_read(file->f_mapping->host),
|
||||
store->bytes);
|
||||
ret = -ENOSPC;
|
||||
goto err;
|
||||
}
|
||||
|
||||
oldindex = ~0L;
|
||||
offset = 0;
|
||||
if (!bitmap->mddev->bitmap_info.external)
|
||||
offset = sizeof(bitmap_super_t);
|
||||
|
||||
if (mddev_is_clustered(bitmap->mddev))
|
||||
if (mddev_is_clustered(mddev))
|
||||
node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
|
||||
|
||||
for (i = 0; i < chunks; i++) {
|
||||
int b;
|
||||
index = file_page_index(&bitmap->storage, i);
|
||||
bit = file_page_offset(&bitmap->storage, i);
|
||||
if (index != oldindex) { /* this is a new page, read it in */
|
||||
int count;
|
||||
/* unmap the old page, we're done with it */
|
||||
if (index == store->file_pages-1)
|
||||
count = store->bytes - index * PAGE_SIZE;
|
||||
else
|
||||
count = PAGE_SIZE;
|
||||
page = store->filemap[index];
|
||||
if (file)
|
||||
ret = read_page(file, index, bitmap,
|
||||
count, page);
|
||||
else
|
||||
ret = read_sb_page(
|
||||
bitmap->mddev,
|
||||
bitmap->mddev->bitmap_info.offset,
|
||||
page,
|
||||
index + node_offset, count);
|
||||
for (i = 0; i < store->file_pages; i++) {
|
||||
struct page *page = store->filemap[i];
|
||||
int count;
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
/* unmap the old page, we're done with it */
|
||||
if (i == store->file_pages - 1)
|
||||
count = store->bytes - i * PAGE_SIZE;
|
||||
else
|
||||
count = PAGE_SIZE;
|
||||
|
||||
oldindex = index;
|
||||
if (file)
|
||||
ret = read_file_page(file, i, bitmap, count, page);
|
||||
else
|
||||
ret = read_sb_page(mddev, 0, page, i + node_offset,
|
||||
count);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (outofdate) {
|
||||
/*
|
||||
* if bitmap is out of date, dirty the
|
||||
* whole page and write it out
|
||||
*/
|
||||
paddr = kmap_atomic(page);
|
||||
memset(paddr + offset, 0xff,
|
||||
PAGE_SIZE - offset);
|
||||
kunmap_atomic(paddr);
|
||||
write_page(bitmap, page, 1);
|
||||
if (outofdate) {
|
||||
pr_warn("%s: bitmap file is out of date, doing full recovery\n",
|
||||
bmname(bitmap));
|
||||
|
||||
for (i = 0; i < store->file_pages; i++) {
|
||||
struct page *page = store->filemap[i];
|
||||
unsigned long offset = 0;
|
||||
void *paddr;
|
||||
|
||||
if (i == 0 && !mddev->bitmap_info.external)
|
||||
offset = sizeof(bitmap_super_t);
|
||||
|
||||
/*
|
||||
* If the bitmap is out of date, dirty the whole page
|
||||
* and write it out
|
||||
*/
|
||||
paddr = kmap_atomic(page);
|
||||
memset(paddr + offset, 0xff, PAGE_SIZE - offset);
|
||||
kunmap_atomic(paddr);
|
||||
|
||||
filemap_write_page(bitmap, i, true);
|
||||
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
|
||||
ret = -EIO;
|
||||
if (test_bit(BITMAP_WRITE_ERROR,
|
||||
&bitmap->flags))
|
||||
goto err;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < chunks; i++) {
|
||||
struct page *page = filemap_get_page(&bitmap->storage, i);
|
||||
unsigned long bit = file_page_offset(&bitmap->storage, i);
|
||||
void *paddr;
|
||||
bool was_set;
|
||||
|
||||
paddr = kmap_atomic(page);
|
||||
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
||||
b = test_bit(bit, paddr);
|
||||
was_set = test_bit(bit, paddr);
|
||||
else
|
||||
b = test_bit_le(bit, paddr);
|
||||
was_set = test_bit_le(bit, paddr);
|
||||
kunmap_atomic(paddr);
|
||||
if (b) {
|
||||
|
||||
if (was_set) {
|
||||
/* if the disk bit is set, set the memory bit */
|
||||
int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
|
||||
>= start);
|
||||
@@ -1204,7 +1210,6 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
||||
needed);
|
||||
bit_cnt++;
|
||||
}
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
|
||||
@@ -1396,9 +1401,8 @@ void md_bitmap_daemon_work(struct mddev *mddev)
|
||||
break;
|
||||
if (bitmap->storage.filemap &&
|
||||
test_and_clear_page_attr(bitmap, j,
|
||||
BITMAP_PAGE_NEEDWRITE)) {
|
||||
write_page(bitmap, bitmap->storage.filemap[j], 0);
|
||||
}
|
||||
BITMAP_PAGE_NEEDWRITE))
|
||||
filemap_write_page(bitmap, j, false);
|
||||
}
|
||||
|
||||
done:
|
||||
@@ -2542,6 +2546,10 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
	if (backlog > COUNTER_MAX)
		return -EINVAL;

	rv = mddev_lock(mddev);
	if (rv)
		return rv;

	/*
	 * Without write mostly device, it doesn't make sense to set
	 * backlog for max_write_behind.
@@ -2555,6 +2563,7 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
	if (!has_write_mostly) {
		pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
				    mdname(mddev));
		mddev_unlock(mddev);
		return -EINVAL;
	}

@@ -2565,13 +2574,13 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
		mddev_destroy_serial_pool(mddev, NULL, false);
	} else if (backlog && !mddev->serial_info_pool) {
		/* serial_info_pool is needed since backlog is not zero */
		struct md_rdev *rdev;

		rdev_for_each(rdev, mddev)
			mddev_create_serial_pool(mddev, rdev, false);
	}
	if (old_mwb != backlog)
		md_bitmap_update_sb(mddev->bitmap);

	mddev_unlock(mddev);
	return len;
}

@@ -201,6 +201,7 @@ struct bitmap {
		struct file *file;		/* backing disk file */
		struct page *sb_page;		/* cached copy of the bitmap
						 * file superblock */
		unsigned long sb_index;
		struct page **filemap;		/* list of cache pages for
						 * the file */
		unsigned long *filemap_attr;	/* attributes associated

@@ -204,6 +204,8 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
			failit = 1;
		}
	}

	md_account_bio(mddev, &bio);
	if (failit) {
		struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO,
						&mddev->bio_set);

@@ -238,6 +238,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
		bio = split;
	}

	md_account_bio(mddev, &bio);
	bio_set_dev(bio, tmp_dev->rdev->bdev);
	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
		start_sector + data_offset;

@@ -107,6 +107,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
	    && md_flush_request(mddev, bio))
		return true;

	md_account_bio(mddev, &bio);
	mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);

	mp_bh->master_bio = bio;

drivers/md/md.c (219 lines changed)
@@ -453,7 +453,6 @@ void mddev_suspend(struct mddev *mddev)
		mddev->pers->prepare_suspend(mddev);

	wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
	mddev->pers->quiesce(mddev, 1);
	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

@@ -465,14 +464,15 @@ EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(struct mddev *mddev)
{
	/* entred the memalloc scope from mddev_suspend() */
	memalloc_noio_restore(mddev->noio_flag);
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (--mddev->suspended)
		return;

	/* entred the memalloc scope from mddev_suspend() */
	memalloc_noio_restore(mddev->noio_flag);

	percpu_ref_resurrect(&mddev->active_io);
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);

@@ -643,6 +643,7 @@ void mddev_init(struct mddev *mddev)
{
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->sync_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
@@ -650,6 +651,7 @@ void mddev_init(struct mddev *mddev)
	timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->sync_seq, 0);
	spin_lock_init(&mddev->lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
@@ -2304,7 +2306,7 @@ int md_integrity_register(struct mddev *mddev)
|
||||
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
|
||||
if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
|
||||
(mddev->level != 1 && mddev->level != 10 &&
|
||||
bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
|
||||
bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
|
||||
/*
|
||||
* No need to handle the failure of bioset_integrity_create,
|
||||
* because the function is called by md_run() -> pers->run(),
|
||||
@@ -4747,6 +4749,62 @@ action_show(struct mddev *mddev, char *page)
|
||||
return sprintf(page, "%s\n", type);
|
||||
}
|
||||
|
||||
static void stop_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
return;
|
||||
|
||||
if (mddev_lock(mddev))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Check again in case MD_RECOVERY_RUNNING is cleared before lock is
|
||||
* held.
|
||||
*/
|
||||
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
mddev_unlock(mddev);
|
||||
return;
|
||||
}
|
||||
|
||||
if (work_pending(&mddev->del_work))
|
||||
flush_workqueue(md_misc_wq);
|
||||
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
/*
|
||||
* Thread might be blocked waiting for metadata update which will now
|
||||
* never happen
|
||||
*/
|
||||
md_wakeup_thread_directly(mddev->sync_thread);
|
||||
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
|
||||
static void idle_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
int sync_seq = atomic_read(&mddev->sync_seq);
|
||||
|
||||
mutex_lock(&mddev->sync_mutex);
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
stop_sync_thread(mddev);
|
||||
|
||||
wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
|
||||
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
|
||||
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
}
|
||||
|
||||
static void frozen_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
mutex_lock(&mddev->sync_mutex);
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
stop_sync_thread(mddev);
|
||||
|
||||
wait_event(resync_wait, mddev->sync_thread == NULL &&
|
||||
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
|
||||
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
action_store(struct mddev *mddev, const char *page, size_t len)
|
||||
{
|
||||
@@ -4754,35 +4812,11 @@ action_store(struct mddev *mddev, const char *page, size_t len)
|
||||
return -EINVAL;
|
||||
|
||||
|
||||
if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
|
||||
if (cmd_match(page, "frozen"))
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
else
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
|
||||
mddev_lock(mddev) == 0) {
|
||||
if (work_pending(&mddev->del_work))
|
||||
flush_workqueue(md_misc_wq);
|
||||
if (mddev->sync_thread) {
|
||||
sector_t save_rp = mddev->reshape_position;
|
||||
|
||||
mddev_unlock(mddev);
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
mddev_lock_nointr(mddev);
|
||||
/*
|
||||
* set RECOVERY_INTR again and restore reshape
|
||||
* position in case others changed them after
|
||||
* got lock, eg, reshape_position_store and
|
||||
* md_check_recovery.
|
||||
*/
|
||||
mddev->reshape_position = save_rp;
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
if (cmd_match(page, "idle"))
|
||||
idle_sync_thread(mddev);
|
||||
else if (cmd_match(page, "frozen"))
|
||||
frozen_sync_thread(mddev);
|
||||
else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
return -EBUSY;
|
||||
else if (cmd_match(page, "resync"))
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
@@ -5842,6 +5876,13 @@ int md_run(struct mddev *mddev)
|
||||
goto exit_bio_set;
|
||||
}
|
||||
|
||||
if (!bioset_initialized(&mddev->io_clone_set)) {
|
||||
err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
|
||||
offsetof(struct md_io_clone, bio_clone), 0);
|
||||
if (err)
|
||||
goto exit_sync_set;
|
||||
}
|
||||
|
||||
spin_lock(&pers_lock);
|
||||
pers = find_pers(mddev->level, mddev->clevel);
|
||||
if (!pers || !try_module_get(pers->owner)) {
|
||||
@@ -6019,6 +6060,8 @@ int md_run(struct mddev *mddev)
|
||||
module_put(pers->owner);
|
||||
md_bitmap_destroy(mddev);
|
||||
abort:
|
||||
bioset_exit(&mddev->io_clone_set);
|
||||
exit_sync_set:
|
||||
bioset_exit(&mddev->sync_set);
|
||||
exit_bio_set:
|
||||
bioset_exit(&mddev->bio_set);
|
||||
@@ -6176,7 +6219,6 @@ static void __md_stop_writes(struct mddev *mddev)
|
||||
flush_workqueue(md_misc_wq);
|
||||
if (mddev->sync_thread) {
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
|
||||
@@ -6243,6 +6285,7 @@ static void __md_stop(struct mddev *mddev)
|
||||
percpu_ref_exit(&mddev->active_io);
|
||||
bioset_exit(&mddev->bio_set);
|
||||
bioset_exit(&mddev->sync_set);
|
||||
bioset_exit(&mddev->io_clone_set);
|
||||
}
|
||||
|
||||
void md_stop(struct mddev *mddev)
|
||||
@@ -7010,6 +7053,15 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
|
||||
|
||||
if (mddev->bitmap || mddev->bitmap_info.file)
|
||||
return -EEXIST; /* cannot add when bitmap is present */
|
||||
|
||||
if (!IS_ENABLED(CONFIG_MD_BITMAP_FILE)) {
|
||||
pr_warn("%s: bitmap files not supported by this kernel\n",
|
||||
mdname(mddev));
|
||||
return -EINVAL;
|
||||
}
|
||||
pr_warn("%s: using deprecated bitmap file support\n",
|
||||
mdname(mddev));
|
||||
|
||||
f = fget(fd);
|
||||
|
||||
if (f == NULL) {
|
||||
@@ -8599,63 +8651,45 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
|
||||
|
||||
int acct_bioset_init(struct mddev *mddev)
|
||||
static void md_end_clone_io(struct bio *bio)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (!bioset_initialized(&mddev->io_acct_set))
|
||||
err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
|
||||
offsetof(struct md_io_acct, bio_clone), 0);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(acct_bioset_init);
|
||||
|
||||
void acct_bioset_exit(struct mddev *mddev)
|
||||
{
|
||||
bioset_exit(&mddev->io_acct_set);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(acct_bioset_exit);
|
||||
|
||||
static void md_end_io_acct(struct bio *bio)
|
||||
{
|
||||
struct md_io_acct *md_io_acct = bio->bi_private;
|
||||
struct bio *orig_bio = md_io_acct->orig_bio;
|
||||
struct mddev *mddev = md_io_acct->mddev;
|
||||
struct md_io_clone *md_io_clone = bio->bi_private;
|
||||
struct bio *orig_bio = md_io_clone->orig_bio;
|
||||
struct mddev *mddev = md_io_clone->mddev;
|
||||
|
||||
orig_bio->bi_status = bio->bi_status;
|
||||
|
||||
bio_end_io_acct(orig_bio, md_io_acct->start_time);
|
||||
if (md_io_clone->start_time)
|
||||
bio_end_io_acct(orig_bio, md_io_clone->start_time);
|
||||
|
||||
bio_put(bio);
|
||||
bio_endio(orig_bio);
|
||||
|
||||
percpu_ref_put(&mddev->active_io);
|
||||
}
|
||||
|
||||
/*
|
||||
* Used by personalities that don't already clone the bio and thus can't
|
||||
* easily add the timestamp to their extended bio structure.
|
||||
*/
|
||||
void md_account_bio(struct mddev *mddev, struct bio **bio)
|
||||
static void md_clone_bio(struct mddev *mddev, struct bio **bio)
|
||||
{
|
||||
struct block_device *bdev = (*bio)->bi_bdev;
|
||||
struct md_io_acct *md_io_acct;
|
||||
struct bio *clone;
|
||||
struct md_io_clone *md_io_clone;
|
||||
struct bio *clone =
|
||||
bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set);
|
||||
|
||||
if (!blk_queue_io_stat(bdev->bd_disk->queue))
|
||||
return;
|
||||
md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
|
||||
md_io_clone->orig_bio = *bio;
|
||||
md_io_clone->mddev = mddev;
|
||||
if (blk_queue_io_stat(bdev->bd_disk->queue))
|
||||
md_io_clone->start_time = bio_start_io_acct(*bio);
|
||||
|
||||
percpu_ref_get(&mddev->active_io);
|
||||
|
||||
clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
|
||||
md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
|
||||
md_io_acct->orig_bio = *bio;
|
||||
md_io_acct->start_time = bio_start_io_acct(*bio);
|
||||
md_io_acct->mddev = mddev;
|
||||
|
||||
clone->bi_end_io = md_end_io_acct;
|
||||
clone->bi_private = md_io_acct;
|
||||
clone->bi_end_io = md_end_clone_io;
|
||||
clone->bi_private = md_io_clone;
|
||||
*bio = clone;
|
||||
}
|
||||
|
||||
void md_account_bio(struct mddev *mddev, struct bio **bio)
|
||||
{
|
||||
percpu_ref_get(&mddev->active_io);
|
||||
md_clone_bio(mddev, bio);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_account_bio);
|
||||
|
||||
/* md_allow_write(mddev)
|
||||
@@ -9327,7 +9361,6 @@ void md_check_recovery(struct mddev *mddev)
|
||||
* ->spare_active and clear saved_raid_disk
|
||||
*/
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
md_reap_sync_thread(mddev);
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
@@ -9356,17 +9389,24 @@ void md_check_recovery(struct mddev *mddev)
|
||||
if (mddev->sb_flags)
|
||||
md_update_sb(mddev, 0);
|
||||
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
|
||||
/* resync/recovery still happening */
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
goto unlock;
|
||||
}
|
||||
if (mddev->sync_thread) {
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
/*
|
||||
* Never start a new sync thread if MD_RECOVERY_RUNNING is
|
||||
* still set.
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
|
||||
/* resync/recovery still happening */
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(!mddev->sync_thread))
|
||||
goto unlock;
|
||||
|
||||
md_reap_sync_thread(mddev);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* Set RUNNING before clearing NEEDED to avoid
|
||||
* any transients in the value of "sync_action".
|
||||
*/
|
||||
@@ -9443,7 +9483,10 @@ void md_reap_sync_thread(struct mddev *mddev)
|
||||
sector_t old_dev_sectors = mddev->dev_sectors;
|
||||
bool is_reshaped = false;
|
||||
|
||||
/* sync_thread should be unregistered, collect result */
|
||||
/* resync has finished, collect result */
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
atomic_inc(&mddev->sync_seq);
|
||||
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
|
||||
mddev->degraded != mddev->raid_disks) {
|
||||
@@ -9488,7 +9531,6 @@ void md_reap_sync_thread(struct mddev *mddev)
|
||||
if (mddev_is_clustered(mddev) && is_reshaped
|
||||
&& !test_bit(MD_CLOSING, &mddev->flags))
|
||||
md_cluster_ops->update_size(mddev, old_dev_sectors);
|
||||
wake_up(&resync_wait);
|
||||
/* flag recovery needed just to double check */
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_completed);
|
||||
@@ -9496,6 +9538,7 @@ void md_reap_sync_thread(struct mddev *mddev)
|
||||
md_new_event();
|
||||
if (mddev->event_work.func)
|
||||
queue_work(md_misc_wq, &mddev->event_work);
|
||||
wake_up(&resync_wait);
|
||||
}
|
||||
EXPORT_SYMBOL(md_reap_sync_thread);
|
||||
|
||||
|
||||
@@ -510,7 +510,7 @@ struct mddev {
	struct bio_set			sync_set; /* for sync operations like
						   * metadata and bitmap writes
						   */
	struct bio_set			io_acct_set; /* for raid0 and raid5 io accounting */
	struct bio_set			io_clone_set;

	/* Generic flush handling.
	 * The last to finish preflush schedules a worker to submit
@@ -535,6 +535,11 @@ struct mddev {
	 */
	struct list_head		deleting;

	/* Used to synchronize idle and frozen for action_store() */
	struct mutex			sync_mutex;
	/* The sequence number for sync thread */
	atomic_t			sync_seq;

	bool	has_superblocks:1;
	bool	fail_last_dev:1;
	bool	serialize_policy:1;
@@ -731,7 +736,7 @@ struct md_thread {
	void			*private;
};

struct md_io_acct {
struct md_io_clone {
	struct mddev	*mddev;
	struct bio	*orig_bio;
	unsigned long	start_time;
@@ -769,8 +774,6 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
			struct bio *bio, sector_t start, sector_t size);
int acct_bioset_init(struct mddev *mddev);
void acct_bioset_exit(struct mddev *mddev);
void md_account_bio(struct mddev *mddev, struct bio **bio);

extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);

@@ -377,7 +377,6 @@ static void raid0_free(struct mddev *mddev, void *priv)
|
||||
struct r0conf *conf = priv;
|
||||
|
||||
free_conf(mddev, conf);
|
||||
acct_bioset_exit(mddev);
|
||||
}
|
||||
|
||||
static int raid0_run(struct mddev *mddev)
|
||||
@@ -392,16 +391,11 @@ static int raid0_run(struct mddev *mddev)
|
||||
if (md_check_no_bitmap(mddev))
|
||||
return -EINVAL;
|
||||
|
||||
if (acct_bioset_init(mddev)) {
|
||||
pr_err("md/raid0:%s: alloc acct bioset failed.\n", mdname(mddev));
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* if private is not null, we are here after takeover */
|
||||
if (mddev->private == NULL) {
|
||||
ret = create_strip_zones(mddev, &conf);
|
||||
if (ret < 0)
|
||||
goto exit_acct_set;
|
||||
return ret;
|
||||
mddev->private = conf;
|
||||
}
|
||||
conf = mddev->private;
|
||||
@@ -432,15 +426,9 @@ static int raid0_run(struct mddev *mddev)
|
||||
|
||||
ret = md_integrity_register(mddev);
|
||||
if (ret)
|
||||
goto free;
|
||||
free_conf(mddev, conf);
|
||||
|
||||
return ret;
|
||||
|
||||
free:
|
||||
free_conf(mddev, conf);
|
||||
exit_acct_set:
|
||||
acct_bioset_exit(mddev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -304,8 +304,6 @@ static void call_bio_endio(struct r1bio *r1_bio)
|
||||
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
bio_end_io_acct(bio, r1_bio->start_time);
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
||||
@@ -791,11 +789,17 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||
return best_disk;
|
||||
}
|
||||
|
||||
static void wake_up_barrier(struct r1conf *conf)
|
||||
{
|
||||
if (wq_has_sleeper(&conf->wait_barrier))
|
||||
wake_up(&conf->wait_barrier);
|
||||
}
|
||||
|
||||
static void flush_bio_list(struct r1conf *conf, struct bio *bio)
|
||||
{
|
||||
/* flush any pending bitmap writes to disk before proceeding w/ I/O */
|
||||
raid1_prepare_flush_writes(conf->mddev->bitmap);
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
|
||||
while (bio) { /* submit pending writes */
|
||||
struct bio *next = bio->bi_next;
|
||||
@@ -972,7 +976,7 @@ static bool _wait_barrier(struct r1conf *conf, int idx, bool nowait)
|
||||
* In case freeze_array() is waiting for
|
||||
* get_unqueued_pending() == extra
|
||||
*/
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
/* Wait for the barrier in same barrier unit bucket to drop. */
|
||||
|
||||
/* Return false when nowait flag is set */
|
||||
@@ -1015,7 +1019,7 @@ static bool wait_read_barrier(struct r1conf *conf, sector_t sector_nr, bool nowa
|
||||
* In case freeze_array() is waiting for
|
||||
* get_unqueued_pending() == extra
|
||||
*/
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
/* Wait for array to be unfrozen */
|
||||
|
||||
/* Return false when nowait flag is set */
|
||||
@@ -1044,7 +1048,7 @@ static bool wait_barrier(struct r1conf *conf, sector_t sector_nr, bool nowait)
|
||||
static void _allow_barrier(struct r1conf *conf, int idx)
|
||||
{
|
||||
atomic_dec(&conf->nr_pending[idx]);
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
}
|
||||
|
||||
static void allow_barrier(struct r1conf *conf, sector_t sector_nr)
|
||||
@@ -1173,7 +1177,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
bio_list_merge(&conf->pending_bio_list, &plug->pending);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
kfree(plug);
|
||||
return;
|
||||
@@ -1303,10 +1307,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||
}
|
||||
|
||||
r1_bio->read_disk = rdisk;
|
||||
|
||||
if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
r1_bio->start_time = bio_start_io_acct(bio);
|
||||
|
||||
if (!r1bio_existed) {
|
||||
md_account_bio(mddev, &bio);
|
||||
r1_bio->master_bio = bio;
|
||||
}
|
||||
read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp,
|
||||
&mddev->bio_set);
|
||||
|
||||
@@ -1500,8 +1504,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r1_bio->sectors = max_sectors;
|
||||
}
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
r1_bio->start_time = bio_start_io_acct(bio);
|
||||
md_account_bio(mddev, &bio);
|
||||
r1_bio->master_bio = bio;
|
||||
atomic_set(&r1_bio->remaining, 1);
|
||||
atomic_set(&r1_bio->behind_remaining, 0);
|
||||
|
||||
@@ -1576,7 +1580,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r1_bio_write_done(r1_bio);
|
||||
|
||||
/* In case raid1d snuck in to freeze_array */
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
}
|
||||
|
||||
static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
|
||||
@@ -1766,7 +1770,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
struct r1conf *conf = mddev->private;
|
||||
int err = -EEXIST;
|
||||
int mirror = 0;
|
||||
int mirror = 0, repl_slot = -1;
|
||||
struct raid1_info *p;
|
||||
int first = 0;
|
||||
int last = conf->raid_disks - 1;
|
||||
@@ -1809,17 +1813,21 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
break;
|
||||
}
|
||||
if (test_bit(WantReplacement, &p->rdev->flags) &&
|
||||
p[conf->raid_disks].rdev == NULL) {
|
||||
/* Add this device as a replacement */
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
rdev->raid_disk = mirror;
|
||||
err = 0;
|
||||
conf->fullsync = 1;
|
||||
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
|
||||
break;
|
||||
}
|
||||
p[conf->raid_disks].rdev == NULL && repl_slot < 0)
|
||||
repl_slot = mirror;
|
||||
}
|
||||
|
||||
if (err && repl_slot >= 0) {
|
||||
/* Add this device as a replacement */
|
||||
p = conf->mirrors + repl_slot;
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
rdev->raid_disk = repl_slot;
|
||||
err = 0;
|
||||
conf->fullsync = 1;
|
||||
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
|
||||
}
|
||||
|
||||
print_conf(conf);
|
||||
return err;
|
||||
}
|
||||
@@ -2299,7 +2307,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||
d++;
|
||||
if (d == conf->raid_disks * 2)
|
||||
d = 0;
|
||||
} while (!success && d != read_disk);
|
||||
} while (d != read_disk);
|
||||
|
||||
if (!success) {
|
||||
/* Cannot read from anywhere - mark it bad */
|
||||
|
||||
@@ -157,7 +157,6 @@ struct r1bio {
	sector_t		sector;
	int			sectors;
	unsigned long		state;
	unsigned long		start_time;
	struct mddev		*mddev;
	/*
	 * original bio going to /dev/mdx

@@ -325,8 +325,6 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
|
||||
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
|
||||
if (r10_bio->start_time)
|
||||
bio_end_io_acct(bio, r10_bio->start_time);
|
||||
bio_endio(bio);
|
||||
/*
|
||||
* Wake up any possible resync thread that waits for the device
|
||||
@@ -1172,7 +1170,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
|
||||
}
|
||||
|
||||
static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
struct r10bio *r10_bio)
|
||||
struct r10bio *r10_bio, bool io_accounting)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct bio *read_bio;
|
||||
@@ -1243,9 +1241,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
}
|
||||
slot = r10_bio->read_slot;
|
||||
|
||||
if (!r10_bio->start_time &&
|
||||
blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
r10_bio->start_time = bio_start_io_acct(bio);
|
||||
if (io_accounting) {
|
||||
md_account_bio(mddev, &bio);
|
||||
r10_bio->master_bio = bio;
|
||||
}
|
||||
read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
|
||||
|
||||
r10_bio->devs[slot].bio = read_bio;
|
||||
@@ -1322,6 +1321,25 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
	}
}

static struct md_rdev *dereference_rdev_and_rrdev(struct raid10_info *mirror,
						  struct md_rdev **prrdev)
{
	struct md_rdev *rdev, *rrdev;

	rrdev = rcu_dereference(mirror->replacement);
	/*
	 * Read replacement first to prevent reading both rdev and
	 * replacement as NULL during replacement replace rdev.
	 */
	smp_mb();
	rdev = rcu_dereference(mirror->rdev);
	if (rdev == rrdev)
		rrdev = NULL;

	*prrdev = rrdev;
	return rdev;
}

static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
{
|
||||
int i;
|
||||
@@ -1332,11 +1350,9 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
blocked_rdev = NULL;
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < conf->copies; i++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
|
||||
struct md_rdev *rrdev = rcu_dereference(
|
||||
conf->mirrors[i].replacement);
|
||||
if (rdev == rrdev)
|
||||
rrdev = NULL;
|
||||
struct md_rdev *rdev, *rrdev;
|
||||
|
||||
rdev = dereference_rdev_and_rrdev(&conf->mirrors[i], &rrdev);
|
||||
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
blocked_rdev = rdev;
|
||||
@@ -1465,15 +1481,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
int d = r10_bio->devs[i].devnum;
|
||||
struct md_rdev *rdev, *rrdev;
|
||||
|
||||
rrdev = rcu_dereference(conf->mirrors[d].replacement);
|
||||
/*
|
||||
* Read replacement first to prevent reading both rdev and
|
||||
* replacement as NULL during replacement replace rdev.
|
||||
*/
|
||||
smp_mb();
|
||||
rdev = rcu_dereference(conf->mirrors[d].rdev);
|
||||
if (rdev == rrdev)
|
||||
rrdev = NULL;
|
||||
rdev = dereference_rdev_and_rrdev(&conf->mirrors[d], &rrdev);
|
||||
if (rdev && (test_bit(Faulty, &rdev->flags)))
|
||||
rdev = NULL;
|
||||
if (rrdev && (test_bit(Faulty, &rrdev->flags)))
|
||||
@@ -1543,8 +1551,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r10_bio->master_bio = bio;
|
||||
}
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
r10_bio->start_time = bio_start_io_acct(bio);
|
||||
md_account_bio(mddev, &bio);
|
||||
r10_bio->master_bio = bio;
|
||||
atomic_set(&r10_bio->remaining, 1);
|
||||
md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
|
||||
|
||||
@@ -1571,12 +1579,11 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
|
||||
r10_bio->sector = bio->bi_iter.bi_sector;
|
||||
r10_bio->state = 0;
|
||||
r10_bio->read_slot = -1;
|
||||
r10_bio->start_time = 0;
|
||||
memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
|
||||
conf->geo.raid_disks);
|
||||
|
||||
if (bio_data_dir(bio) == READ)
|
||||
raid10_read_request(mddev, bio, r10_bio);
|
||||
raid10_read_request(mddev, bio, r10_bio, true);
|
||||
else
|
||||
raid10_write_request(mddev, bio, r10_bio);
|
||||
}
|
||||
@@ -1780,10 +1787,9 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
|
||||
*/
|
||||
rcu_read_lock();
|
||||
for (disk = 0; disk < geo->raid_disks; disk++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
||||
struct md_rdev *rrdev = rcu_dereference(
|
||||
conf->mirrors[disk].replacement);
|
||||
struct md_rdev *rdev, *rrdev;
|
||||
|
||||
rdev = dereference_rdev_and_rrdev(&conf->mirrors[disk], &rrdev);
|
||||
r10_bio->devs[disk].bio = NULL;
|
||||
r10_bio->devs[disk].repl_bio = NULL;
|
||||
|
||||
@@ -2720,10 +2726,10 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
|
||||
static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
|
||||
{
|
||||
int sect = 0; /* Offset from r10_bio->sector */
|
||||
int sectors = r10_bio->sectors;
|
||||
int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
|
||||
struct md_rdev *rdev;
|
||||
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
|
||||
int d = r10_bio->devs[r10_bio->read_slot].devnum;
|
||||
int d = r10_bio->devs[slot].devnum;
|
||||
|
||||
/* still own a reference to this rdev, so it cannot
|
||||
* have been cleared recently.
|
||||
@@ -2744,13 +2750,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
pr_notice("md/raid10:%s: %pg: Failing raid device\n",
|
||||
mdname(mddev), rdev->bdev);
|
||||
md_error(mddev, rdev);
|
||||
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
|
||||
r10_bio->devs[slot].bio = IO_BLOCKED;
|
||||
return;
|
||||
}
|
||||
|
||||
while(sectors) {
|
||||
int s = sectors;
|
||||
int sl = r10_bio->read_slot;
|
||||
int sl = slot;
|
||||
int success = 0;
|
||||
int start;
|
||||
|
||||
@@ -2785,7 +2791,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
sl++;
|
||||
if (sl == conf->copies)
|
||||
sl = 0;
|
||||
} while (!success && sl != r10_bio->read_slot);
|
||||
} while (sl != slot);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!success) {
|
||||
@@ -2793,16 +2799,16 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
* as bad on the first device to discourage future
|
||||
* reads.
|
||||
*/
|
||||
int dn = r10_bio->devs[r10_bio->read_slot].devnum;
|
||||
int dn = r10_bio->devs[slot].devnum;
|
||||
rdev = conf->mirrors[dn].rdev;
|
||||
|
||||
if (!rdev_set_badblocks(
|
||||
rdev,
|
||||
r10_bio->devs[r10_bio->read_slot].addr
|
||||
r10_bio->devs[slot].addr
|
||||
+ sect,
|
||||
s, 0)) {
|
||||
md_error(mddev, rdev);
|
||||
r10_bio->devs[r10_bio->read_slot].bio
|
||||
r10_bio->devs[slot].bio
|
||||
= IO_BLOCKED;
|
||||
}
|
||||
break;
|
||||
@@ -2811,7 +2817,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
start = sl;
|
||||
/* write it back and re-read */
|
||||
rcu_read_lock();
|
||||
while (sl != r10_bio->read_slot) {
|
||||
while (sl != slot) {
|
||||
if (sl==0)
|
||||
sl = conf->copies;
|
||||
sl--;
|
||||
@@ -2845,7 +2851,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
rcu_read_lock();
|
||||
}
|
||||
sl = start;
|
||||
while (sl != r10_bio->read_slot) {
|
||||
while (sl != slot) {
|
||||
if (sl==0)
|
||||
sl = conf->copies;
|
||||
sl--;
|
||||
@@ -2985,7 +2991,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
r10_bio->state = 0;
|
||||
raid10_read_request(mddev, r10_bio->master_bio, r10_bio);
|
||||
raid10_read_request(mddev, r10_bio->master_bio, r10_bio, false);
|
||||
/*
|
||||
* allow_barrier after re-submit to ensure no sync io
|
||||
* can be issued while regular io pending.
|
||||
|
||||
@@ -123,7 +123,6 @@ struct r10bio {
	sector_t		sector;	/* virtual sector number */
	int			sectors;
	unsigned long		state;
	unsigned long		start_time;
	struct mddev		*mddev;
	/*
	 * original bio going to /dev/mdx

@@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
 */
static void raid5_align_endio(struct bio *bi)
{
	struct md_io_acct *md_io_acct = bi->bi_private;
	struct bio *raid_bi = md_io_acct->orig_bio;
	struct mddev *mddev;
	struct r5conf *conf;
	struct md_rdev *rdev;
	struct bio *raid_bi = bi->bi_private;
	struct md_rdev *rdev = (void *)raid_bi->bi_next;
	struct mddev *mddev = rdev->mddev;
	struct r5conf *conf = mddev->private;
	blk_status_t error = bi->bi_status;
	unsigned long start_time = md_io_acct->start_time;

	bio_put(bi);

	rdev = (void*)raid_bi->bi_next;
	raid_bi->bi_next = NULL;
	mddev = rdev->mddev;
	conf = mddev->private;

	rdev_dec_pending(rdev, conf->mddev);

	if (!error) {
		if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
			bio_end_io_acct(raid_bi, start_time);
		bio_endio(raid_bi);
		if (atomic_dec_and_test(&conf->active_aligned_reads))
			wake_up(&conf->wait_for_quiescent);
@@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
struct md_rdev *rdev;
|
||||
sector_t sector, end_sector, first_bad;
|
||||
int bad_sectors, dd_idx;
|
||||
struct md_io_acct *md_io_acct;
|
||||
bool did_inc;
|
||||
|
||||
if (!in_chunk_boundary(mddev, raid_bio)) {
|
||||
@@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
return 0;
|
||||
}
|
||||
|
||||
align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
|
||||
&mddev->io_acct_set);
|
||||
md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
|
||||
md_account_bio(mddev, &raid_bio);
|
||||
raid_bio->bi_next = (void *)rdev;
|
||||
if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
|
||||
md_io_acct->start_time = bio_start_io_acct(raid_bio);
|
||||
md_io_acct->orig_bio = raid_bio;
|
||||
|
||||
align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
|
||||
&mddev->bio_set);
|
||||
align_bio->bi_end_io = raid5_align_endio;
|
||||
align_bio->bi_private = md_io_acct;
|
||||
align_bio->bi_private = raid_bio;
|
||||
align_bio->bi_iter.bi_sector = sector;
|
||||
|
||||
/* No reshape active, so we can trust rdev->data_offset */
|
||||
@@ -7787,19 +7774,12 @@ static int raid5_run(struct mddev *mddev)
|
||||
struct md_rdev *rdev;
|
||||
struct md_rdev *journal_dev = NULL;
|
||||
sector_t reshape_offset = 0;
|
||||
int i, ret = 0;
|
||||
int i;
|
||||
long long min_offset_diff = 0;
|
||||
int first = 1;
|
||||
|
||||
if (acct_bioset_init(mddev)) {
|
||||
pr_err("md/raid456:%s: alloc acct bioset failed.\n", mdname(mddev));
|
||||
if (mddev_init_writes_pending(mddev) < 0)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (mddev_init_writes_pending(mddev) < 0) {
|
||||
ret = -ENOMEM;
|
||||
goto exit_acct_set;
|
||||
}
|
||||
|
||||
if (mddev->recovery_cp != MaxSector)
|
||||
pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
|
||||
@@ -7830,8 +7810,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
(mddev->bitmap_info.offset || mddev->bitmap_info.file)) {
|
||||
pr_notice("md/raid:%s: array cannot have both journal and bitmap\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (mddev->reshape_position != MaxSector) {
|
||||
@@ -7856,15 +7835,13 @@ static int raid5_run(struct mddev *mddev)
|
||||
if (journal_dev) {
|
||||
pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (mddev->new_level != mddev->level) {
|
||||
pr_warn("md/raid:%s: unsupported reshape required - aborting.\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
old_disks = mddev->raid_disks - mddev->delta_disks;
|
||||
/* reshape_position must be on a new-stripe boundary, and one
|
||||
@@ -7880,8 +7857,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
if (sector_div(here_new, chunk_sectors * new_data_disks)) {
|
||||
pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
reshape_offset = here_new * chunk_sectors;
|
||||
/* here_new is the stripe we will write to */
|
||||
@@ -7903,8 +7879,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
else if (mddev->ro == 0) {
|
||||
pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if (mddev->reshape_backwards
|
||||
? (here_new * chunk_sectors + min_offset_diff <=
|
||||
@@ -7914,8 +7889,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
/* Reading from the same stripe as writing to - bad */
|
||||
pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev));
|
||||
/* OK, we should be able to continue; */
|
||||
@@ -7939,10 +7913,8 @@ static int raid5_run(struct mddev *mddev)
|
||||
else
|
||||
conf = mddev->private;
|
||||
|
||||
if (IS_ERR(conf)) {
|
||||
ret = PTR_ERR(conf);
|
||||
goto exit_acct_set;
|
||||
}
|
||||
if (IS_ERR(conf))
|
||||
return PTR_ERR(conf);
|
||||
|
||||
if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
|
||||
if (!journal_dev) {
|
||||
@@ -8140,10 +8112,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
free_conf(conf);
|
||||
mddev->private = NULL;
|
||||
pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
|
||||
ret = -EIO;
|
||||
exit_acct_set:
|
||||
acct_bioset_exit(mddev);
|
||||
return ret;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static void raid5_free(struct mddev *mddev, void *priv)
|
||||
@@ -8151,7 +8120,6 @@ static void raid5_free(struct mddev *mddev, void *priv)
|
||||
struct r5conf *conf = priv;
|
||||
|
||||
free_conf(conf);
|
||||
acct_bioset_exit(mddev);
|
||||
mddev->to_remove = &raid5_attrs_group;
|
||||
}
|
||||
|
||||
|
||||