mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 13:30:45 -05:00

Merge branch 'block-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block into xfs-6.16-merge

Merging the block tree into XFS because of some dependencies, such as bdev_validate_blocksize().

Signed-off-by: Carlos Maiolino <cem@kernel.org>

67  block/bdev.c
@@ -152,27 +152,65 @@ static void set_init_blocksize(struct block_device *bdev)
get_order(bsize));
}

/**
* bdev_validate_blocksize - check that this block size is acceptable
* @bdev: blockdevice to check
* @block_size: block size to check
*
* For block device users that do not use buffer heads or the block device
* page cache, make sure that this block size can be used with the device.
*
* Return: On success zero is returned, negative error code on failure.
*/
int bdev_validate_blocksize(struct block_device *bdev, int block_size)
{
if (blk_validate_block_size(block_size))
return -EINVAL;

/* Size cannot be smaller than the size supported by the device */
if (block_size < bdev_logical_block_size(bdev))
return -EINVAL;

return 0;
}
EXPORT_SYMBOL_GPL(bdev_validate_blocksize);

int set_blocksize(struct file *file, int size)
{
struct inode *inode = file->f_mapping->host;
struct block_device *bdev = I_BDEV(inode);
int ret;

if (blk_validate_block_size(size))
return -EINVAL;

/* Size cannot be smaller than the size supported by the device */
if (size < bdev_logical_block_size(bdev))
return -EINVAL;
ret = bdev_validate_blocksize(bdev, size);
if (ret)
return ret;

if (!file->private_data)
return -EINVAL;

/* Don't change the size if it is same as current */
if (inode->i_blkbits != blksize_bits(size)) {
/*
* Flush and truncate the pagecache before we reconfigure the
* mapping geometry because folio sizes are variable now. If a
* reader has already allocated a folio whose size is smaller
* than the new min_order but invokes readahead after the new
* min_order becomes visible, readahead will think there are
* "zero" blocks per folio and crash. Take the inode and
* invalidation locks to avoid racing with
* read/write/fallocate.
*/
inode_lock(inode);
filemap_invalidate_lock(inode->i_mapping);

sync_blockdev(bdev);
kill_bdev(bdev);

inode->i_blkbits = blksize_bits(size);
mapping_set_folio_min_order(inode->i_mapping, get_order(size));
kill_bdev(bdev);
filemap_invalidate_unlock(inode->i_mapping);
inode_unlock(inode);
}
return 0;
}
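Below is a minimal sketch, not part of this commit, of how a filesystem that does not use buffer heads or the block device page cache might call the new helper from its mount path. The example_sb_info type and the warning text are illustrative only; only bdev_validate_blocksize() itself comes from the hunk above.

#include <linux/blkdev.h>
#include <linux/fs.h>

struct example_sb_info {
    unsigned int blocksize;  /* block size read from the on-disk superblock */
};

static int example_check_blocksize(struct super_block *sb,
                                   struct example_sb_info *sbi)
{
    int error;

    /* rejects sizes the device cannot serve (smaller than the logical
     * block size) or that fail blk_validate_block_size() */
    error = bdev_validate_blocksize(sb->s_bdev, sbi->blocksize);
    if (error)
        pr_warn("block size %u not usable on this device\n",
                sbi->blocksize);
    return error;
}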
@@ -777,13 +815,13 @@ static void blkdev_put_part(struct block_device *part)
blkdev_put_whole(whole);
}

struct block_device *blkdev_get_no_open(dev_t dev)
struct block_device *blkdev_get_no_open(dev_t dev, bool autoload)
{
struct block_device *bdev;
struct inode *inode;

inode = ilookup(blockdev_superblock, dev);
if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
blk_request_module(dev);
inode = ilookup(blockdev_superblock, dev);
if (inode)
@@ -1005,7 +1043,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
if (ret)
return ERR_PTR(ret);

bdev = blkdev_get_no_open(dev);
bdev = blkdev_get_no_open(dev, true);
if (!bdev)
return ERR_PTR(-ENXIO);

@@ -1275,18 +1313,15 @@ void sync_bdevs(bool wait)
void bdev_statx(struct path *path, struct kstat *stat,
u32 request_mask)
{
struct inode *backing_inode;
struct block_device *bdev;

backing_inode = d_backing_inode(path->dentry);

/*
* Note that backing_inode is the inode of a block device node file,
* not the block device's internal inode. Therefore it is *not* valid
* to use I_BDEV() here; the block device has to be looked up by i_rdev
* Note that d_backing_inode() returns the block device node inode, not
* the block device's internal inode. Therefore it is *not* valid to
* use I_BDEV() here; the block device has to be looked up by i_rdev
* instead.
*/
bdev = blkdev_get_no_open(backing_inode->i_rdev);
bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false);
if (!bdev)
return;

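The new bool argument decides whether a lookup miss may trigger legacy module autoloading (CONFIG_BLOCK_LEGACY_AUTOLOAD). Open paths keep passing true, while lookup-only callers such as bdev_statx() above pass false, so a plain stat() on a device node no longer loads a driver module. A hedged sketch of the calling convention (hypothetical helper; blkdev_get_no_open() is block-layer internal, declared in block/blk.h):

#include "blk.h"

static void example_peek_bdev(dev_t dev)
{
    struct block_device *bdev;

    bdev = blkdev_get_no_open(dev, false);  /* never autoload a module */
    if (!bdev)
        return;                             /* not present: nothing to report */

    pr_info("%u:%u is %llu bytes\n", MAJOR(dev), MINOR(dev),
            (unsigned long long)bdev_nr_bytes(bdev));
    blkdev_put_no_open(bdev);
}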
@@ -9,6 +9,7 @@
* not aware of PI.
*/
#include <linux/blk-integrity.h>
#include <linux/t10-pi.h>
#include <linux/workqueue.h>
#include "blk.h"

@@ -43,6 +44,29 @@ static void bio_integrity_verify_fn(struct work_struct *work)
bio_endio(bio);
}

#define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)
static bool bip_should_check(struct bio_integrity_payload *bip)
{
return bip->bip_flags & BIP_CHECK_FLAGS;
}

static bool bi_offload_capable(struct blk_integrity *bi)
{
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_CRC64:
return bi->tuple_size == sizeof(struct crc64_pi_tuple);
case BLK_INTEGRITY_CSUM_CRC:
case BLK_INTEGRITY_CSUM_IP:
return bi->tuple_size == sizeof(struct t10_pi_tuple);
default:
pr_warn_once("%s: unknown integrity checksum type:%d\n",
__func__, bi->csum_type);
fallthrough;
case BLK_INTEGRITY_CSUM_NONE:
return false;
}
}

/**
* __bio_integrity_endio - Integrity I/O completion function
* @bio: Protected bio
@@ -54,12 +78,12 @@ static void bio_integrity_verify_fn(struct work_struct *work)
*/
bool __bio_integrity_endio(struct bio *bio)
{
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
struct bio_integrity_payload *bip = bio_integrity(bio);
struct bio_integrity_data *bid =
container_of(bip, struct bio_integrity_data, bip);

if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
bip_should_check(bip)) {
INIT_WORK(&bid->work, bio_integrity_verify_fn);
queue_work(kintegrityd_wq, &bid->work);
return false;
@@ -84,6 +108,7 @@ bool bio_integrity_prep(struct bio *bio)
{
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
struct bio_integrity_data *bid;
bool set_flags = true;
gfp_t gfp = GFP_NOIO;
unsigned int len;
void *buf;
@@ -100,19 +125,24 @@ bool bio_integrity_prep(struct bio *bio)

switch (bio_op(bio)) {
case REQ_OP_READ:
if (bi->flags & BLK_INTEGRITY_NOVERIFY)
return true;
if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
if (bi_offload_capable(bi))
return true;
set_flags = false;
}
break;
case REQ_OP_WRITE:
if (bi->flags & BLK_INTEGRITY_NOGENERATE)
return true;

/*
* Zero the memory allocated to not leak uninitialized kernel
* memory to disk for non-integrity metadata where nothing else
* initializes the memory.
*/
if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
if (bi_offload_capable(bi))
return true;
set_flags = false;
gfp |= __GFP_ZERO;
} else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
gfp |= __GFP_ZERO;
break;
default:
@@ -137,19 +167,21 @@ bool bio_integrity_prep(struct bio *bio)
bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);

if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
bid->bip.bip_flags |= BIP_IP_CHECKSUM;
if (bi->csum_type)
bid->bip.bip_flags |= BIP_CHECK_GUARD;
if (bi->flags & BLK_INTEGRITY_REF_TAG)
bid->bip.bip_flags |= BIP_CHECK_REFTAG;
if (set_flags) {
if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
bid->bip.bip_flags |= BIP_IP_CHECKSUM;
if (bi->csum_type)
bid->bip.bip_flags |= BIP_CHECK_GUARD;
if (bi->flags & BLK_INTEGRITY_REF_TAG)
bid->bip.bip_flags |= BIP_CHECK_REFTAG;
}

if (bio_integrity_add_page(bio, virt_to_page(buf), len,
offset_in_page(buf)) < len)
goto err_end_io;

/* Auto-generate integrity metadata if this is a write */
if (bio_data_dir(bio) == WRITE)
if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
blk_integrity_generate(bio);
else
bid->saved_bio_iter = bio->bi_iter;

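The BIP_CHECK_* flags and the set_flags path mean that BLK_INTEGRITY_NOVERIFY / BLK_INTEGRITY_NOGENERATE no longer skip bio_integrity_prep() entirely: if the controller cannot offload the protection-information format (bi_offload_capable()), a PI buffer is still allocated (zeroed for writes), but no guard/ref/app tag generation or verification is requested. A sketch, not taken from this commit, of the kind of driver-side limits such a device might register; the function name is illustrative and the field names assume the 6.15 queue_limits/blk_integrity layout:

#include <linux/blk-integrity.h>
#include <linux/t10-pi.h>

static void example_set_pi_limits(struct queue_limits *lim)
{
    /* 8-byte T10 PI tuple with CRC guard: offload-capable per
     * bi_offload_capable(), so NOVERIFY/NOGENERATE still short-circuit */
    lim->integrity.csum_type = BLK_INTEGRITY_CSUM_CRC;
    lim->integrity.tuple_size = sizeof(struct t10_pi_tuple);
    lim->integrity.tag_size = 0;
    lim->integrity.flags = BLK_INTEGRITY_NOVERIFY | BLK_INTEGRITY_NOGENERATE;
}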
@@ -66,16 +66,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
}
EXPORT_SYMBOL(bio_integrity_alloc);

static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
bool dirty)
static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs)
{
int i;

for (i = 0; i < nr_vecs; i++) {
if (dirty && !PageCompound(bv[i].bv_page))
set_page_dirty_lock(bv[i].bv_page);
for (i = 0; i < nr_vecs; i++)
unpin_user_page(bv[i].bv_page);
}
}

static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
@@ -91,7 +87,7 @@ static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
ret = copy_to_iter(bvec_virt(bounce_bvec), bytes, &orig_iter);
WARN_ON_ONCE(ret != bytes);

bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs, true);
bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs);
}

/**
@@ -111,8 +107,7 @@ void bio_integrity_unmap_user(struct bio *bio)
return;
}

bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt,
bio_data_dir(bio) == READ);
bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt);
}

/**
@@ -198,7 +193,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
}

if (write)
bio_integrity_unpin_bvec(bvec, nr_vecs, false);
bio_integrity_unpin_bvec(bvec, nr_vecs);
else
memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));

@@ -319,7 +314,7 @@ int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter)
return 0;

release_pages:
bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
bio_integrity_unpin_bvec(bvec, nr_bvecs);
free_bvec:
if (bvec != stack_vec)
kfree(bvec);

@@ -797,7 +797,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
return -EINVAL;
input = skip_spaces(input);

bdev = blkdev_get_no_open(MKDEV(major, minor));
bdev = blkdev_get_no_open(MKDEV(major, minor), false);
if (!bdev)
return -ENODEV;
if (bdev_is_partition(bdev)) {

@@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi,
/*
* For read-ahead of large files to be effective, we need to read ahead
* at least twice the optimal I/O size.
*
* There is no hardware limitation for the read-ahead size and the user
* might have increased the read-ahead size through sysfs, so don't ever
* decrease it.
*/
bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
bdi->ra_pages = max3(bdi->ra_pages,
lim->io_opt * 2 / PAGE_SIZE,
VM_READAHEAD_PAGES);
bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
}

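Switching from max() to max3() keeps an administrator-raised read_ahead_kb across later limit updates, because the current bdi->ra_pages now takes part in the clamp instead of being overwritten. A worked illustration with made-up numbers (io_opt of 256 KiB, 4 KiB pages, VM_READAHEAD_PAGES of 32, ra_pages previously raised to 1024 via sysfs):

#include <linux/minmax.h>

static unsigned long example_ra_pages(unsigned long cur_ra_pages)
{
    unsigned long from_io_opt = (256 * 1024) * 2 / 4096;  /* = 128 pages */
    unsigned long vm_default = 32;                        /* VM_READAHEAD_PAGES */

    /* old: max(from_io_opt, vm_default) == 128, discarding cur_ra_pages;
     * new: the sysfs-tuned 1024 survives the limits update */
    return max3(cur_ra_pages, from_io_opt, vm_default);
}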
@@ -909,6 +909,8 @@ int blk_register_queue(struct gendisk *disk)
out_debugfs_remove:
blk_debugfs_remove(disk);
mutex_unlock(&q->sysfs_lock);
if (queue_is_mq(q))
blk_mq_sysfs_unregister(disk);
out_put_queue_kobj:
kobject_put(&disk->queue_kobj);
return ret;

@@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_THROTTLE_H
#define BLK_THROTTLE_H

@@ -343,6 +343,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
op = REQ_OP_ZONE_RESET;

/* Invalidate the page cache, including dirty pages. */
inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
if (ret)
@@ -364,8 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);

fail:
if (cmd == BLKRESETZONE)
if (cmd == BLKRESETZONE) {
filemap_invalidate_unlock(bdev->bd_mapping);
inode_unlock(bdev->bd_mapping->host);
}

return ret;
}

@@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done)
wait_for_completion_io(done);
}

struct block_device *blkdev_get_no_open(dev_t dev, bool autoload);
void blkdev_put_no_open(struct block_device *bdev);

#define BIO_INLINE_VECS 4
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
gfp_t gfp_mask);
@@ -477,7 +480,8 @@ static inline void blk_zone_update_request_bio(struct request *rq,
* the original BIO sector so that blk_zone_write_plug_bio_endio() can
* lookup the zone write plug.
*/
if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio))
if (req_op(rq) == REQ_OP_ZONE_APPEND ||
bio_flagged(bio, BIO_EMULATES_ZONE_APPEND))
bio->bi_iter.bi_sector = rq->__sector;
}
void blk_zone_write_plug_bio_endio(struct bio *bio);

18  block/fops.c
@@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
if (ret)
return ret;

bdev = blkdev_get_no_open(inode->i_rdev);
bdev = blkdev_get_no_open(inode->i_rdev, true);
if (!bdev)
return -ENXIO;

@@ -746,7 +746,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
ret = direct_write_fallback(iocb, from, ret,
blkdev_buffered_write(iocb, from));
} else {
/*
* Take i_rwsem and invalidate_lock to avoid racing with
* set_blocksize changing i_blkbits/folio order and punching
* out the pagecache.
*/
inode_lock_shared(bd_inode);
ret = blkdev_buffered_write(iocb, from);
inode_unlock_shared(bd_inode);
}

if (ret > 0)
@@ -757,6 +764,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)

static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *bd_inode = bdev_file_inode(iocb->ki_filp);
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
loff_t size = bdev_nr_bytes(bdev);
loff_t pos = iocb->ki_pos;
@@ -793,7 +801,13 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
goto reexpand;
}

/*
* Take i_rwsem and invalidate_lock to avoid racing with set_blocksize
* changing i_blkbits/folio order and punching out the pagecache.
*/
inode_lock_shared(bd_inode);
ret = filemap_read(iocb, to, ret);
inode_unlock_shared(bd_inode);

reexpand:
if (unlikely(shorted))
@@ -836,6 +850,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
if ((start | len) & (bdev_logical_block_size(bdev) - 1))
return -EINVAL;

inode_lock(inode);
filemap_invalidate_lock(inode->i_mapping);

/*
@@ -868,6 +883,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,

fail:
filemap_invalidate_unlock(inode->i_mapping);
inode_unlock(inode);
return error;
}

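The fops.c hunks establish a locking rule for block device buffered I/O: take i_rwsem shared around the page-cache access so that set_blocksize(), which takes it exclusive together with the invalidate lock, cannot change i_blkbits or the minimum folio order underneath a reader or writer. A minimal sketch of the pattern (hypothetical helper assuming the same file context, since bdev_file_inode() is local to block/fops.c):

static ssize_t example_buffered_read(struct kiocb *iocb, struct iov_iter *to)
{
    struct inode *bd_inode = bdev_file_inode(iocb->ki_filp);
    ssize_t ret;

    inode_lock_shared(bd_inode);    /* excludes a concurrent set_blocksize() */
    ret = filemap_read(iocb, to, 0);
    inode_unlock_shared(bd_inode);
    return ret;
}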
@@ -142,6 +142,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
if (err)
return err;

inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
@@ -174,6 +175,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
blk_finish_plug(&plug);
fail:
filemap_invalidate_unlock(bdev->bd_mapping);
inode_unlock(bdev->bd_mapping->host);
return err;
}

@@ -199,12 +201,14 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
end > bdev_nr_bytes(bdev))
return -EINVAL;

inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
err = truncate_bdev_range(bdev, mode, start, end - 1);
if (!err)
err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
GFP_KERNEL);
filemap_invalidate_unlock(bdev->bd_mapping);
inode_unlock(bdev->bd_mapping->host);
return err;
}

@@ -236,6 +240,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
return -EINVAL;

/* Invalidate the page cache, including dirty pages */
inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
err = truncate_bdev_range(bdev, mode, start, end);
if (err)
@@ -246,6 +251,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,

fail:
filemap_invalidate_unlock(bdev->bd_mapping);
inode_unlock(bdev->bd_mapping->host);
return err;
}

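These ioctl.c hunks only add the bdev inode lock (alongside the existing invalidate lock) around the page-cache truncation; the user-visible interface is unchanged. For reference, a minimal userspace illustration of the range ioctls whose locking is affected (error handling trimmed; /dev/sdX is a placeholder):

#include <fcntl.h>
#include <linux/fs.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
    uint64_t range[2] = { 0, 1 << 20 };  /* offset and length: first 1 MiB */
    int fd = open("/dev/sdX", O_RDWR);   /* placeholder device node */

    if (fd < 0)
        return 1;
    ioctl(fd, BLKDISCARD, &range);       /* discard the range */
    ioctl(fd, BLKZEROOUT, &range);       /* or write zeroes to it */
    close(fd);
    return 0;
}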
@@ -46,12 +46,8 @@ int ioprio_check_cap(int ioprio)
*/
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
return -EPERM;
fallthrough;
/* rt has prio field too */
case IOPRIO_CLASS_BE:
if (level >= IOPRIO_NR_LEVELS)
return -EINVAL;
break;
case IOPRIO_CLASS_BE:
case IOPRIO_CLASS_IDLE:
break;
case IOPRIO_CLASS_NONE:

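With this change IOPRIO_CLASS_RT falls through to the same level-range check as IOPRIO_CLASS_BE, so RT levels are now validated against IOPRIO_NR_LEVELS as well. A hedged userspace sketch (assumes the uapi <linux/ioprio.h> macros and a raw syscall, since glibc does not wrap ioprio_set()):

#include <linux/ioprio.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
    /* RT class requires CAP_SYS_ADMIN or CAP_SYS_NICE; the level must now
     * also be below IOPRIO_NR_LEVELS, the bound already applied to BE */
    int prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 4);

    if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, prio) < 0)
        perror("ioprio_set");
    return 0;
}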
@@ -388,12 +388,6 @@ config BLK_DEV_UBLK
definition isn't finalized yet, and might change according to future
requirement, so mark is as experimental now.

Say Y if you want to get better performance because task_work_add()
can be used in IO path for replacing io_uring cmd, which will become
shared between IO tasks and ubq daemon, meantime task_work_add() can
can handle batch more effectively, but task_work_add() isn't exported
for module, so ublk has to be built to kernel.

config BLKDEV_UBLK_LEGACY_OPCODES
bool "Support legacy command opcode"
depends on BLK_DEV_UBLK

@@ -211,72 +211,6 @@ static void loop_set_size(struct loop_device *lo, loff_t size)
|
||||
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
|
||||
}
|
||||
|
||||
static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
|
||||
{
|
||||
struct iov_iter i;
|
||||
ssize_t bw;
|
||||
|
||||
iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len);
|
||||
|
||||
bw = vfs_iter_write(file, &i, ppos, 0);
|
||||
|
||||
if (likely(bw == bvec->bv_len))
|
||||
return 0;
|
||||
|
||||
printk_ratelimited(KERN_ERR
|
||||
"loop: Write error at byte offset %llu, length %i.\n",
|
||||
(unsigned long long)*ppos, bvec->bv_len);
|
||||
if (bw >= 0)
|
||||
bw = -EIO;
|
||||
return bw;
|
||||
}
|
||||
|
||||
static int lo_write_simple(struct loop_device *lo, struct request *rq,
|
||||
loff_t pos)
|
||||
{
|
||||
struct bio_vec bvec;
|
||||
struct req_iterator iter;
|
||||
int ret = 0;
|
||||
|
||||
rq_for_each_segment(bvec, rq, iter) {
|
||||
ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos);
|
||||
if (ret < 0)
|
||||
break;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lo_read_simple(struct loop_device *lo, struct request *rq,
|
||||
loff_t pos)
|
||||
{
|
||||
struct bio_vec bvec;
|
||||
struct req_iterator iter;
|
||||
struct iov_iter i;
|
||||
ssize_t len;
|
||||
|
||||
rq_for_each_segment(bvec, rq, iter) {
|
||||
iov_iter_bvec(&i, ITER_DEST, &bvec, 1, bvec.bv_len);
|
||||
len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
|
||||
if (len < 0)
|
||||
return len;
|
||||
|
||||
flush_dcache_page(bvec.bv_page);
|
||||
|
||||
if (len != bvec.bv_len) {
|
||||
struct bio *bio;
|
||||
|
||||
__rq_for_each_bio(bio, rq)
|
||||
zero_fill_bio(bio);
|
||||
break;
|
||||
}
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void loop_clear_limits(struct loop_device *lo, int mode)
|
||||
{
|
||||
struct queue_limits lim = queue_limits_start_update(lo->lo_queue);
|
||||
@@ -342,7 +276,7 @@ static void lo_complete_rq(struct request *rq)
|
||||
struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
|
||||
if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
|
||||
if (cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
|
||||
req_op(rq) != REQ_OP_READ) {
|
||||
if (cmd->ret < 0)
|
||||
ret = errno_to_blk_status(cmd->ret);
|
||||
@@ -358,14 +292,13 @@ static void lo_complete_rq(struct request *rq)
|
||||
cmd->ret = 0;
|
||||
blk_mq_requeue_request(rq, true);
|
||||
} else {
|
||||
if (cmd->use_aio) {
|
||||
struct bio *bio = rq->bio;
|
||||
struct bio *bio = rq->bio;
|
||||
|
||||
while (bio) {
|
||||
zero_fill_bio(bio);
|
||||
bio = bio->bi_next;
|
||||
}
|
||||
while (bio) {
|
||||
zero_fill_bio(bio);
|
||||
bio = bio->bi_next;
|
||||
}
|
||||
|
||||
ret = BLK_STS_IOERR;
|
||||
end_io:
|
||||
blk_mq_end_request(rq, ret);
|
||||
@@ -445,9 +378,14 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
|
||||
|
||||
cmd->iocb.ki_pos = pos;
|
||||
cmd->iocb.ki_filp = file;
|
||||
cmd->iocb.ki_complete = lo_rw_aio_complete;
|
||||
cmd->iocb.ki_flags = IOCB_DIRECT;
|
||||
cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
|
||||
cmd->iocb.ki_ioprio = req_get_ioprio(rq);
|
||||
if (cmd->use_aio) {
|
||||
cmd->iocb.ki_complete = lo_rw_aio_complete;
|
||||
cmd->iocb.ki_flags = IOCB_DIRECT;
|
||||
} else {
|
||||
cmd->iocb.ki_complete = NULL;
|
||||
cmd->iocb.ki_flags = 0;
|
||||
}
|
||||
|
||||
if (rw == ITER_SOURCE)
|
||||
ret = file->f_op->write_iter(&cmd->iocb, &iter);
|
||||
@@ -458,7 +396,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
|
||||
|
||||
if (ret != -EIOCBQUEUED)
|
||||
lo_rw_aio_complete(&cmd->iocb, ret);
|
||||
return 0;
|
||||
return -EIOCBQUEUED;
|
||||
}
|
||||
|
||||
static int do_req_filebacked(struct loop_device *lo, struct request *rq)
|
||||
@@ -466,15 +404,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
|
||||
struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
|
||||
loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;
|
||||
|
||||
/*
|
||||
* lo_write_simple and lo_read_simple should have been covered
|
||||
* by io submit style function like lo_rw_aio(), one blocker
|
||||
* is that lo_read_simple() need to call flush_dcache_page after
|
||||
* the page is written from kernel, and it isn't easy to handle
|
||||
* this in io submit style function which submits all segments
|
||||
* of the req at one time. And direct read IO doesn't need to
|
||||
* run flush_dcache_page().
|
||||
*/
|
||||
switch (req_op(rq)) {
|
||||
case REQ_OP_FLUSH:
|
||||
return lo_req_flush(lo, rq);
|
||||
@@ -490,15 +419,9 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
|
||||
case REQ_OP_DISCARD:
|
||||
return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
|
||||
case REQ_OP_WRITE:
|
||||
if (cmd->use_aio)
|
||||
return lo_rw_aio(lo, cmd, pos, ITER_SOURCE);
|
||||
else
|
||||
return lo_write_simple(lo, rq, pos);
|
||||
return lo_rw_aio(lo, cmd, pos, ITER_SOURCE);
|
||||
case REQ_OP_READ:
|
||||
if (cmd->use_aio)
|
||||
return lo_rw_aio(lo, cmd, pos, ITER_DEST);
|
||||
else
|
||||
return lo_read_simple(lo, rq, pos);
|
||||
return lo_rw_aio(lo, cmd, pos, ITER_DEST);
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
return -EIO;
|
||||
@@ -582,6 +505,17 @@ static void loop_assign_backing_file(struct loop_device *lo, struct file *file)
|
||||
lo->lo_min_dio_size = loop_query_min_dio_size(lo);
|
||||
}
|
||||
|
||||
static int loop_check_backing_file(struct file *file)
|
||||
{
|
||||
if (!file->f_op->read_iter)
|
||||
return -EINVAL;
|
||||
|
||||
if ((file->f_mode & FMODE_WRITE) && !file->f_op->write_iter)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* loop_change_fd switched the backing store of a loopback device to
|
||||
* a new file. This is useful for operating system installers to free up
|
||||
@@ -603,6 +537,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
|
||||
error = loop_check_backing_file(file);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* suppress uevents while reconfiguring the device */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
|
||||
|
||||
@@ -662,19 +600,20 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||
* dependency.
|
||||
*/
|
||||
fput(old_file);
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
|
||||
if (partscan)
|
||||
loop_reread_partitions(lo);
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
/* enable and uncork uevent now that we are done */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
|
||||
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
|
||||
return error;
|
||||
|
||||
out_err:
|
||||
loop_global_unlock(lo, is_loop);
|
||||
out_putf:
|
||||
fput(file);
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
|
||||
goto done;
|
||||
}
|
||||
|
||||
@@ -1039,6 +978,14 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
|
||||
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
|
||||
if ((mode & BLK_OPEN_WRITE) && !file->f_op->write_iter)
|
||||
return -EINVAL;
|
||||
|
||||
error = loop_check_backing_file(file);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
is_loop = is_loop_device(file);
|
||||
|
||||
/* This is safe, since we have a reference from open(). */
|
||||
@@ -1129,8 +1076,8 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
|
||||
if (partscan)
|
||||
clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
|
||||
|
||||
/* enable and uncork uevent now that we are done */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
|
||||
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
|
||||
|
||||
loop_global_unlock(lo, is_loop);
|
||||
if (partscan)
|
||||
@@ -1921,7 +1868,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
|
||||
struct loop_device *lo = rq->q->queuedata;
|
||||
int ret = 0;
|
||||
struct mem_cgroup *old_memcg = NULL;
|
||||
const bool use_aio = cmd->use_aio;
|
||||
|
||||
if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
|
||||
ret = -EIO;
|
||||
@@ -1951,7 +1897,7 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
|
||||
}
|
||||
failed:
|
||||
/* complete non-aio request */
|
||||
if (!use_aio || ret) {
|
||||
if (ret != -EIOCBQUEUED) {
|
||||
if (ret == -EOPNOTSUPP)
|
||||
cmd->ret = ret;
|
||||
else
|
||||
|
||||
@@ -122,15 +122,6 @@ struct ublk_uring_cmd_pdu {
|
||||
*/
|
||||
#define UBLK_IO_FLAG_OWNED_BY_SRV 0x02
|
||||
|
||||
/*
|
||||
* IO command is aborted, so this flag is set in case of
|
||||
* !UBLK_IO_FLAG_ACTIVE.
|
||||
*
|
||||
* After this flag is observed, any pending or new incoming request
|
||||
* associated with this io command will be failed immediately
|
||||
*/
|
||||
#define UBLK_IO_FLAG_ABORTED 0x04
|
||||
|
||||
/*
|
||||
* UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires
|
||||
* get data buffer address from ublksrv.
|
||||
@@ -199,8 +190,6 @@ struct ublk_device {
|
||||
struct completion completion;
|
||||
unsigned int nr_queues_ready;
|
||||
unsigned int nr_privileged_daemon;
|
||||
|
||||
struct work_struct nosrv_work;
|
||||
};
|
||||
|
||||
/* header of ublk_params */
|
||||
@@ -209,18 +198,13 @@ struct ublk_params_header {
|
||||
__u32 types;
|
||||
};
|
||||
|
||||
static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq);
|
||||
|
||||
static void ublk_stop_dev_unlocked(struct ublk_device *ub);
|
||||
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
|
||||
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
|
||||
struct ublk_queue *ubq, int tag, size_t offset);
|
||||
const struct ublk_queue *ubq, int tag, size_t offset);
|
||||
static inline unsigned int ublk_req_build_flags(struct request *req);
|
||||
static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
|
||||
int tag);
|
||||
static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub)
|
||||
{
|
||||
return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);
|
||||
}
|
||||
|
||||
static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
|
||||
{
|
||||
return ub->dev_info.flags & UBLK_F_ZONED;
|
||||
@@ -620,14 +604,19 @@ static void ublk_apply_params(struct ublk_device *ub)
|
||||
ublk_dev_param_zoned_apply(ub);
|
||||
}
|
||||
|
||||
static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
|
||||
{
|
||||
return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY;
|
||||
}
|
||||
|
||||
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
|
||||
{
|
||||
return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);
|
||||
return ubq->flags & UBLK_F_USER_COPY;
|
||||
}
|
||||
|
||||
static inline bool ublk_need_map_io(const struct ublk_queue *ubq)
|
||||
{
|
||||
return !ublk_support_user_copy(ubq);
|
||||
return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq);
|
||||
}
|
||||
|
||||
static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
|
||||
@@ -635,8 +624,11 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
|
||||
/*
|
||||
* read()/write() is involved in user copy, so request reference
|
||||
* has to be grabbed
|
||||
*
|
||||
* for zero copy, request buffer need to be registered to io_uring
|
||||
* buffer table, so reference is needed
|
||||
*/
|
||||
return ublk_support_user_copy(ubq);
|
||||
return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq);
|
||||
}
|
||||
|
||||
static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
|
||||
@@ -1074,7 +1066,7 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
|
||||
|
||||
static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
|
||||
{
|
||||
return ubq->ubq_daemon->flags & PF_EXITING;
|
||||
return !ubq->ubq_daemon || ubq->ubq_daemon->flags & PF_EXITING;
|
||||
}
|
||||
|
||||
/* todo: handle partial completion */
|
||||
@@ -1085,12 +1077,6 @@ static inline void __ublk_complete_rq(struct request *req)
|
||||
unsigned int unmapped_bytes;
|
||||
blk_status_t res = BLK_STS_OK;
|
||||
|
||||
/* called from ublk_abort_queue() code path */
|
||||
if (io->flags & UBLK_IO_FLAG_ABORTED) {
|
||||
res = BLK_STS_IOERR;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* failed read IO if nothing is read */
|
||||
if (!io->res && req_op(req) == REQ_OP_READ)
|
||||
io->res = -EIO;
|
||||
@@ -1140,47 +1126,6 @@ static void ublk_complete_rq(struct kref *ref)
|
||||
__ublk_complete_rq(req);
|
||||
}
|
||||
|
||||
static void ublk_do_fail_rq(struct request *req)
|
||||
{
|
||||
struct ublk_queue *ubq = req->mq_hctx->driver_data;
|
||||
|
||||
if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
|
||||
blk_mq_requeue_request(req, false);
|
||||
else
|
||||
__ublk_complete_rq(req);
|
||||
}
|
||||
|
||||
static void ublk_fail_rq_fn(struct kref *ref)
|
||||
{
|
||||
struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
|
||||
ref);
|
||||
struct request *req = blk_mq_rq_from_pdu(data);
|
||||
|
||||
ublk_do_fail_rq(req);
|
||||
}
|
||||
|
||||
/*
|
||||
* Since ublk_rq_task_work_cb always fails requests immediately during
|
||||
* exiting, __ublk_fail_req() is only called from abort context during
|
||||
* exiting. So lock is unnecessary.
|
||||
*
|
||||
* Also aborting may not be started yet, keep in mind that one failed
|
||||
* request may be issued by block layer again.
|
||||
*/
|
||||
static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
|
||||
struct request *req)
|
||||
{
|
||||
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
|
||||
|
||||
if (ublk_need_req_ref(ubq)) {
|
||||
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
|
||||
|
||||
kref_put(&data->ref, ublk_fail_rq_fn);
|
||||
} else {
|
||||
ublk_do_fail_rq(req);
|
||||
}
|
||||
}
|
||||
|
||||
static void ubq_complete_io_cmd(struct ublk_io *io, int res,
|
||||
unsigned issue_flags)
|
||||
{
|
||||
@@ -1336,8 +1281,6 @@ static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l)
|
||||
static enum blk_eh_timer_return ublk_timeout(struct request *rq)
|
||||
{
|
||||
struct ublk_queue *ubq = rq->mq_hctx->driver_data;
|
||||
unsigned int nr_inflight = 0;
|
||||
int i;
|
||||
|
||||
if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) {
|
||||
if (!ubq->timeout) {
|
||||
@@ -1348,26 +1291,6 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
|
||||
if (!ubq_daemon_is_dying(ubq))
|
||||
return BLK_EH_RESET_TIMER;
|
||||
|
||||
for (i = 0; i < ubq->q_depth; i++) {
|
||||
struct ublk_io *io = &ubq->ios[i];
|
||||
|
||||
if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
|
||||
nr_inflight++;
|
||||
}
|
||||
|
||||
/* cancelable uring_cmd can't help us if all commands are in-flight */
|
||||
if (nr_inflight == ubq->q_depth) {
|
||||
struct ublk_device *ub = ubq->dev;
|
||||
|
||||
if (ublk_abort_requests(ub, ubq)) {
|
||||
schedule_work(&ub->nosrv_work);
|
||||
}
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
|
||||
return BLK_EH_RESET_TIMER;
|
||||
}
|
||||
|
||||
@@ -1470,6 +1393,37 @@ static const struct blk_mq_ops ublk_mq_ops = {
|
||||
.timeout = ublk_timeout,
|
||||
};
|
||||
|
||||
static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* All old ioucmds have to be completed */
|
||||
ubq->nr_io_ready = 0;
|
||||
|
||||
/*
|
||||
* old daemon is PF_EXITING, put it now
|
||||
*
|
||||
* It could be NULL in case of closing one quisced device.
|
||||
*/
|
||||
if (ubq->ubq_daemon)
|
||||
put_task_struct(ubq->ubq_daemon);
|
||||
/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
|
||||
ubq->ubq_daemon = NULL;
|
||||
ubq->timeout = false;
|
||||
|
||||
for (i = 0; i < ubq->q_depth; i++) {
|
||||
struct ublk_io *io = &ubq->ios[i];
|
||||
|
||||
/*
|
||||
* UBLK_IO_FLAG_CANCELED is kept for avoiding to touch
|
||||
* io->cmd
|
||||
*/
|
||||
io->flags &= UBLK_IO_FLAG_CANCELED;
|
||||
io->cmd = NULL;
|
||||
io->addr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int ublk_ch_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct ublk_device *ub = container_of(inode->i_cdev,
|
||||
@@ -1481,10 +1435,119 @@ static int ublk_ch_open(struct inode *inode, struct file *filp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ublk_reset_ch_dev(struct ublk_device *ub)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
|
||||
ublk_queue_reinit(ub, ublk_get_queue(ub, i));
|
||||
|
||||
/* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
|
||||
ub->mm = NULL;
|
||||
ub->nr_queues_ready = 0;
|
||||
ub->nr_privileged_daemon = 0;
|
||||
}
|
||||
|
||||
static struct gendisk *ublk_get_disk(struct ublk_device *ub)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
|
||||
spin_lock(&ub->lock);
|
||||
disk = ub->ub_disk;
|
||||
if (disk)
|
||||
get_device(disk_to_dev(disk));
|
||||
spin_unlock(&ub->lock);
|
||||
|
||||
return disk;
|
||||
}
|
||||
|
||||
static void ublk_put_disk(struct gendisk *disk)
|
||||
{
|
||||
if (disk)
|
||||
put_device(disk_to_dev(disk));
|
||||
}
|
||||
|
||||
static int ublk_ch_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct ublk_device *ub = filp->private_data;
|
||||
struct gendisk *disk;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* disk isn't attached yet, either device isn't live, or it has
|
||||
* been removed already, so we needn't to do anything
|
||||
*/
|
||||
disk = ublk_get_disk(ub);
|
||||
if (!disk)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* All uring_cmd are done now, so abort any request outstanding to
|
||||
* the ublk server
|
||||
*
|
||||
* This can be done in lockless way because ublk server has been
|
||||
* gone
|
||||
*
|
||||
* More importantly, we have to provide forward progress guarantee
|
||||
* without holding ub->mutex, otherwise control task grabbing
|
||||
* ub->mutex triggers deadlock
|
||||
*
|
||||
* All requests may be inflight, so ->canceling may not be set, set
|
||||
* it now.
|
||||
*/
|
||||
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
|
||||
struct ublk_queue *ubq = ublk_get_queue(ub, i);
|
||||
|
||||
ubq->canceling = true;
|
||||
ublk_abort_queue(ub, ubq);
|
||||
}
|
||||
blk_mq_kick_requeue_list(disk->queue);
|
||||
|
||||
/*
|
||||
* All infligh requests have been completed or requeued and any new
|
||||
* request will be failed or requeued via `->canceling` now, so it is
|
||||
* fine to grab ub->mutex now.
|
||||
*/
|
||||
mutex_lock(&ub->mutex);
|
||||
|
||||
/* double check after grabbing lock */
|
||||
if (!ub->ub_disk)
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* Transition the device to the nosrv state. What exactly this
|
||||
* means depends on the recovery flags
|
||||
*/
|
||||
blk_mq_quiesce_queue(disk->queue);
|
||||
if (ublk_nosrv_should_stop_dev(ub)) {
|
||||
/*
|
||||
* Allow any pending/future I/O to pass through quickly
|
||||
* with an error. This is needed because del_gendisk
|
||||
* waits for all pending I/O to complete
|
||||
*/
|
||||
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
|
||||
ublk_get_queue(ub, i)->force_abort = true;
|
||||
blk_mq_unquiesce_queue(disk->queue);
|
||||
|
||||
ublk_stop_dev_unlocked(ub);
|
||||
} else {
|
||||
if (ublk_nosrv_dev_should_queue_io(ub)) {
|
||||
/* ->canceling is set and all requests are aborted */
|
||||
ub->dev_info.state = UBLK_S_DEV_QUIESCED;
|
||||
} else {
|
||||
ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
|
||||
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
|
||||
ublk_get_queue(ub, i)->fail_io = true;
|
||||
}
|
||||
blk_mq_unquiesce_queue(disk->queue);
|
||||
}
|
||||
unlock:
|
||||
mutex_unlock(&ub->mutex);
|
||||
ublk_put_disk(disk);
|
||||
|
||||
/* all uring_cmd has been done now, reset device & ubq */
|
||||
ublk_reset_ch_dev(ub);
|
||||
out:
|
||||
clear_bit(UB_STATE_OPEN, &ub->state);
|
||||
return 0;
|
||||
}
|
||||
@@ -1551,10 +1614,26 @@ static void ublk_commit_completion(struct ublk_device *ub,
|
||||
ublk_put_req_ref(ubq, req);
|
||||
}
|
||||
|
||||
static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
|
||||
struct request *req)
|
||||
{
|
||||
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
|
||||
|
||||
if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
|
||||
blk_mq_requeue_request(req, false);
|
||||
else {
|
||||
io->res = -EIO;
|
||||
__ublk_complete_rq(req);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from ubq_daemon context via cancel fn, meantime quiesce ublk
|
||||
* blk-mq queue, so we are called exclusively with blk-mq and ubq_daemon
|
||||
* context, so everything is serialized.
|
||||
* Called from ublk char device release handler, when any uring_cmd is
|
||||
* done, meantime request queue is "quiesced" since all inflight requests
|
||||
* can't be completed because ublk server is dead.
|
||||
*
|
||||
* So no one can hold our request IO reference any more, simply ignore the
|
||||
* reference, and complete the request immediately
|
||||
*/
|
||||
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
{
|
||||
@@ -1571,46 +1650,29 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
* will do it
|
||||
*/
|
||||
rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
|
||||
if (rq && blk_mq_request_started(rq)) {
|
||||
io->flags |= UBLK_IO_FLAG_ABORTED;
|
||||
if (rq && blk_mq_request_started(rq))
|
||||
__ublk_fail_req(ubq, io, rq);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Must be called when queue is frozen */
|
||||
static bool ublk_mark_queue_canceling(struct ublk_queue *ubq)
|
||||
static void ublk_mark_queue_canceling(struct ublk_queue *ubq)
|
||||
{
|
||||
bool canceled;
|
||||
|
||||
spin_lock(&ubq->cancel_lock);
|
||||
canceled = ubq->canceling;
|
||||
if (!canceled)
|
||||
if (!ubq->canceling)
|
||||
ubq->canceling = true;
|
||||
spin_unlock(&ubq->cancel_lock);
|
||||
|
||||
return canceled;
|
||||
}
|
||||
|
||||
static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
static void ublk_start_cancel(struct ublk_queue *ubq)
|
||||
{
|
||||
bool was_canceled = ubq->canceling;
|
||||
struct gendisk *disk;
|
||||
|
||||
if (was_canceled)
|
||||
return false;
|
||||
|
||||
spin_lock(&ub->lock);
|
||||
disk = ub->ub_disk;
|
||||
if (disk)
|
||||
get_device(disk_to_dev(disk));
|
||||
spin_unlock(&ub->lock);
|
||||
struct ublk_device *ub = ubq->dev;
|
||||
struct gendisk *disk = ublk_get_disk(ub);
|
||||
|
||||
/* Our disk has been dead */
|
||||
if (!disk)
|
||||
return false;
|
||||
|
||||
return;
|
||||
/*
|
||||
* Now we are serialized with ublk_queue_rq()
|
||||
*
|
||||
@@ -1619,25 +1681,36 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
* touch completed uring_cmd
|
||||
*/
|
||||
blk_mq_quiesce_queue(disk->queue);
|
||||
was_canceled = ublk_mark_queue_canceling(ubq);
|
||||
if (!was_canceled) {
|
||||
/* abort queue is for making forward progress */
|
||||
ublk_abort_queue(ub, ubq);
|
||||
}
|
||||
ublk_mark_queue_canceling(ubq);
|
||||
blk_mq_unquiesce_queue(disk->queue);
|
||||
put_device(disk_to_dev(disk));
|
||||
|
||||
return !was_canceled;
|
||||
ublk_put_disk(disk);
|
||||
}
|
||||
|
||||
static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
|
||||
static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct ublk_io *io = &ubq->ios[tag];
|
||||
struct ublk_device *ub = ubq->dev;
|
||||
struct request *req;
|
||||
bool done;
|
||||
|
||||
if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Don't try to cancel this command if the request is started for
|
||||
* avoiding race between io_uring_cmd_done() and
|
||||
* io_uring_cmd_complete_in_task().
|
||||
*
|
||||
* Either the started request will be aborted via __ublk_abort_rq(),
|
||||
* then this uring_cmd is canceled next time, or it will be done in
|
||||
* task work function ublk_dispatch_req() because io_uring guarantees
|
||||
* that ublk_dispatch_req() is always called
|
||||
*/
|
||||
req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
|
||||
if (req && blk_mq_request_started(req))
|
||||
return;
|
||||
|
||||
spin_lock(&ubq->cancel_lock);
|
||||
done = !!(io->flags & UBLK_IO_FLAG_CANCELED);
|
||||
if (!done)
|
||||
@@ -1651,6 +1724,17 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
|
||||
/*
|
||||
* The ublk char device won't be closed when calling cancel fn, so both
|
||||
* ublk device and queue are guaranteed to be live
|
||||
*
|
||||
* Two-stage cancel:
|
||||
*
|
||||
* - make every active uring_cmd done in ->cancel_fn()
|
||||
*
|
||||
* - aborting inflight ublk IO requests in ublk char device release handler,
|
||||
* which depends on 1st stage because device can only be closed iff all
|
||||
* uring_cmd are done
|
||||
*
|
||||
* Do _not_ try to acquire ub->mutex before all inflight requests are
|
||||
* aborted, otherwise deadlock may be caused.
|
||||
*/
|
||||
static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
@@ -1658,9 +1742,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
|
||||
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
|
||||
struct ublk_queue *ubq = pdu->ubq;
|
||||
struct task_struct *task;
|
||||
struct ublk_device *ub;
|
||||
bool need_schedule;
|
||||
struct ublk_io *io;
|
||||
|
||||
if (WARN_ON_ONCE(!ubq))
|
||||
return;
|
||||
@@ -1672,16 +1753,11 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
|
||||
if (WARN_ON_ONCE(task && task != ubq->ubq_daemon))
|
||||
return;
|
||||
|
||||
ub = ubq->dev;
|
||||
need_schedule = ublk_abort_requests(ub, ubq);
|
||||
if (!ubq->canceling)
|
||||
ublk_start_cancel(ubq);
|
||||
|
||||
io = &ubq->ios[pdu->tag];
|
||||
WARN_ON_ONCE(io->cmd != cmd);
|
||||
ublk_cancel_cmd(ubq, io, issue_flags);
|
||||
|
||||
if (need_schedule) {
|
||||
schedule_work(&ub->nosrv_work);
|
||||
}
|
||||
WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd);
|
||||
ublk_cancel_cmd(ubq, pdu->tag, issue_flags);
|
||||
}
|
||||
|
||||
static inline bool ublk_queue_ready(struct ublk_queue *ubq)
|
||||
@@ -1694,7 +1770,7 @@ static void ublk_cancel_queue(struct ublk_queue *ubq)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ubq->q_depth; i++)
|
||||
ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED);
|
||||
ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED);
|
||||
}
|
||||
|
||||
/* Cancel all pending commands, must be called after del_gendisk() returns */
|
||||
@@ -1732,33 +1808,20 @@ static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub)
|
||||
}
|
||||
}
|
||||
|
||||
static void __ublk_quiesce_dev(struct ublk_device *ub)
|
||||
static void ublk_force_abort_dev(struct ublk_device *ub)
|
||||
{
|
||||
pr_devel("%s: quiesce ub: dev_id %d state %s\n",
|
||||
int i;
|
||||
|
||||
pr_devel("%s: force abort ub: dev_id %d state %s\n",
|
||||
__func__, ub->dev_info.dev_id,
|
||||
ub->dev_info.state == UBLK_S_DEV_LIVE ?
|
||||
"LIVE" : "QUIESCED");
|
||||
blk_mq_quiesce_queue(ub->ub_disk->queue);
|
||||
ublk_wait_tagset_rqs_idle(ub);
|
||||
ub->dev_info.state = UBLK_S_DEV_QUIESCED;
|
||||
}
|
||||
if (ub->dev_info.state == UBLK_S_DEV_LIVE)
|
||||
ublk_wait_tagset_rqs_idle(ub);
|
||||
|
||||
static void ublk_unquiesce_dev(struct ublk_device *ub)
|
||||
{
|
||||
int i;
|
||||
|
||||
pr_devel("%s: unquiesce ub: dev_id %d state %s\n",
|
||||
__func__, ub->dev_info.dev_id,
|
||||
ub->dev_info.state == UBLK_S_DEV_LIVE ?
|
||||
"LIVE" : "QUIESCED");
|
||||
/* quiesce_work has run. We let requeued rqs be aborted
|
||||
* before running fallback_wq. "force_abort" must be seen
|
||||
* after request queue is unqiuesced. Then del_gendisk()
|
||||
* can move on.
|
||||
*/
|
||||
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
|
||||
ublk_get_queue(ub, i)->force_abort = true;
|
||||
|
||||
blk_mq_unquiesce_queue(ub->ub_disk->queue);
|
||||
/* We may have requeued some rqs in ublk_quiesce_queue() */
|
||||
blk_mq_kick_requeue_list(ub->ub_disk->queue);
|
||||
@@ -1779,61 +1842,51 @@ static struct gendisk *ublk_detach_disk(struct ublk_device *ub)
|
||||
return disk;
|
||||
}
|
||||
|
||||
static void ublk_stop_dev(struct ublk_device *ub)
|
||||
static void ublk_stop_dev_unlocked(struct ublk_device *ub)
|
||||
__must_hold(&ub->mutex)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
|
||||
mutex_lock(&ub->mutex);
|
||||
if (ub->dev_info.state == UBLK_S_DEV_DEAD)
|
||||
goto unlock;
|
||||
if (ublk_nosrv_dev_should_queue_io(ub)) {
|
||||
if (ub->dev_info.state == UBLK_S_DEV_LIVE)
|
||||
__ublk_quiesce_dev(ub);
|
||||
ublk_unquiesce_dev(ub);
|
||||
}
|
||||
return;
|
||||
|
||||
if (ublk_nosrv_dev_should_queue_io(ub))
|
||||
ublk_force_abort_dev(ub);
|
||||
del_gendisk(ub->ub_disk);
|
||||
disk = ublk_detach_disk(ub);
|
||||
put_disk(disk);
|
||||
unlock:
|
||||
}
|
||||
|
||||
static void ublk_stop_dev(struct ublk_device *ub)
|
||||
{
|
||||
mutex_lock(&ub->mutex);
|
||||
ublk_stop_dev_unlocked(ub);
|
||||
mutex_unlock(&ub->mutex);
|
||||
ublk_cancel_dev(ub);
|
||||
}
|
||||
|
||||
static void ublk_nosrv_work(struct work_struct *work)
|
||||
/* reset ublk io_uring queue & io flags */
|
||||
static void ublk_reset_io_flags(struct ublk_device *ub)
|
||||
{
|
||||
struct ublk_device *ub =
|
||||
container_of(work, struct ublk_device, nosrv_work);
|
||||
int i;
|
||||
int i, j;
|
||||
|
||||
if (ublk_nosrv_should_stop_dev(ub)) {
|
||||
ublk_stop_dev(ub);
|
||||
return;
|
||||
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
|
||||
struct ublk_queue *ubq = ublk_get_queue(ub, i);
|
||||
|
||||
/* UBLK_IO_FLAG_CANCELED can be cleared now */
|
||||
spin_lock(&ubq->cancel_lock);
|
||||
for (j = 0; j < ubq->q_depth; j++)
|
||||
ubq->ios[j].flags &= ~UBLK_IO_FLAG_CANCELED;
|
||||
spin_unlock(&ubq->cancel_lock);
|
||||
ubq->canceling = false;
|
||||
ubq->fail_io = false;
|
||||
}
|
||||
|
||||
mutex_lock(&ub->mutex);
|
||||
if (ub->dev_info.state != UBLK_S_DEV_LIVE)
|
||||
goto unlock;
|
||||
|
||||
if (ublk_nosrv_dev_should_queue_io(ub)) {
|
||||
__ublk_quiesce_dev(ub);
|
||||
} else {
|
||||
blk_mq_quiesce_queue(ub->ub_disk->queue);
|
||||
ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
|
||||
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
|
||||
ublk_get_queue(ub, i)->fail_io = true;
|
||||
}
|
||||
blk_mq_unquiesce_queue(ub->ub_disk->queue);
|
||||
}
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&ub->mutex);
|
||||
ublk_cancel_dev(ub);
|
||||
}
|
||||
|
||||
/* device can only be started after all IOs are ready */
|
||||
static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
__must_hold(&ub->mutex)
|
||||
{
|
||||
mutex_lock(&ub->mutex);
|
||||
ubq->nr_io_ready++;
|
||||
if (ublk_queue_ready(ubq)) {
|
||||
ubq->ubq_daemon = current;
|
||||
@@ -1843,18 +1896,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
if (capable(CAP_SYS_ADMIN))
|
||||
ub->nr_privileged_daemon++;
|
||||
}
|
||||
if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues)
|
||||
|
||||
if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) {
|
||||
/* now we are ready for handling ublk io request */
|
||||
ublk_reset_io_flags(ub);
|
||||
complete_all(&ub->completion);
|
||||
mutex_unlock(&ub->mutex);
|
||||
}
|
||||
|
||||
static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
|
||||
int tag)
|
||||
{
|
||||
struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
|
||||
struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
|
||||
|
||||
ublk_queue_cmd(ubq, req);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int ublk_check_cmd_op(u32 cmd_op)
|
||||
@@ -1902,13 +1949,20 @@ static void ublk_io_release(void *priv)
|
||||
}
|
||||
|
||||
static int ublk_register_io_buf(struct io_uring_cmd *cmd,
|
||||
struct ublk_queue *ubq, unsigned int tag,
|
||||
const struct ublk_queue *ubq, unsigned int tag,
|
||||
unsigned int index, unsigned int issue_flags)
|
||||
{
|
||||
struct ublk_device *ub = cmd->file->private_data;
|
||||
const struct ublk_io *io = &ubq->ios[tag];
|
||||
struct request *req;
|
||||
int ret;
|
||||
|
||||
if (!ublk_support_zero_copy(ubq))
|
||||
return -EINVAL;
|
||||
|
||||
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
|
||||
return -EINVAL;
|
||||
|
||||
req = __ublk_check_and_get_req(ub, ubq, tag, 0);
|
||||
if (!req)
|
||||
return -EINVAL;
|
||||
@@ -1924,11 +1978,66 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd,
|
||||
}
|
||||
|
||||
static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
|
||||
const struct ublk_queue *ubq, unsigned int tag,
|
||||
unsigned int index, unsigned int issue_flags)
|
||||
{
|
||||
const struct ublk_io *io = &ubq->ios[tag];
|
||||
|
||||
if (!ublk_support_zero_copy(ubq))
|
||||
return -EINVAL;
|
||||
|
||||
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
|
||||
return -EINVAL;
|
||||
|
||||
return io_buffer_unregister_bvec(cmd, index, issue_flags);
|
||||
}
|
||||
|
||||
static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
|
||||
struct ublk_io *io, __u64 buf_addr)
|
||||
{
|
||||
struct ublk_device *ub = ubq->dev;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* When handling FETCH command for setting up ublk uring queue,
|
||||
* ub->mutex is the innermost lock, and we won't block for handling
|
||||
* FETCH, so it is fine even for IO_URING_F_NONBLOCK.
|
||||
*/
|
||||
mutex_lock(&ub->mutex);
|
||||
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
|
||||
if (ublk_queue_ready(ubq)) {
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* allow each command to be FETCHed at most once */
|
||||
if (io->flags & UBLK_IO_FLAG_ACTIVE) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV);
|
||||
|
||||
if (ublk_need_map_io(ubq)) {
|
||||
/*
|
||||
* FETCH_RQ has to provide IO buffer if NEED GET
|
||||
* DATA is not enabled
|
||||
*/
|
||||
if (!buf_addr && !ublk_need_get_data(ubq))
|
||||
goto out;
|
||||
} else if (buf_addr) {
|
||||
/* User copy requires addr to be unset */
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ublk_fill_io_cmd(io, cmd, buf_addr);
|
||||
ublk_mark_io_ready(ub, ubq);
|
||||
out:
|
||||
mutex_unlock(&ub->mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags,
|
||||
const struct ublksrv_io_cmd *ub_cmd)
|
||||
@@ -1983,35 +2092,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
case UBLK_IO_REGISTER_IO_BUF:
|
||||
return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
|
||||
case UBLK_IO_UNREGISTER_IO_BUF:
|
||||
return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags);
|
||||
return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
|
||||
case UBLK_IO_FETCH_REQ:
|
||||
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
|
||||
if (ublk_queue_ready(ubq)) {
|
||||
ret = -EBUSY;
|
||||
ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* The io is being handled by server, so COMMIT_RQ is expected
|
||||
* instead of FETCH_REQ
|
||||
*/
|
||||
if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
|
||||
goto out;
|
||||
|
||||
if (ublk_need_map_io(ubq)) {
|
||||
/*
|
||||
* FETCH_RQ has to provide IO buffer if NEED GET
|
||||
* DATA is not enabled
|
||||
*/
|
||||
if (!ub_cmd->addr && !ublk_need_get_data(ubq))
|
||||
goto out;
|
||||
} else if (ub_cmd->addr) {
|
||||
/* User copy requires addr to be unset */
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
|
||||
ublk_mark_io_ready(ub, ubq);
|
||||
break;
|
||||
case UBLK_IO_COMMIT_AND_FETCH_REQ:
|
||||
req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
|
||||
@@ -2043,8 +2128,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
|
||||
goto out;
|
||||
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
|
||||
ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
|
||||
break;
|
||||
req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
|
||||
ublk_dispatch_req(ubq, req, issue_flags);
|
||||
return -EIOCBQUEUED;
|
||||
default:
|
||||
goto out;
|
||||
}
|
||||
@@ -2058,13 +2144,10 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
}
|
||||
|
||||
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
|
||||
struct ublk_queue *ubq, int tag, size_t offset)
|
||||
const struct ublk_queue *ubq, int tag, size_t offset)
|
||||
{
|
||||
struct request *req;
|
||||
|
||||
if (!ublk_need_req_ref(ubq))
|
||||
return NULL;
|
||||
|
||||
req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
|
||||
if (!req)
|
||||
return NULL;
|
||||
@@ -2178,6 +2261,9 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb,
|
||||
if (!ubq)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (!ublk_support_user_copy(ubq))
|
||||
return ERR_PTR(-EACCES);
|
||||
|
||||
if (tag >= ubq->q_depth)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
@@ -2411,7 +2497,6 @@ static void ublk_remove(struct ublk_device *ub)
|
||||
bool unprivileged;
|
||||
|
||||
ublk_stop_dev(ub);
|
||||
cancel_work_sync(&ub->nosrv_work);
|
||||
cdev_device_del(&ub->cdev, &ub->cdev_dev);
|
||||
unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
|
||||
ublk_put_device(ub);
|
||||
@@ -2696,7 +2781,6 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
|
||||
goto out_unlock;
|
||||
mutex_init(&ub->mutex);
|
||||
spin_lock_init(&ub->lock);
|
||||
INIT_WORK(&ub->nosrv_work, ublk_nosrv_work);
|
||||
|
||||
ret = ublk_alloc_dev_number(ub, header->dev_id);
|
||||
if (ret < 0)
|
||||
@@ -2718,13 +2802,18 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
|
||||
ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE |
|
||||
UBLK_F_URING_CMD_COMP_IN_TASK;
|
||||
|
||||
/* GET_DATA isn't needed any more with USER_COPY */
|
||||
if (ublk_dev_is_user_copy(ub))
|
||||
/* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
|
||||
if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
|
||||
ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
|
||||
|
||||
/* Zoned storage support requires user copy feature */
|
||||
/*
|
||||
* Zoned storage support requires reusing `ublksrv_io_cmd->addr` for
|
||||
* returning write_append_lba, which is only allowed in case of
|
||||
* user copy or zero copy
|
||||
*/
|
||||
if (ublk_dev_is_zoned(ub) &&
|
||||
(!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !ublk_dev_is_user_copy(ub))) {
|
||||
(!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !(ub->dev_info.flags &
|
||||
(UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)))) {
|
||||
ret = -EINVAL;
|
||||
goto out_free_dev_number;
|
||||
}
|
||||
@@ -2828,7 +2917,6 @@ static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
|
||||
static int ublk_ctrl_stop_dev(struct ublk_device *ub)
|
||||
{
|
||||
ublk_stop_dev(ub);
|
||||
cancel_work_sync(&ub->nosrv_work);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2932,42 +3020,14 @@ static int ublk_ctrl_set_params(struct ublk_device *ub,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
{
|
||||
int i;
|
||||
|
||||
WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq)));
|
||||
|
||||
/* All old ioucmds have to be completed */
|
||||
ubq->nr_io_ready = 0;
|
||||
/* old daemon is PF_EXITING, put it now */
|
||||
put_task_struct(ubq->ubq_daemon);
|
||||
/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
|
||||
ubq->ubq_daemon = NULL;
|
||||
ubq->timeout = false;
|
||||
ubq->canceling = false;
|
||||
|
||||
for (i = 0; i < ubq->q_depth; i++) {
|
||||
struct ublk_io *io = &ubq->ios[i];
|
||||
|
||||
/* forget everything now and be ready for new FETCH_REQ */
|
||||
io->flags = 0;
|
||||
io->cmd = NULL;
|
||||
io->addr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int ublk_ctrl_start_recovery(struct ublk_device *ub,
|
||||
const struct ublksrv_ctrl_cmd *header)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
int i;
|
||||
|
||||
mutex_lock(&ub->mutex);
|
||||
if (ublk_nosrv_should_stop_dev(ub))
|
||||
goto out_unlock;
|
||||
if (!ub->nr_queues_ready)
|
||||
goto out_unlock;
|
||||
/*
|
||||
* START_RECOVERY is only allowed after:
|
||||
*
|
||||
@@ -2991,12 +3051,6 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
|
||||
goto out_unlock;
|
||||
}
|
||||
pr_devel("%s: start recovery for dev id %d.\n", __func__, header->dev_id);
|
||||
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
|
||||
ublk_queue_reinit(ub, ublk_get_queue(ub, i));
|
||||
/* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
|
||||
ub->mm = NULL;
|
||||
ub->nr_queues_ready = 0;
|
||||
ub->nr_privileged_daemon = 0;
|
||||
init_completion(&ub->completion);
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
@@ -3009,7 +3063,6 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
|
||||
{
|
||||
int ublksrv_pid = (int)header->data[0];
|
||||
int ret = -EINVAL;
|
||||
int i;
|
||||
|
||||
pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
|
||||
__func__, ub->dev_info.nr_hw_queues, header->dev_id);
|
||||
@@ -3029,24 +3082,10 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
|
||||
goto out_unlock;
|
||||
}
|
||||
ub->dev_info.ublksrv_pid = ublksrv_pid;
|
||||
ub->dev_info.state = UBLK_S_DEV_LIVE;
|
||||
pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
|
||||
__func__, ublksrv_pid, header->dev_id);
|
||||
|
||||
if (ublk_nosrv_dev_should_queue_io(ub)) {
|
||||
ub->dev_info.state = UBLK_S_DEV_LIVE;
|
||||
blk_mq_unquiesce_queue(ub->ub_disk->queue);
|
||||
pr_devel("%s: queue unquiesced, dev id %d.\n",
|
||||
__func__, header->dev_id);
|
||||
blk_mq_kick_requeue_list(ub->ub_disk->queue);
|
||||
} else {
|
||||
blk_mq_quiesce_queue(ub->ub_disk->queue);
|
||||
ub->dev_info.state = UBLK_S_DEV_LIVE;
|
||||
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
|
||||
ublk_get_queue(ub, i)->fail_io = false;
|
||||
}
|
||||
blk_mq_unquiesce_queue(ub->ub_disk->queue);
|
||||
}
|
||||
|
||||
blk_mq_kick_requeue_list(ub->ub_disk->queue);
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
mutex_unlock(&ub->mutex);
|
||||
|
||||
@@ -2357,9 +2357,8 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
|
||||
|
||||
if (!bitmap)
|
||||
return -ENOENT;
|
||||
if (bitmap->mddev->bitmap_info.external)
|
||||
return -ENOENT;
|
||||
if (!bitmap->storage.sb_page) /* no superblock */
|
||||
if (!bitmap->mddev->bitmap_info.external &&
|
||||
!bitmap->storage.sb_page)
|
||||
return -EINVAL;
|
||||
sb = kmap_local_page(bitmap->storage.sb_page);
|
||||
stats->sync_size = le64_to_cpu(sb->sync_size);
|
||||
|
||||
@@ -2200,14 +2200,9 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
|
||||
if (!rdev_set_badblocks(rdev, sect, s, 0))
|
||||
abort = 1;
|
||||
}
|
||||
if (abort) {
|
||||
conf->recovery_disabled =
|
||||
mddev->recovery_disabled;
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_done_sync(mddev, r1_bio->sectors, 0);
|
||||
put_buf(r1_bio);
|
||||
if (abort)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Try next page */
|
||||
sectors -= s;
|
||||
sect += s;
|
||||
@@ -2346,10 +2341,21 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
|
||||
int disks = conf->raid_disks * 2;
|
||||
struct bio *wbio;
|
||||
|
||||
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
|
||||
/* ouch - failed to read all of that. */
|
||||
if (!fix_sync_read_error(r1_bio))
|
||||
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
|
||||
/*
|
||||
* ouch - failed to read all of that.
|
||||
* No need to fix read error for check/repair
|
||||
* because all member disks are read.
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) ||
|
||||
!fix_sync_read_error(r1_bio)) {
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_done_sync(mddev, r1_bio->sectors, 0);
|
||||
put_buf(r1_bio);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||
process_checks(r1_bio);
|
||||
|
||||
@@ -1735,6 +1735,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
|
||||
* The discard bio returns only first r10bio finishes
|
||||
*/
|
||||
if (first_copy) {
|
||||
md_account_bio(mddev, &bio);
|
||||
r10_bio->master_bio = bio;
|
||||
set_bit(R10BIO_Discard, &r10_bio->state);
|
||||
first_copy = false;
|
||||
|
||||
@@ -102,6 +102,7 @@ config NVME_TCP_TLS
|
||||
depends on NVME_TCP
|
||||
select NET_HANDSHAKE
|
||||
select KEYS
|
||||
select TLS
|
||||
help
|
||||
Enables TLS encryption for NVMe TCP using the netlink handshake API.
|
||||
|
||||
|
||||
@@ -4300,7 +4300,7 @@ static void nvme_scan_work(struct work_struct *work)
|
||||
if (test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events))
|
||||
nvme_queue_scan(ctrl);
|
||||
#ifdef CONFIG_NVME_MULTIPATH
|
||||
else
|
||||
else if (ctrl->ana_log_buf)
|
||||
/* Re-read the ANA log page to not miss updates */
|
||||
queue_work(nvme_wq, &ctrl->ana_work);
|
||||
#endif
|
||||
@@ -4493,7 +4493,8 @@ static void nvme_fw_act_work(struct work_struct *work)
|
||||
msleep(100);
|
||||
}
|
||||
|
||||
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
|
||||
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING) ||
|
||||
!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
|
||||
return;
|
||||
|
||||
nvme_unquiesce_io_queues(ctrl);
|
||||
|
||||
@@ -1050,6 +1050,13 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
|
||||
srcu_idx = srcu_read_lock(&head->srcu);
|
||||
|
||||
list_for_each_entry_rcu(ns, &head->list, siblings) {
|
||||
/*
|
||||
* Ensure that ns path disk node is already added otherwise we
|
||||
* may get invalid kobj name for target
|
||||
*/
|
||||
if (!test_bit(GD_ADDED, &ns->disk->state))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Avoid creating link if it already exists for the given path.
|
||||
* When path ana state transitions from optimized to non-
|
||||
@@ -1065,13 +1072,6 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
|
||||
if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Ensure that ns path disk node is already added otherwise we
|
||||
* may get invalid kobj name for target
|
||||
*/
|
||||
if (!test_bit(GD_ADDED, &ns->disk->state))
|
||||
continue;
|
||||
|
||||
target = disk_to_dev(ns->disk);
|
||||
/*
|
||||
* Create sysfs link from head gendisk kobject @kobj to the
|
||||
|
||||
@@ -3575,7 +3575,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
|
||||
|
||||
dev_info(dev->ctrl.device, "restart after slot reset\n");
|
||||
pci_restore_state(pdev);
|
||||
if (!nvme_try_sched_reset(&dev->ctrl))
|
||||
if (nvme_try_sched_reset(&dev->ctrl))
|
||||
nvme_unquiesce_io_queues(&dev->ctrl);
|
||||
return PCI_ERS_RESULT_RECOVERED;
|
||||
}
|
||||
@@ -3623,6 +3623,9 @@ static const struct pci_device_id nvme_id_table[] = {
|
||||
.driver_data = NVME_QUIRK_BOGUS_NID, },
|
||||
{ PCI_DEVICE(0x1217, 0x8760), /* O2 Micro 64GB Steam Deck */
|
||||
.driver_data = NVME_QUIRK_DMAPOOL_ALIGN_512, },
|
||||
{ PCI_DEVICE(0x126f, 0x1001), /* Silicon Motion generic */
|
||||
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
|
||||
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
||||
{ PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */
|
||||
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
|
||||
NVME_QUIRK_BOGUS_NID, },
|
||||
@@ -3646,6 +3649,9 @@ static const struct pci_device_id nvme_id_table[] = {
|
||||
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
||||
{ PCI_DEVICE(0x15b7, 0x5008), /* Sandisk SN530 */
|
||||
.driver_data = NVME_QUIRK_BROKEN_MSI },
|
||||
{ PCI_DEVICE(0x15b7, 0x5009), /* Sandisk SN550 */
|
||||
.driver_data = NVME_QUIRK_BROKEN_MSI |
|
||||
NVME_QUIRK_NO_DEEPEST_PS },
|
||||
{ PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */
|
||||
.driver_data = NVME_QUIRK_BOGUS_NID, },
|
||||
{ PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */
|
||||
|
||||
@@ -1946,7 +1946,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
|
||||
cancel_work_sync(&queue->io_work);
|
||||
}
|
||||
|
||||
static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
|
||||
static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid)
|
||||
{
|
||||
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
|
||||
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
|
||||
@@ -1965,6 +1965,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
|
||||
mutex_unlock(&queue->queue_lock);
|
||||
}
|
||||
|
||||
static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid)
|
||||
{
|
||||
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
|
||||
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
|
||||
int timeout = 100;
|
||||
|
||||
while (timeout > 0) {
|
||||
if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) ||
|
||||
!sk_wmem_alloc_get(queue->sock->sk))
|
||||
return;
|
||||
msleep(2);
|
||||
timeout -= 2;
|
||||
}
|
||||
dev_warn(nctrl->device,
|
||||
"qid %d: timeout draining sock wmem allocation expired\n",
|
||||
qid);
|
||||
}
|
||||
|
||||
static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
|
||||
{
|
||||
nvme_tcp_stop_queue_nowait(nctrl, qid);
|
||||
nvme_tcp_wait_queue(nctrl, qid);
|
||||
}
|
||||
|
||||
|
||||
static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue)
|
||||
{
|
||||
write_lock_bh(&queue->sock->sk->sk_callback_lock);
|
||||
@@ -2032,7 +2057,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
|
||||
int i;
|
||||
|
||||
for (i = 1; i < ctrl->queue_count; i++)
|
||||
nvme_tcp_stop_queue(ctrl, i);
|
||||
nvme_tcp_stop_queue_nowait(ctrl, i);
|
||||
for (i = 1; i < ctrl->queue_count; i++)
|
||||
nvme_tcp_wait_queue(ctrl, i);
|
||||
}
|
||||
|
||||
static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl,
|
||||
|
||||
@@ -98,6 +98,7 @@ config NVME_TARGET_TCP_TLS
|
||||
bool "NVMe over Fabrics TCP target TLS encryption support"
|
||||
depends on NVME_TARGET_TCP
|
||||
select NET_HANDSHAKE
|
||||
select TLS
|
||||
help
|
||||
Enables TLS encryption for the NVMe TCP target using the netlink handshake API.
|
||||
|
||||
|
||||
@@ -240,7 +240,7 @@ void nvmet_auth_sq_free(struct nvmet_sq *sq)
|
||||
{
|
||||
cancel_delayed_work(&sq->auth_expired_work);
|
||||
#ifdef CONFIG_NVME_TARGET_TCP_TLS
|
||||
sq->tls_key = 0;
|
||||
sq->tls_key = NULL;
|
||||
#endif
|
||||
kfree(sq->dhchap_c1);
|
||||
sq->dhchap_c1 = NULL;
|
||||
@@ -600,13 +600,12 @@ void nvmet_auth_insert_psk(struct nvmet_sq *sq)
|
||||
pr_warn("%s: ctrl %d qid %d failed to refresh key, error %ld\n",
|
||||
__func__, sq->ctrl->cntlid, sq->qid, PTR_ERR(tls_key));
|
||||
tls_key = NULL;
|
||||
kfree_sensitive(tls_psk);
|
||||
}
|
||||
if (sq->ctrl->tls_key)
|
||||
key_put(sq->ctrl->tls_key);
|
||||
sq->ctrl->tls_key = tls_key;
|
||||
#endif
|
||||
|
||||
kfree_sensitive(tls_psk);
|
||||
out_free_digest:
|
||||
kfree_sensitive(digest);
|
||||
out_free_psk:
|
||||
|
||||
@@ -324,6 +324,9 @@ int nvmet_enable_port(struct nvmet_port *port)
|
||||
|
||||
lockdep_assert_held(&nvmet_config_sem);
|
||||
|
||||
if (port->disc_addr.trtype == NVMF_TRTYPE_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
ops = nvmet_transports[port->disc_addr.trtype];
|
||||
if (!ops) {
|
||||
up_write(&nvmet_config_sem);
|
||||
|
||||
@@ -1648,16 +1648,17 @@ static int nvmet_pci_epf_process_sq(struct nvmet_pci_epf_ctrl *ctrl,
|
||||
{
|
||||
struct nvmet_pci_epf_iod *iod;
|
||||
int ret, n = 0;
|
||||
u16 head = sq->head;
|
||||
|
||||
sq->tail = nvmet_pci_epf_bar_read32(ctrl, sq->db);
|
||||
while (sq->head != sq->tail && (!ctrl->sq_ab || n < ctrl->sq_ab)) {
|
||||
while (head != sq->tail && (!ctrl->sq_ab || n < ctrl->sq_ab)) {
|
||||
iod = nvmet_pci_epf_alloc_iod(sq);
|
||||
if (!iod)
|
||||
break;
|
||||
|
||||
/* Get the NVMe command submitted by the host. */
|
||||
ret = nvmet_pci_epf_transfer(ctrl, &iod->cmd,
|
||||
sq->pci_addr + sq->head * sq->qes,
|
||||
sq->pci_addr + head * sq->qes,
|
||||
sq->qes, DMA_FROM_DEVICE);
|
||||
if (ret) {
|
||||
/* Not much we can do... */
|
||||
@@ -1666,12 +1667,13 @@ static int nvmet_pci_epf_process_sq(struct nvmet_pci_epf_ctrl *ctrl,
|
||||
}
|
||||
|
||||
dev_dbg(ctrl->dev, "SQ[%u]: head %u, tail %u, command %s\n",
|
||||
sq->qid, sq->head, sq->tail,
|
||||
sq->qid, head, sq->tail,
|
||||
nvmet_pci_epf_iod_name(iod));
|
||||
|
||||
sq->head++;
|
||||
if (sq->head == sq->depth)
|
||||
sq->head = 0;
|
||||
head++;
|
||||
if (head == sq->depth)
|
||||
head = 0;
|
||||
WRITE_ONCE(sq->head, head);
|
||||
n++;
|
||||
|
||||
queue_work_on(WORK_CPU_UNBOUND, sq->iod_wq, &iod->work);
|
||||
@@ -1761,8 +1763,17 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
|
||||
if (!iod)
|
||||
break;
|
||||
|
||||
/* Post the IOD completion entry. */
|
||||
/*
|
||||
* Post the IOD completion entry. If the IOD request was
|
||||
* executed (req->execute() called), the CQE is already
|
||||
* initialized. However, the IOD may have been failed before
|
||||
* that, leaving the CQE not properly initialized. So always
|
||||
* initialize it here.
|
||||
*/
|
||||
cqe = &iod->cqe;
|
||||
cqe->sq_head = cpu_to_le16(READ_ONCE(iod->sq->head));
|
||||
cqe->sq_id = cpu_to_le16(iod->sq->qid);
|
||||
cqe->command_id = iod->cmd.common.command_id;
|
||||
cqe->status = cpu_to_le16((iod->status << 1) | cq->phase);
|
||||
|
||||
dev_dbg(ctrl->dev,
|
||||
@@ -1800,6 +1811,21 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
|
||||
NVMET_PCI_EPF_CQ_RETRY_INTERVAL);
|
||||
}
|
||||
|
||||
static void nvmet_pci_epf_clear_ctrl_config(struct nvmet_pci_epf_ctrl *ctrl)
|
||||
{
|
||||
struct nvmet_ctrl *tctrl = ctrl->tctrl;
|
||||
|
||||
/* Initialize controller status. */
|
||||
tctrl->csts = 0;
|
||||
ctrl->csts = 0;
|
||||
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts);
|
||||
|
||||
/* Initialize controller configuration and start polling. */
|
||||
tctrl->cc = 0;
|
||||
ctrl->cc = 0;
|
||||
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc);
|
||||
}
|
||||
|
||||
static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
|
||||
{
|
||||
u64 pci_addr, asq, acq;
|
||||
@@ -1865,18 +1891,20 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
|
||||
return 0;
|
||||
|
||||
err:
|
||||
ctrl->csts = 0;
|
||||
nvmet_pci_epf_clear_ctrl_config(ctrl);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
|
||||
static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl,
|
||||
bool shutdown)
|
||||
{
|
||||
int qid;
|
||||
|
||||
if (!ctrl->enabled)
|
||||
return;
|
||||
|
||||
dev_info(ctrl->dev, "Disabling controller\n");
|
||||
dev_info(ctrl->dev, "%s controller\n",
|
||||
shutdown ? "Shutting down" : "Disabling");
|
||||
|
||||
ctrl->enabled = false;
|
||||
cancel_delayed_work_sync(&ctrl->poll_sqs);
|
||||
@@ -1893,6 +1921,11 @@ static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
|
||||
nvmet_pci_epf_delete_cq(ctrl->tctrl, 0);
|
||||
|
||||
ctrl->csts &= ~NVME_CSTS_RDY;
|
||||
if (shutdown) {
|
||||
ctrl->csts |= NVME_CSTS_SHST_CMPLT;
|
||||
ctrl->cc &= ~NVME_CC_ENABLE;
|
||||
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc);
|
||||
}
|
||||
}
|
||||
|
||||
static void nvmet_pci_epf_poll_cc_work(struct work_struct *work)
|
||||
@@ -1919,12 +1952,10 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work)
|
||||
}
|
||||
|
||||
if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc))
|
||||
nvmet_pci_epf_disable_ctrl(ctrl);
|
||||
nvmet_pci_epf_disable_ctrl(ctrl, false);
|
||||
|
||||
if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) {
|
||||
nvmet_pci_epf_disable_ctrl(ctrl);
|
||||
ctrl->csts |= NVME_CSTS_SHST_CMPLT;
|
||||
}
|
||||
if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc))
|
||||
nvmet_pci_epf_disable_ctrl(ctrl, true);
|
||||
|
||||
if (!nvmet_cc_shn(new_cc) && nvmet_cc_shn(old_cc))
|
||||
ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
|
||||
@@ -1963,16 +1994,10 @@ static void nvmet_pci_epf_init_bar(struct nvmet_pci_epf_ctrl *ctrl)
|
||||
/* Clear Controller Memory Buffer Supported (CMBS). */
|
||||
ctrl->cap &= ~(0x1ULL << 57);
|
||||
|
||||
/* Controller configuration. */
|
||||
ctrl->cc = tctrl->cc & (~NVME_CC_ENABLE);
|
||||
|
||||
/* Controller status. */
|
||||
ctrl->csts = ctrl->tctrl->csts;
|
||||
|
||||
nvmet_pci_epf_bar_write64(ctrl, NVME_REG_CAP, ctrl->cap);
|
||||
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_VS, tctrl->subsys->ver);
|
||||
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts);
|
||||
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc);
|
||||
|
||||
nvmet_pci_epf_clear_ctrl_config(ctrl);
|
||||
}
|
||||
|
||||
static int nvmet_pci_epf_create_ctrl(struct nvmet_pci_epf *nvme_epf,
|
||||
@@ -2070,14 +2095,22 @@ static int nvmet_pci_epf_create_ctrl(struct nvmet_pci_epf *nvme_epf,
|
||||
|
||||
static void nvmet_pci_epf_start_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
|
||||
{
|
||||
|
||||
dev_info(ctrl->dev, "PCI link up\n");
|
||||
ctrl->link_up = true;
|
||||
|
||||
schedule_delayed_work(&ctrl->poll_cc, NVMET_PCI_EPF_CC_POLL_INTERVAL);
|
||||
}
|
||||
|
||||
static void nvmet_pci_epf_stop_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
|
||||
{
|
||||
dev_info(ctrl->dev, "PCI link down\n");
|
||||
ctrl->link_up = false;
|
||||
|
||||
cancel_delayed_work_sync(&ctrl->poll_cc);
|
||||
|
||||
nvmet_pci_epf_disable_ctrl(ctrl);
|
||||
nvmet_pci_epf_disable_ctrl(ctrl, false);
|
||||
nvmet_pci_epf_clear_ctrl_config(ctrl);
|
||||
}
|
||||
|
||||
static void nvmet_pci_epf_destroy_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
|
||||
@@ -2300,10 +2333,8 @@ static int nvmet_pci_epf_epc_init(struct pci_epf *epf)
|
||||
if (ret)
|
||||
goto out_clear_bar;
|
||||
|
||||
if (!epc_features->linkup_notifier) {
|
||||
ctrl->link_up = true;
|
||||
if (!epc_features->linkup_notifier)
|
||||
nvmet_pci_epf_start_ctrl(&nvme_epf->ctrl);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -2319,7 +2350,6 @@ static void nvmet_pci_epf_epc_deinit(struct pci_epf *epf)
|
||||
struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf);
|
||||
struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl;
|
||||
|
||||
ctrl->link_up = false;
|
||||
nvmet_pci_epf_destroy_ctrl(ctrl);
|
||||
|
||||
nvmet_pci_epf_deinit_dma(nvme_epf);
|
||||
@@ -2331,7 +2361,6 @@ static int nvmet_pci_epf_link_up(struct pci_epf *epf)
|
||||
struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf);
|
||||
struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl;
|
||||
|
||||
ctrl->link_up = true;
|
||||
nvmet_pci_epf_start_ctrl(ctrl);
|
||||
|
||||
return 0;
|
||||
@@ -2342,7 +2371,6 @@ static int nvmet_pci_epf_link_down(struct pci_epf *epf)
|
||||
struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf);
|
||||
struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl;
|
||||
|
||||
ctrl->link_up = false;
|
||||
nvmet_pci_epf_stop_ctrl(ctrl);
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -1560,6 +1560,9 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue)
|
||||
{
|
||||
struct socket *sock = queue->sock;
|
||||
|
||||
if (!queue->state_change)
|
||||
return;
|
||||
|
||||
write_lock_bh(&sock->sk->sk_callback_lock);
|
||||
sock->sk->sk_data_ready = queue->data_ready;
|
||||
sock->sk->sk_state_change = queue->state_change;
|
||||
|
||||
@@ -1614,6 +1614,7 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
|
||||
return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev);
|
||||
}
|
||||
|
||||
int bdev_validate_blocksize(struct block_device *bdev, int block_size);
|
||||
int set_blocksize(struct file *file, int size);
|
||||
|
||||
int lookup_bdev(const char *pathname, dev_t *dev);
|
||||
@@ -1670,10 +1671,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder,
|
||||
const struct blk_holder_ops *hops);
|
||||
void bd_abort_claiming(struct block_device *bdev, void *holder);
|
||||
|
||||
/* just for blk-cgroup, don't use elsewhere */
|
||||
struct block_device *blkdev_get_no_open(dev_t dev);
|
||||
void blkdev_put_no_open(struct block_device *bdev);
|
||||
|
||||
struct block_device *I_BDEV(struct inode *inode);
|
||||
struct block_device *file_bdev(struct file *bdev_file);
|
||||
bool disk_live(struct gendisk *disk);
|
||||
|
||||
@@ -6,6 +6,10 @@ LDLIBS += -lpthread -lm -luring
|
||||
TEST_PROGS := test_generic_01.sh
|
||||
TEST_PROGS += test_generic_02.sh
|
||||
TEST_PROGS += test_generic_03.sh
|
||||
TEST_PROGS += test_generic_04.sh
|
||||
TEST_PROGS += test_generic_05.sh
|
||||
TEST_PROGS += test_generic_06.sh
|
||||
TEST_PROGS += test_generic_07.sh
|
||||
|
||||
TEST_PROGS += test_null_01.sh
|
||||
TEST_PROGS += test_null_02.sh
|
||||
@@ -21,12 +25,16 @@ TEST_PROGS += test_stripe_04.sh
|
||||
|
||||
TEST_PROGS += test_stress_01.sh
|
||||
TEST_PROGS += test_stress_02.sh
|
||||
TEST_PROGS += test_stress_03.sh
|
||||
TEST_PROGS += test_stress_04.sh
|
||||
TEST_PROGS += test_stress_05.sh
|
||||
|
||||
TEST_GEN_PROGS_EXTENDED = kublk
|
||||
|
||||
include ../lib.mk
|
||||
|
||||
$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c
|
||||
$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \
|
||||
fault_inject.c
|
||||
|
||||
check:
|
||||
shellcheck -x -f gcc *.sh
|
||||
|
||||
98
tools/testing/selftests/ublk/fault_inject.c
Normal file
@@ -0,0 +1,98 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Fault injection ublk target. Hack this up however you like for
|
||||
* testing specific behaviors of ublk_drv. Currently it is a null target
|
||||
* with a configurable delay before completing each I/O. This delay can
|
||||
* be used to test ublk_drv's handling of I/O outstanding to the ublk
|
||||
* server when it dies.
|
||||
*/
|
||||
|
||||
#include "kublk.h"
|
||||
|
||||
static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
|
||||
struct ublk_dev *dev)
|
||||
{
|
||||
const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
|
||||
unsigned long dev_size = 250UL << 30;
|
||||
|
||||
dev->tgt.dev_size = dev_size;
|
||||
dev->tgt.params = (struct ublk_params) {
|
||||
.types = UBLK_PARAM_TYPE_BASIC,
|
||||
.basic = {
|
||||
.logical_bs_shift = 9,
|
||||
.physical_bs_shift = 12,
|
||||
.io_opt_shift = 12,
|
||||
.io_min_shift = 9,
|
||||
.max_sectors = info->max_io_buf_bytes >> 9,
|
||||
.dev_sectors = dev_size >> 9,
|
||||
},
|
||||
};
|
||||
|
||||
dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag)
|
||||
{
|
||||
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
|
||||
struct io_uring_sqe *sqe;
|
||||
struct __kernel_timespec ts = {
|
||||
.tv_nsec = (long long)q->dev->private_data,
|
||||
};
|
||||
|
||||
ublk_queue_alloc_sqes(q, &sqe, 1);
|
||||
io_uring_prep_timeout(sqe, &ts, 1, 0);
|
||||
sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1);
|
||||
|
||||
ublk_queued_tgt_io(q, tag, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag,
|
||||
const struct io_uring_cqe *cqe)
|
||||
{
|
||||
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
|
||||
|
||||
if (cqe->res != -ETIME)
|
||||
ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res);
|
||||
|
||||
if (ublk_completed_tgt_io(q, tag))
|
||||
ublk_complete_io(q, tag, iod->nr_sectors << 9);
|
||||
else
|
||||
ublk_err("%s: io not complete after 1 cqe\n", __func__);
|
||||
}
|
||||
|
||||
static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
|
||||
{
|
||||
static const struct option longopts[] = {
|
||||
{ "delay_us", 1, NULL, 0 },
|
||||
{ 0, 0, 0, 0 }
|
||||
};
|
||||
int option_idx, opt;
|
||||
|
||||
ctx->fault_inject.delay_us = 0;
|
||||
while ((opt = getopt_long(argc, argv, "",
|
||||
longopts, &option_idx)) != -1) {
|
||||
switch (opt) {
|
||||
case 0:
|
||||
if (!strcmp(longopts[option_idx].name, "delay_us"))
|
||||
ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops)
|
||||
{
|
||||
printf("\tfault_inject: [--delay_us us (default 0)]\n");
|
||||
}
|
||||
|
||||
const struct ublk_tgt_ops fault_inject_tgt_ops = {
|
||||
.name = "fault_inject",
|
||||
.init_tgt = ublk_fault_inject_tgt_init,
|
||||
.queue_io = ublk_fault_inject_queue_io,
|
||||
.tgt_io_done = ublk_fault_inject_tgt_io_done,
|
||||
.parse_cmd_line = ublk_fault_inject_cmd_line,
|
||||
.usage = ublk_fault_inject_usage,
|
||||
};
|
||||
@@ -5,22 +5,24 @@
|
||||
|
||||
#include "kublk.h"
|
||||
|
||||
#define MAX_NR_TGT_ARG 64
|
||||
|
||||
unsigned int ublk_dbg_mask = UBLK_LOG;
|
||||
static const struct ublk_tgt_ops *tgt_ops_list[] = {
|
||||
&null_tgt_ops,
|
||||
&loop_tgt_ops,
|
||||
&stripe_tgt_ops,
|
||||
&fault_inject_tgt_ops,
|
||||
};
|
||||
|
||||
static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
|
||||
{
|
||||
const struct ublk_tgt_ops *ops;
|
||||
int i;
|
||||
|
||||
if (name == NULL)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++)
|
||||
for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
|
||||
if (strcmp(tgt_ops_list[i]->name, name) == 0)
|
||||
return tgt_ops_list[i];
|
||||
return NULL;
|
||||
@@ -118,6 +120,27 @@ static int ublk_ctrl_start_dev(struct ublk_dev *dev,
|
||||
return __ublk_ctrl_cmd(dev, &data);
|
||||
}
|
||||
|
||||
static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
|
||||
{
|
||||
struct ublk_ctrl_cmd_data data = {
|
||||
.cmd_op = UBLK_U_CMD_START_USER_RECOVERY,
|
||||
};
|
||||
|
||||
return __ublk_ctrl_cmd(dev, &data);
|
||||
}
|
||||
|
||||
static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
|
||||
{
|
||||
struct ublk_ctrl_cmd_data data = {
|
||||
.cmd_op = UBLK_U_CMD_END_USER_RECOVERY,
|
||||
.flags = CTRL_CMD_HAS_DATA,
|
||||
};
|
||||
|
||||
dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
|
||||
|
||||
return __ublk_ctrl_cmd(dev, &data);
|
||||
}
|
||||
|
||||
static int ublk_ctrl_add_dev(struct ublk_dev *dev)
|
||||
{
|
||||
struct ublk_ctrl_cmd_data data = {
|
||||
@@ -207,10 +230,73 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev)
|
||||
};
|
||||
}
|
||||
|
||||
static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
|
||||
{
|
||||
unsigned done = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < CPU_SETSIZE; i++) {
|
||||
if (CPU_ISSET(i, set))
|
||||
done += snprintf(&buf[done], len - done, "%d ", i);
|
||||
}
|
||||
}
|
||||
|
||||
static void ublk_adjust_affinity(cpu_set_t *set)
|
||||
{
|
||||
int j, updated = 0;
|
||||
|
||||
/*
|
||||
* Just keep the 1st CPU now.
|
||||
*
|
||||
* In the future, auto affinity selection can be tried.
|
||||
*/
|
||||
for (j = 0; j < CPU_SETSIZE; j++) {
|
||||
if (CPU_ISSET(j, set)) {
|
||||
if (!updated) {
|
||||
updated = 1;
|
||||
continue;
|
||||
}
|
||||
CPU_CLR(j, set);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Caller must free the allocated buffer */
|
||||
static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
|
||||
{
|
||||
struct ublk_ctrl_cmd_data data = {
|
||||
.cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY,
|
||||
.flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
|
||||
};
|
||||
cpu_set_t *buf;
|
||||
int i, ret;
|
||||
|
||||
buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
|
||||
data.data[0] = i;
|
||||
data.len = sizeof(cpu_set_t);
|
||||
data.addr = (__u64)&buf[i];
|
||||
|
||||
ret = __ublk_ctrl_cmd(ctrl_dev, &data);
|
||||
if (ret < 0) {
|
||||
free(buf);
|
||||
return ret;
|
||||
}
|
||||
ublk_adjust_affinity(&buf[i]);
|
||||
}
|
||||
|
||||
*ptr_buf = buf;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ublk_ctrl_dump(struct ublk_dev *dev)
|
||||
{
|
||||
struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
|
||||
struct ublk_params p;
|
||||
cpu_set_t *affinity;
|
||||
int ret;
|
||||
|
||||
ret = ublk_ctrl_get_params(dev, &p);
|
||||
@@ -219,12 +305,31 @@ static void ublk_ctrl_dump(struct ublk_dev *dev)
|
||||
return;
|
||||
}
|
||||
|
||||
ret = ublk_ctrl_get_affinity(dev, &affinity);
|
||||
if (ret < 0) {
|
||||
ublk_err("failed to get affinity %m\n");
|
||||
return;
|
||||
}
|
||||
|
||||
ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
|
||||
info->dev_id, info->nr_hw_queues, info->queue_depth,
|
||||
1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
|
||||
ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
|
||||
info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
|
||||
ublk_dev_state_desc(dev));
|
||||
|
||||
if (affinity) {
|
||||
char buf[512];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < info->nr_hw_queues; i++) {
|
||||
ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
|
||||
printf("\tqueue %u: tid %d affinity(%s)\n",
|
||||
i, dev->q[i].tid, buf);
|
||||
}
|
||||
free(affinity);
|
||||
}
|
||||
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
@@ -347,7 +452,9 @@ static int ublk_queue_init(struct ublk_queue *q)
|
||||
}
|
||||
|
||||
ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
|
||||
IORING_SETUP_COOP_TASKRUN);
|
||||
IORING_SETUP_COOP_TASKRUN |
|
||||
IORING_SETUP_SINGLE_ISSUER |
|
||||
IORING_SETUP_DEFER_TASKRUN);
|
||||
if (ret < 0) {
|
||||
ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
|
||||
q->dev->dev_info.dev_id, q->q_id, ret);
|
||||
@@ -429,12 +536,17 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
|
||||
if (!(io->flags & UBLKSRV_IO_FREE))
|
||||
return 0;
|
||||
|
||||
/* we issue because we need either fetching or committing */
|
||||
/*
|
||||
* we issue because we need either fetching or committing or
|
||||
* getting data
|
||||
*/
|
||||
if (!(io->flags &
|
||||
(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
|
||||
(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA)))
|
||||
return 0;
|
||||
|
||||
if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
|
||||
if (io->flags & UBLKSRV_NEED_GET_DATA)
|
||||
cmd_op = UBLK_U_IO_NEED_GET_DATA;
|
||||
else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
|
||||
cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
|
||||
else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
|
||||
cmd_op = UBLK_U_IO_FETCH_REQ;
|
||||
@@ -551,6 +663,9 @@ static void ublk_handle_cqe(struct io_uring *r,
|
||||
assert(tag < q->q_depth);
|
||||
if (q->tgt_ops->queue_io)
|
||||
q->tgt_ops->queue_io(q, tag);
|
||||
} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
|
||||
io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE;
|
||||
ublk_queue_io_cmd(q, io, tag);
|
||||
} else {
|
||||
/*
|
||||
* COMMIT_REQ will be completed immediately since no fetching
|
||||
@@ -602,9 +717,24 @@ static int ublk_process_io(struct ublk_queue *q)
|
||||
return reapped;
|
||||
}
|
||||
|
||||
static void ublk_queue_set_sched_affinity(const struct ublk_queue *q,
|
||||
cpu_set_t *cpuset)
|
||||
{
|
||||
if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
|
||||
ublk_err("ublk dev %u queue %u set affinity failed",
|
||||
q->dev->dev_info.dev_id, q->q_id);
|
||||
}
|
||||
|
||||
struct ublk_queue_info {
|
||||
struct ublk_queue *q;
|
||||
sem_t *queue_sem;
|
||||
cpu_set_t *affinity;
|
||||
};
|
||||
|
||||
static void *ublk_io_handler_fn(void *data)
|
||||
{
|
||||
struct ublk_queue *q = data;
|
||||
struct ublk_queue_info *info = data;
|
||||
struct ublk_queue *q = info->q;
|
||||
int dev_id = q->dev->dev_info.dev_id;
|
||||
int ret;
|
||||
|
||||
@@ -614,6 +744,10 @@ static void *ublk_io_handler_fn(void *data)
|
||||
dev_id, q->q_id);
|
||||
return NULL;
|
||||
}
|
||||
/* IO perf is sensitive to queue pthread affinity on NUMA machines */
|
||||
ublk_queue_set_sched_affinity(q, info->affinity);
|
||||
sem_post(info->queue_sem);
|
||||
|
||||
ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
|
||||
q->tid, dev_id, q->q_id);
|
||||
|
||||
@@ -639,7 +773,7 @@ static void ublk_set_parameters(struct ublk_dev *dev)
|
||||
dev->dev_info.dev_id, ret);
|
||||
}
|
||||
|
||||
static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
|
||||
static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
|
||||
{
|
||||
uint64_t id;
|
||||
int evtfd = ctx->_evtfd;
|
||||
@@ -652,36 +786,68 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
|
||||
else
|
||||
id = ERROR_EVTFD_DEVID;
|
||||
|
||||
if (dev && ctx->shadow_dev)
|
||||
memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));
|
||||
|
||||
if (write(evtfd, &id, sizeof(id)) != sizeof(id))
|
||||
return -EINVAL;
|
||||
|
||||
close(evtfd);
|
||||
shmdt(ctx->shadow_dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
|
||||
{
|
||||
int ret, i;
|
||||
void *thread_ret;
|
||||
const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
|
||||
struct ublk_queue_info *qinfo;
|
||||
cpu_set_t *affinity_buf;
|
||||
void *thread_ret;
|
||||
sem_t queue_sem;
|
||||
int ret, i;
|
||||
|
||||
ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
|
||||
|
||||
qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info),
|
||||
dinfo->nr_hw_queues);
|
||||
if (!qinfo)
|
||||
return -ENOMEM;
|
||||
|
||||
sem_init(&queue_sem, 0, 0);
|
||||
ret = ublk_dev_prep(ctx, dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for (i = 0; i < dinfo->nr_hw_queues; i++) {
|
||||
dev->q[i].dev = dev;
|
||||
dev->q[i].q_id = i;
|
||||
|
||||
qinfo[i].q = &dev->q[i];
|
||||
qinfo[i].queue_sem = &queue_sem;
|
||||
qinfo[i].affinity = &affinity_buf[i];
|
||||
pthread_create(&dev->q[i].thread, NULL,
|
||||
ublk_io_handler_fn,
|
||||
&dev->q[i]);
|
||||
&qinfo[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < dinfo->nr_hw_queues; i++)
|
||||
sem_wait(&queue_sem);
|
||||
free(qinfo);
|
||||
free(affinity_buf);
|
||||
|
||||
/* everything is fine now, start us */
|
||||
ublk_set_parameters(dev);
|
||||
ret = ublk_ctrl_start_dev(dev, getpid());
|
||||
if (ctx->recovery)
|
||||
ret = ublk_ctrl_end_user_recovery(dev, getpid());
|
||||
else {
|
||||
ublk_set_parameters(dev);
|
||||
ret = ublk_ctrl_start_dev(dev, getpid());
|
||||
}
|
||||
if (ret < 0) {
|
||||
ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
|
||||
goto fail;
|
||||
@@ -691,7 +857,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
|
||||
if (ctx->fg)
|
||||
ublk_ctrl_dump(dev);
|
||||
else
|
||||
ublk_send_dev_event(ctx, dev->dev_info.dev_id);
|
||||
ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
|
||||
|
||||
/* wait until we are terminated */
|
||||
for (i = 0; i < dinfo->nr_hw_queues; i++)
|
||||
@@ -856,7 +1022,10 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
ret = ublk_ctrl_add_dev(dev);
|
||||
if (ctx->recovery)
|
||||
ret = ublk_ctrl_start_user_recovery(dev);
|
||||
else
|
||||
ret = ublk_ctrl_add_dev(dev);
|
||||
if (ret < 0) {
|
||||
ublk_err("%s: can't add dev id %d, type %s ret %d\n",
|
||||
__func__, dev_id, tgt_type, ret);
|
||||
@@ -870,7 +1039,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
|
||||
|
||||
fail:
|
||||
if (ret < 0)
|
||||
ublk_send_dev_event(ctx, -1);
|
||||
ublk_send_dev_event(ctx, dev, -1);
|
||||
ublk_ctrl_deinit(dev);
|
||||
return ret;
|
||||
}
|
||||
@@ -884,30 +1053,58 @@ static int cmd_dev_add(struct dev_ctx *ctx)
|
||||
if (ctx->fg)
|
||||
goto run;
|
||||
|
||||
ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
|
||||
if (ctx->_shmid < 0) {
|
||||
ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
|
||||
exit(-1);
|
||||
}
|
||||
ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
|
||||
if (ctx->shadow_dev == (struct ublk_dev *)-1) {
|
||||
ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
|
||||
exit(-1);
|
||||
}
|
||||
ctx->_evtfd = eventfd(0, 0);
|
||||
if (ctx->_evtfd < 0) {
|
||||
ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
setsid();
|
||||
res = fork();
|
||||
if (res == 0) {
|
||||
int res2;
|
||||
|
||||
setsid();
|
||||
res2 = fork();
|
||||
if (res2 == 0) {
|
||||
/* prepare for detaching */
|
||||
close(STDIN_FILENO);
|
||||
close(STDOUT_FILENO);
|
||||
close(STDERR_FILENO);
|
||||
run:
|
||||
res = __cmd_dev_add(ctx);
|
||||
return res;
|
||||
res = __cmd_dev_add(ctx);
|
||||
return res;
|
||||
} else {
|
||||
/* detached from the foreground task */
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
} else if (res > 0) {
|
||||
uint64_t id;
|
||||
int exit_code = EXIT_FAILURE;
|
||||
|
||||
res = read(ctx->_evtfd, &id, sizeof(id));
|
||||
close(ctx->_evtfd);
|
||||
if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
|
||||
ctx->dev_id = id - 1;
|
||||
return __cmd_dev_list(ctx);
|
||||
if (__cmd_dev_list(ctx) >= 0)
|
||||
exit_code = EXIT_SUCCESS;
|
||||
}
|
||||
exit(EXIT_FAILURE);
|
||||
shmdt(ctx->shadow_dev);
|
||||
shmctl(ctx->_shmid, IPC_RMID, NULL);
|
||||
/* wait for child and detach from it */
|
||||
wait(NULL);
|
||||
exit(exit_code);
|
||||
} else {
|
||||
return res;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -969,6 +1166,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx)
|
||||
ublk_err("%s: can't get dev info from %d: %d\n",
|
||||
__func__, ctx->dev_id, ret);
|
||||
} else {
|
||||
if (ctx->shadow_dev)
|
||||
memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));
|
||||
|
||||
ublk_ctrl_dump(dev);
|
||||
}
|
||||
|
||||
@@ -1039,14 +1239,47 @@ static int cmd_dev_get_features(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __cmd_create_help(char *exe, bool recovery)
|
||||
{
|
||||
int i;
|
||||
|
||||
printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
|
||||
exe, recovery ? "recover" : "add");
|
||||
printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n");
|
||||
printf("\t[-e 0|1 ] [-i 0|1]\n");
|
||||
printf("\t[target options] [backfile1] [backfile2] ...\n");
|
||||
printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
|
||||
|
||||
for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
|
||||
const struct ublk_tgt_ops *ops = tgt_ops_list[i];
|
||||
|
||||
if (ops->usage)
|
||||
ops->usage(ops);
|
||||
}
|
||||
}
|
||||
|
||||
static void cmd_add_help(char *exe)
|
||||
{
|
||||
__cmd_create_help(exe, false);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static void cmd_recover_help(char *exe)
|
||||
{
|
||||
__cmd_create_help(exe, true);
|
||||
printf("\tPlease provide exact command line for creating this device with real dev_id\n");
|
||||
printf("\n");
|
||||
}
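A sketch of the intended recover flow (device id 0 is a hypothetical example; as the help text above notes, the options must match the original add command):

	./kublk add -t null -q 2 -r 1
	# ... the ublk server daemon dies ...
	./kublk recover -t null -q 2 -r 1 -n 0

With ctx.recovery set, __cmd_dev_add() issues START_USER_RECOVERY instead of ADD_DEV, and ublk_start_daemon() finishes with END_USER_RECOVERY instead of START_DEV.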
|
||||
|
||||
static int cmd_dev_help(char *exe)
|
||||
{
|
||||
printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe);
|
||||
printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
|
||||
cmd_add_help(exe);
|
||||
cmd_recover_help(exe);
|
||||
|
||||
printf("%s del [-n dev_id] -a \n", exe);
|
||||
printf("\t -a delete all devices -n delete specified device\n");
|
||||
printf("\t -a delete all devices -n delete specified device\n\n");
|
||||
printf("%s list [-n dev_id] -a \n", exe);
|
||||
printf("\t -a list all devices, -n list specified device, default -a \n");
|
||||
printf("\t -a list all devices, -n list specified device, default -a \n\n");
|
||||
printf("%s features\n", exe);
|
||||
return 0;
|
||||
}
|
||||
@@ -1063,9 +1296,13 @@ int main(int argc, char *argv[])
|
||||
{ "quiet", 0, NULL, 0 },
|
||||
{ "zero_copy", 0, NULL, 'z' },
|
||||
{ "foreground", 0, NULL, 0 },
|
||||
{ "chunk_size", 1, NULL, 0 },
|
||||
{ "recovery", 1, NULL, 'r' },
|
||||
{ "recovery_fail_io", 1, NULL, 'e'},
|
||||
{ "recovery_reissue", 1, NULL, 'i'},
|
||||
{ "get_data", 1, NULL, 'g'},
|
||||
{ 0, 0, 0, 0 }
|
||||
};
|
||||
const struct ublk_tgt_ops *ops = NULL;
|
||||
int option_idx, opt;
|
||||
const char *cmd = argv[1];
|
||||
struct dev_ctx ctx = {
|
||||
@@ -1073,15 +1310,18 @@ int main(int argc, char *argv[])
|
||||
.nr_hw_queues = 2,
|
||||
.dev_id = -1,
|
||||
.tgt_type = "unknown",
|
||||
.chunk_size = 65536, /* def chunk size is 64K */
|
||||
};
|
||||
int ret = -EINVAL, i;
|
||||
int tgt_argc = 1;
|
||||
char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
|
||||
int value;
|
||||
|
||||
if (argc == 1)
|
||||
return ret;
|
||||
|
||||
opterr = 0;
|
||||
optind = 2;
|
||||
while ((opt = getopt_long(argc, argv, "t:n:d:q:az",
|
||||
while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz",
|
||||
longopts, &option_idx)) != -1) {
|
||||
switch (opt) {
|
||||
case 'a':
|
||||
@@ -1103,6 +1343,24 @@ int main(int argc, char *argv[])
|
||||
case 'z':
|
||||
ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
|
||||
break;
|
||||
case 'r':
|
||||
value = strtol(optarg, NULL, 10);
|
||||
if (value)
|
||||
ctx.flags |= UBLK_F_USER_RECOVERY;
|
||||
break;
|
||||
case 'e':
|
||||
value = strtol(optarg, NULL, 10);
|
||||
if (value)
|
||||
ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
|
||||
break;
|
||||
case 'i':
|
||||
value = strtol(optarg, NULL, 10);
|
||||
if (value)
|
||||
ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
|
||||
break;
|
||||
case 'g':
|
||||
ctx.flags |= UBLK_F_NEED_GET_DATA;
|
||||
break;
|
||||
case 0:
|
||||
if (!strcmp(longopts[option_idx].name, "debug_mask"))
|
||||
ublk_dbg_mask = strtol(optarg, NULL, 16);
|
||||
@@ -1110,8 +1368,26 @@ int main(int argc, char *argv[])
|
||||
ublk_dbg_mask = 0;
|
||||
if (!strcmp(longopts[option_idx].name, "foreground"))
|
||||
ctx.fg = 1;
|
||||
if (!strcmp(longopts[option_idx].name, "chunk_size"))
|
||||
ctx.chunk_size = strtol(optarg, NULL, 10);
|
||||
break;
|
||||
case '?':
|
||||
/*
|
||||
* every target option requires an argument
|
||||
*/
|
||||
if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') {
|
||||
fprintf(stderr, "every target option requires argument: %s %s\n",
|
||||
argv[optind - 1], argv[optind]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
|
||||
tgt_argv[tgt_argc++] = argv[optind - 1];
|
||||
tgt_argv[tgt_argc++] = argv[optind];
|
||||
} else {
|
||||
fprintf(stderr, "too many target options\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
optind += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1120,9 +1396,25 @@ int main(int argc, char *argv[])
|
||||
ctx.files[ctx.nr_files++] = argv[i++];
|
||||
}
|
||||
|
||||
ops = ublk_find_tgt(ctx.tgt_type);
|
||||
if (ops && ops->parse_cmd_line) {
|
||||
optind = 0;
|
||||
|
||||
tgt_argv[0] = ctx.tgt_type;
|
||||
ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
|
||||
}
|
||||
|
||||
if (!strcmp(cmd, "add"))
|
||||
ret = cmd_dev_add(&ctx);
|
||||
else if (!strcmp(cmd, "del"))
|
||||
else if (!strcmp(cmd, "recover")) {
|
||||
if (ctx.dev_id < 0) {
|
||||
fprintf(stderr, "device id isn't provided for recovering\n");
|
||||
ret = -EINVAL;
|
||||
} else {
|
||||
ctx.recovery = 1;
|
||||
ret = cmd_dev_add(&ctx);
|
||||
}
|
||||
} else if (!strcmp(cmd, "del"))
|
||||
ret = cmd_dev_del(&ctx);
|
||||
else if (!strcmp(cmd, "list")) {
|
||||
ctx.all = 1;
|
||||
|
||||
@@ -20,9 +20,15 @@
|
||||
#include <sys/wait.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <liburing.h>
|
||||
#include <linux/ublk_cmd.h>
|
||||
#include <semaphore.h>
|
||||
|
||||
/* allow ublk_dep.h to override ublk_cmd.h */
|
||||
#include "ublk_dep.h"
|
||||
#include <linux/ublk_cmd.h>
|
||||
|
||||
#define __maybe_unused __attribute__((unused))
|
||||
#define MAX_BACK_FILES 4
|
||||
@@ -30,6 +36,8 @@
|
||||
#define min(a, b) ((a) < (b) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
/****************** part 1: libublk ********************/
|
||||
|
||||
#define CTRL_DEV "/dev/ublk-control"
|
||||
@@ -42,8 +50,8 @@
|
||||
#define UBLKSRV_IO_IDLE_SECS 20
|
||||
|
||||
#define UBLK_IO_MAX_BYTES (1 << 20)
|
||||
#define UBLK_MAX_QUEUES 4
|
||||
#define UBLK_QUEUE_DEPTH 128
|
||||
#define UBLK_MAX_QUEUES 32
|
||||
#define UBLK_QUEUE_DEPTH 1024
|
||||
|
||||
#define UBLK_DBG_DEV (1U << 0)
|
||||
#define UBLK_DBG_QUEUE (1U << 1)
|
||||
@@ -55,6 +63,16 @@
|
||||
struct ublk_dev;
|
||||
struct ublk_queue;
|
||||
|
||||
struct stripe_ctx {
|
||||
/* stripe */
|
||||
unsigned int chunk_size;
|
||||
};
|
||||
|
||||
struct fault_inject_ctx {
|
||||
/* fault_inject */
|
||||
unsigned long delay_us;
|
||||
};
|
||||
|
||||
struct dev_ctx {
|
||||
char tgt_type[16];
|
||||
unsigned long flags;
|
||||
@@ -66,11 +84,18 @@ struct dev_ctx {
|
||||
unsigned int logging:1;
|
||||
unsigned int all:1;
|
||||
unsigned int fg:1;
|
||||
|
||||
/* stripe */
|
||||
unsigned int chunk_size;
|
||||
unsigned int recovery:1;
|
||||
|
||||
int _evtfd;
|
||||
int _shmid;
|
||||
|
||||
/* built from shmem, only for ublk_dump_dev() */
|
||||
struct ublk_dev *shadow_dev;
|
||||
|
||||
union {
|
||||
struct stripe_ctx stripe;
|
||||
struct fault_inject_ctx fault_inject;
|
||||
};
|
||||
};
|
||||
|
||||
struct ublk_ctrl_cmd_data {
|
||||
@@ -90,6 +115,7 @@ struct ublk_io {
|
||||
#define UBLKSRV_NEED_FETCH_RQ (1UL << 0)
|
||||
#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
|
||||
#define UBLKSRV_IO_FREE (1UL << 2)
|
||||
#define UBLKSRV_NEED_GET_DATA (1UL << 3)
|
||||
unsigned short flags;
|
||||
unsigned short refs; /* used by target code only */
|
||||
|
||||
@@ -107,6 +133,14 @@ struct ublk_tgt_ops {
|
||||
int (*queue_io)(struct ublk_queue *, int tag);
|
||||
void (*tgt_io_done)(struct ublk_queue *,
|
||||
int tag, const struct io_uring_cqe *);
|
||||
|
||||
/*
|
||||
* Target specific command line handling
|
||||
*
|
||||
* each option requires an argument on the target command line
|
||||
*/
|
||||
void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
|
||||
void (*usage)(const struct ublk_tgt_ops *ops);
|
||||
};
|
||||
|
||||
struct ublk_tgt {
|
||||
@@ -357,6 +391,7 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q)
|
||||
extern const struct ublk_tgt_ops null_tgt_ops;
|
||||
extern const struct ublk_tgt_ops loop_tgt_ops;
|
||||
extern const struct ublk_tgt_ops stripe_tgt_ops;
|
||||
extern const struct ublk_tgt_ops fault_inject_tgt_ops;
|
||||
|
||||
void backing_file_tgt_deinit(struct ublk_dev *dev);
|
||||
int backing_file_tgt_init(struct ublk_dev *dev);
|
||||
|
||||
@@ -281,7 +281,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
|
||||
.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
|
||||
},
|
||||
};
|
||||
unsigned chunk_size = ctx->chunk_size;
|
||||
unsigned chunk_size = ctx->stripe.chunk_size;
|
||||
struct stripe_conf *conf;
|
||||
unsigned chunk_shift;
|
||||
loff_t bytes = 0;
|
||||
@@ -344,10 +344,36 @@ static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
|
||||
backing_file_tgt_deinit(dev);
|
||||
}
|
||||
|
||||
static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
|
||||
{
|
||||
static const struct option longopts[] = {
|
||||
{ "chunk_size", 1, NULL, 0 },
|
||||
{ 0, 0, 0, 0 }
|
||||
};
|
||||
int option_idx, opt;
|
||||
|
||||
ctx->stripe.chunk_size = 65536;
|
||||
while ((opt = getopt_long(argc, argv, "",
|
||||
longopts, &option_idx)) != -1) {
|
||||
switch (opt) {
|
||||
case 0:
|
||||
if (!strcmp(longopts[option_idx].name, "chunk_size"))
|
||||
ctx->stripe.chunk_size = strtol(optarg, NULL, 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ublk_stripe_usage(const struct ublk_tgt_ops *ops)
|
||||
{
|
||||
printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n");
|
||||
}
|
||||
|
||||
const struct ublk_tgt_ops stripe_tgt_ops = {
|
||||
.name = "stripe",
|
||||
.init_tgt = ublk_stripe_tgt_init,
|
||||
.deinit_tgt = ublk_stripe_tgt_deinit,
|
||||
.queue_io = ublk_stripe_queue_io,
|
||||
.tgt_io_done = ublk_stripe_io_done,
|
||||
.parse_cmd_line = ublk_stripe_cmd_line,
|
||||
.usage = ublk_stripe_usage,
|
||||
};
|
||||
|
||||
@@ -17,8 +17,8 @@ _get_disk_dev_t() {
|
||||
local minor
|
||||
|
||||
dev=/dev/ublkb"${dev_id}"
|
||||
major=$(stat -c '%Hr' "$dev")
|
||||
minor=$(stat -c '%Lr' "$dev")
|
||||
major="0x"$(stat -c '%t' "$dev")
|
||||
minor="0x"$(stat -c '%T' "$dev")
|
||||
|
||||
echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))
|
||||
}
|
||||
@@ -30,18 +30,26 @@ _run_fio_verify_io() {
|
||||
}
|
||||
|
||||
_create_backfile() {
|
||||
local my_size=$1
|
||||
local my_file
|
||||
local index=$1
|
||||
local new_size=$2
|
||||
local old_file
|
||||
local new_file
|
||||
|
||||
my_file=$(mktemp ublk_file_"${my_size}"_XXXXX)
|
||||
truncate -s "${my_size}" "${my_file}"
|
||||
echo "$my_file"
|
||||
old_file="${UBLK_BACKFILES[$index]}"
|
||||
[ -f "$old_file" ] && rm -f "$old_file"
|
||||
|
||||
new_file=$(mktemp ublk_file_"${new_size}"_XXXXX)
|
||||
truncate -s "${new_size}" "${new_file}"
|
||||
UBLK_BACKFILES["$index"]="$new_file"
|
||||
}
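For instance (mirroring the recovery tests added below), slot 0 of UBLK_BACKFILES can be refreshed with a 256M sparse file and then handed to a loop target:

	_create_backfile 0 256M
	_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}"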
|
||||
|
||||
_remove_backfile() {
|
||||
local file=$1
|
||||
_remove_files() {
|
||||
local file
|
||||
|
||||
[ -f "$file" ] && rm -f "$file"
|
||||
for file in "${UBLK_BACKFILES[@]}"; do
|
||||
[ -f "$file" ] && rm -f "$file"
|
||||
done
|
||||
[ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP"
|
||||
}
|
||||
|
||||
_create_tmp_dir() {
|
||||
@@ -106,6 +114,7 @@ _prep_test() {
|
||||
local type=$1
|
||||
shift 1
|
||||
modprobe ublk_drv > /dev/null 2>&1
|
||||
UBLK_TMP=$(mktemp ublk_test_XXXXX)
|
||||
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*"
|
||||
}
|
||||
|
||||
@@ -129,7 +138,10 @@ _show_result()
|
||||
echo "$1 : [FAIL]"
|
||||
fi
|
||||
fi
|
||||
[ "$2" -ne 0 ] && exit "$2"
|
||||
if [ "$2" -ne 0 ]; then
|
||||
_remove_files
|
||||
exit "$2"
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
@@ -138,16 +150,16 @@ _check_add_dev()
|
||||
{
|
||||
local tid=$1
|
||||
local code=$2
|
||||
shift 2
|
||||
|
||||
if [ "${code}" -ne 0 ]; then
|
||||
_remove_test_files "$@"
|
||||
_show_result "${tid}" "${code}"
|
||||
fi
|
||||
}
|
||||
|
||||
_cleanup_test() {
|
||||
"${UBLK_PROG}" del -a
|
||||
rm -f "$UBLK_TMP"
|
||||
|
||||
_remove_files
|
||||
}
|
||||
|
||||
_have_feature()
|
||||
@@ -158,9 +170,11 @@ _have_feature()
|
||||
return 1
|
||||
}
|
||||
|
||||
_add_ublk_dev() {
|
||||
local kublk_temp;
|
||||
_create_ublk_dev() {
|
||||
local dev_id;
|
||||
local cmd=$1
|
||||
|
||||
shift 1
|
||||
|
||||
if [ ! -c /dev/ublk-control ]; then
|
||||
return ${UBLK_SKIP_CODE}
|
||||
@@ -171,17 +185,34 @@ _add_ublk_dev() {
|
||||
fi
|
||||
fi
|
||||
|
||||
kublk_temp=$(mktemp /tmp/kublk-XXXXXX)
|
||||
if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then
|
||||
if ! dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then
|
||||
echo "fail to add ublk dev $*"
|
||||
rm -f "${kublk_temp}"
|
||||
return 255
|
||||
fi
|
||||
|
||||
dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}')
|
||||
udevadm settle
|
||||
rm -f "${kublk_temp}"
|
||||
echo "${dev_id}"
|
||||
|
||||
if [[ "$dev_id" =~ ^[0-9]+$ ]]; then
|
||||
echo "${dev_id}"
|
||||
else
|
||||
return 255
|
||||
fi
|
||||
}
|
||||
|
||||
_add_ublk_dev() {
|
||||
_create_ublk_dev "add" "$@"
|
||||
}
|
||||
|
||||
_recover_ublk_dev() {
|
||||
local dev_id
|
||||
local state
|
||||
|
||||
dev_id=$(_create_ublk_dev "recover" "$@")
|
||||
for ((j=0;j<20;j++)); do
|
||||
state=$(_get_ublk_dev_state "${dev_id}")
|
||||
[ "$state" == "LIVE" ] && break
|
||||
sleep 1
|
||||
done
|
||||
echo "$state"
|
||||
}
|
||||
|
||||
# kill the ublk daemon and return ublk device state
@@ -220,7 +251,7 @@ __run_io_and_remove()
local kill_server=$3

fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
--rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \
--rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 2
if [ "${kill_server}" = "yes" ]; then
@@ -238,15 +269,80 @@ __run_io_and_remove()
wait
}

run_io_and_remove()
{
local size=$1
local dev_id
shift 1

dev_id=$(_add_ublk_dev "$@")
_check_add_dev "$TID" $?

[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
if ! __run_io_and_remove "$dev_id" "${size}" "no"; then
echo "/dev/ublkc$dev_id isn't removed"
exit 255
fi
}

run_io_and_kill_daemon()
{
local size=$1
local dev_id
shift 1

dev_id=$(_add_ublk_dev "$@")
_check_add_dev "$TID" $?

[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then
echo "/dev/ublkc$dev_id isn't removed res ${res}"
exit 255
fi
}

run_io_and_recover()
{
local state
local dev_id

dev_id=$(_add_ublk_dev "$@")
_check_add_dev "$TID" $?

fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
--rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 4

state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
if [ "$state" != "QUIESCED" ]; then
echo "device isn't quiesced($state) after killing daemon"
return 255
fi

state=$(_recover_ublk_dev -n "$dev_id" "$@")
if [ "$state" != "LIVE" ]; then
echo "failed to recover to LIVE($state)"
return 255
fi

if ! __remove_ublk_dev_return "${dev_id}"; then
echo "delete dev ${dev_id} failed"
return 255
fi
wait
}

_ublk_test_top_dir()
{
cd "$(dirname "$0")" && pwd
}

UBLK_TMP=$(mktemp ublk_test_XXXXX)
UBLK_PROG=$(_ublk_test_top_dir)/kublk
UBLK_TEST_QUIET=1
UBLK_TEST_SHOW_RESULT=1
UBLK_BACKFILES=()
export UBLK_PROG
export UBLK_TEST_QUIET
export UBLK_TEST_SHOW_RESULT
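
# Illustrative only (not part of this patch): backing files are now tracked by
# index in UBLK_BACKFILES, so a caller creates them by slot and passes the
# array entries to the device helpers, e.g.:
#
#	_create_backfile 0 256M
#	dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
#	_check_add_dev "$TID" $?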
40
tools/testing/selftests/ublk/test_generic_04.sh
Executable file
@@ -0,0 +1,40 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="generic_04"
ERR_CODE=0

ublk_run_recover_test()
{
run_io_and_recover "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}

if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi

_prep_test "recover" "basic recover function verification"

_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M

ublk_run_recover_test -t null -q 2 -r 1 &
ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" &
ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait

ublk_run_recover_test -t null -q 2 -r 1 -i 1 &
ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" &
ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait

_cleanup_test "recover"
_show_result $TID $ERR_CODE
44
tools/testing/selftests/ublk/test_generic_05.sh
Executable file
@@ -0,0 +1,44 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="generic_05"
ERR_CODE=0

ublk_run_recover_test()
{
run_io_and_recover "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}

if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi

if ! _have_feature "ZERO_COPY"; then
exit "$UBLK_SKIP_CODE"
fi

_prep_test "recover" "basic recover function verification (zero copy)"

_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M

ublk_run_recover_test -t null -q 2 -r 1 -z &
ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" &
ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait

ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 &
ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" &
ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait

_cleanup_test "recover"
_show_result $TID $ERR_CODE
41
tools/testing/selftests/ublk/test_generic_06.sh
Executable file
@@ -0,0 +1,41 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="generic_06"
ERR_CODE=0

_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server"

# configure ublk server to sleep 2s before completing each I/O
dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000)
_check_add_dev $TID $?

STARTTIME=${SECONDS}

dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 &
dd_pid=$!

__ublk_kill_daemon ${dev_id} "DEAD"

wait $dd_pid
dd_exitcode=$?

ENDTIME=${SECONDS}
ELAPSED=$(($ENDTIME - $STARTTIME))

# assert that dd sees an error and exits quickly after ublk server is
# killed. previously this relied on seeing an I/O timeout and so would
# take ~30s
if [ $dd_exitcode -eq 0 ]; then
echo "dd unexpectedly exited successfully!"
ERR_CODE=255
fi
if [ $ELAPSED -ge 5 ]; then
echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!"
ERR_CODE=255
fi

_cleanup_test "fault_inject"
_show_result $TID $ERR_CODE
28
tools/testing/selftests/ublk/test_generic_07.sh
Executable file
@@ -0,0 +1,28 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="generic_07"
ERR_CODE=0

if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi

_prep_test "generic" "test UBLK_F_NEED_GET_DATA"

_create_backfile 0 256M
dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?

# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
ERR_CODE=$?
if [ "$ERR_CODE" -eq 0 ]; then
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
fi

_cleanup_test "generic"
_show_result $TID $ERR_CODE
@@ -12,10 +12,10 @@ fi

_prep_test "loop" "write and verify test"

backfile_0=$(_create_backfile 256M)
_create_backfile 0 256M

dev_id=$(_add_ublk_dev -t loop "$backfile_0")
_check_add_dev $TID $? "${backfile_0}"
dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?

# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@@ -23,6 +23,4 @@ ERR_CODE=$?

_cleanup_test "loop"

_remove_backfile "$backfile_0"

_show_result $TID $ERR_CODE

@@ -8,15 +8,13 @@ ERR_CODE=0

_prep_test "loop" "mkfs & mount & umount"

backfile_0=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t loop "$backfile_0")
_check_add_dev $TID $? "$backfile_0"
_create_backfile 0 256M
dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?

_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?

_cleanup_test "loop"

_remove_backfile "$backfile_0"

_show_result $TID $ERR_CODE

@@ -12,9 +12,9 @@ fi

_prep_test "loop" "write and verify over zero copy"

backfile_0=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
_check_add_dev $TID $? "$backfile_0"
_create_backfile 0 256M
dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?

# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@@ -22,6 +22,4 @@ ERR_CODE=$?

_cleanup_test "loop"

_remove_backfile "$backfile_0"

_show_result $TID $ERR_CODE

@@ -8,15 +8,14 @@ ERR_CODE=0

_prep_test "loop" "mkfs & mount & umount with zero copy"

backfile_0=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
_check_add_dev $TID $? "$backfile_0"
_create_backfile 0 256M

dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?

_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?

_cleanup_test "loop"

_remove_backfile "$backfile_0"

_show_result $TID $ERR_CODE

@@ -12,10 +12,10 @@ fi

_prep_test "loop" "write and verify test"

backfile_0=$(_create_backfile 256M)
_create_backfile 0 256M

dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0")
_check_add_dev $TID $? "${backfile_0}"
dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?

# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@@ -23,6 +23,4 @@ ERR_CODE=$?

_cleanup_test "loop"

_remove_backfile "$backfile_0"

_show_result $TID $ERR_CODE

@@ -4,44 +4,31 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_01"
ERR_CODE=0
DEV_ID=-1

ublk_io_and_remove()
{
local size=$1
shift 1
local backfile=""
if echo "$@" | grep -q "loop"; then
backfile=${*: -1}
fi
DEV_ID=$(_add_ublk_dev "$@")
_check_add_dev $TID $? "${backfile}"

[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then
echo "/dev/ublkc${DEV_ID} isn't removed"
_remove_backfile "${backfile}"
exit 255
run_io_and_remove "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}

if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi

_prep_test "stress" "run IO and remove device"

ublk_io_and_remove 8G -t null -q 4
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M

BACK_FILE=$(_create_backfile 256M)
ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
ublk_io_and_remove 8G -t null -q 4 &
ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait

ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}"
ERR_CODE=$?
_cleanup_test "stress"
_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE

@@ -4,44 +4,31 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_02"
ERR_CODE=0
DEV_ID=-1

if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi

ublk_io_and_kill_daemon()
{
local size=$1
shift 1
local backfile=""
if echo "$@" | grep -q "loop"; then
backfile=${*: -1}
fi
DEV_ID=$(_add_ublk_dev "$@")
_check_add_dev $TID $? "${backfile}"

[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then
echo "/dev/ublkc${DEV_ID} isn't removed res ${res}"
_remove_backfile "${backfile}"
exit 255
run_io_and_kill_daemon "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}

_prep_test "stress" "run IO and kill ublk server"

ublk_io_and_kill_daemon 8G -t null -q 4
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M

BACK_FILE=$(_create_backfile 256M)
ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
ublk_io_and_kill_daemon 8G -t null -q 4 &
ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait

ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}"
ERR_CODE=$?
_cleanup_test "stress"
_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE

38
tools/testing/selftests/ublk/test_stress_03.sh
Executable file
@@ -0,0 +1,38 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_03"
ERR_CODE=0

ublk_io_and_remove()
{
run_io_and_remove "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}

if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi

if ! _have_feature "ZERO_COPY"; then
exit "$UBLK_SKIP_CODE"
fi

_prep_test "stress" "run IO and remove device(zero copy)"

_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M

ublk_io_and_remove 8G -t null -q 4 -z &
ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait

_cleanup_test "stress"
_show_result $TID $ERR_CODE
37
tools/testing/selftests/ublk/test_stress_04.sh
Executable file
@@ -0,0 +1,37 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_04"
ERR_CODE=0

ublk_io_and_kill_daemon()
{
run_io_and_kill_daemon "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}

if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
if ! _have_feature "ZERO_COPY"; then
exit "$UBLK_SKIP_CODE"
fi

_prep_test "stress" "run IO and kill ublk server(zero copy)"

_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M

ublk_io_and_kill_daemon 8G -t null -q 4 -z &
ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait

_cleanup_test "stress"
_show_result $TID $ERR_CODE
64
tools/testing/selftests/ublk/test_stress_05.sh
Executable file
@@ -0,0 +1,64 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_05"
ERR_CODE=0

run_io_and_remove()
{
local size=$1
local dev_id
local dev_pid
shift 1

dev_id=$(_add_ublk_dev "$@")
_check_add_dev $TID $?

[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"

fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
--rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \
--runtime=40 --time_based > /dev/null 2>&1 &
sleep 4

dev_pid=$(_get_ublk_daemon_pid "$dev_id")
kill -9 "$dev_pid"

if ! __remove_ublk_dev_return "${dev_id}"; then
echo "delete dev ${dev_id} failed"
return 255
fi
}

ublk_io_and_remove()
{
run_io_and_remove "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}

_prep_test "stress" "run IO and remove device with recovery enabled"

_create_backfile 0 256M
_create_backfile 1 256M

for reissue in $(seq 0 1); do
ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" &
ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" &
wait
done

if _have_feature "ZERO_COPY"; then
for reissue in $(seq 0 1); do
ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" &
ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
wait
done
fi
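
# Note (illustrative, not in the original file): each loop above runs the jobs
# twice, once with "-i 0" and once with "-i 1", so device removal is exercised
# with both recovery reissue settings; a single standalone round would look like:
#
#	ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i 0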

_cleanup_test "stress"
_show_result $TID $ERR_CODE
@@ -12,19 +12,15 @@ fi

_prep_test "stripe" "write and verify test"

backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)
_create_backfile 0 256M
_create_backfile 1 256M

dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "${backfile_0}"
dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
_check_add_dev $TID $?

# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?

_cleanup_test "stripe"

_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"

_show_result $TID $ERR_CODE

@@ -8,17 +8,14 @@ ERR_CODE=0

_prep_test "stripe" "mkfs & mount & umount"

backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "$backfile_0" "$backfile_1"
_create_backfile 0 256M
_create_backfile 1 256M

dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
_check_add_dev $TID $?

_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?

_cleanup_test "stripe"

_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"

_show_result $TID $ERR_CODE

@@ -12,19 +12,15 @@ fi

_prep_test "stripe" "write and verify test"

backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)
_create_backfile 0 256M
_create_backfile 1 256M

dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "${backfile_0}"
dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
_check_add_dev $TID $?

# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?

_cleanup_test "stripe"

_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"

_show_result $TID $ERR_CODE

@@ -8,17 +8,14 @@ ERR_CODE=0

_prep_test "stripe" "mkfs & mount & umount on zero copy"

backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "$backfile_0" "$backfile_1"
_create_backfile 0 256M
_create_backfile 1 256M

dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
_check_add_dev $TID $?

_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?

_cleanup_test "stripe"

_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"

_show_result $TID $ERR_CODE
