Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2026-05-03 11:12:01 -04:00)
Merge tag 'bcachefs-2025-04-24' of git://evilpiepirate.org/bcachefs
Pull bcachefs fixes from Kent Overstreet:

 - Case insensitive directories now work

 - Fiemap now correctly reports on unwritten pagecache data

 - bcachefs tools 1.25.1 was incorrectly picking unaligned bucket sizes;
   fix journal and write path bugs this uncovered

And assorted smaller fixes...

* tag 'bcachefs-2025-04-24' of git://evilpiepirate.org/bcachefs: (24 commits)
  bcachefs: Rework fiemap transaction restart handling
  bcachefs: add fiemap delalloc extent detection
  bcachefs: refactor fiemap processing into extent helper and struct
  bcachefs: track current fiemap offset in start variable
  bcachefs: drop duplicate fiemap sync flag
  bcachefs: Fix btree_iter_peek_prev() at end of inode
  bcachefs: Make btree_iter_peek_prev() assert more precise
  bcachefs: Unit test fixes
  bcachefs: Print mount opts earlier
  bcachefs: unlink: casefold d_invalidate
  bcachefs: Fix casefold lookups
  bcachefs: Casefold is now a regular opts.h option
  bcachefs: Implement fileattr_(get|set)
  bcachefs: Allocator now copes with unaligned buckets
  bcachefs: Start copygc, rebalance threads earlier
  bcachefs: Refactor bch2_run_recovery_passes()
  bcachefs: bch2_copygc_wakeup()
  bcachefs: Fix ref leak in write_super()
  bcachefs: Change __journal_entry_close() assert to ERO
  bcachefs: Ensure journal space is block size aligned
  ...
@@ -1425,6 +1425,8 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
open_bucket_for_each(c, &wp->ptrs, ob, i)
wp->sectors_free = min(wp->sectors_free, ob->sectors_free);

wp->sectors_free = rounddown(wp->sectors_free, block_sectors(c));

BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);

return 0;
@@ -110,7 +110,9 @@ static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct writ
unsigned i;

open_bucket_for_each(c, &wp->ptrs, ob, i)
ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
ob_push(c, ob->sectors_free < block_sectors(c)
? &ptrs
: &keep, ob);
wp->ptrs = keep;

mutex_unlock(&wp->lock);
@@ -366,6 +366,10 @@ static inline void bkey_init(struct bkey *k)
#define __BKEY_PADDED(key, pad) \
struct bkey_i key; __u64 key ## _pad[pad]

enum bch_bkey_type_flags {
BKEY_TYPE_strict_btree_checks = BIT(0),
};

/*
* - DELETED keys are used internally to mark keys that should be ignored but
* override keys in composition order. Their version number is ignored.
@@ -383,46 +387,46 @@ static inline void bkey_init(struct bkey *k)
*
* - WHITEOUT: for hash table btrees
*/
#define BCH_BKEY_TYPES() \
x(deleted, 0) \
x(whiteout, 1) \
x(error, 2) \
x(cookie, 3) \
x(hash_whiteout, 4) \
x(btree_ptr, 5) \
x(extent, 6) \
x(reservation, 7) \
x(inode, 8) \
x(inode_generation, 9) \
x(dirent, 10) \
x(xattr, 11) \
x(alloc, 12) \
x(quota, 13) \
x(stripe, 14) \
x(reflink_p, 15) \
x(reflink_v, 16) \
x(inline_data, 17) \
x(btree_ptr_v2, 18) \
x(indirect_inline_data, 19) \
x(alloc_v2, 20) \
x(subvolume, 21) \
x(snapshot, 22) \
x(inode_v2, 23) \
x(alloc_v3, 24) \
x(set, 25) \
x(lru, 26) \
x(alloc_v4, 27) \
x(backpointer, 28) \
x(inode_v3, 29) \
x(bucket_gens, 30) \
x(snapshot_tree, 31) \
x(logged_op_truncate, 32) \
x(logged_op_finsert, 33) \
x(accounting, 34) \
x(inode_alloc_cursor, 35)
#define BCH_BKEY_TYPES() \
x(deleted, 0, 0) \
x(whiteout, 1, 0) \
x(error, 2, 0) \
x(cookie, 3, 0) \
x(hash_whiteout, 4, BKEY_TYPE_strict_btree_checks) \
x(btree_ptr, 5, BKEY_TYPE_strict_btree_checks) \
x(extent, 6, BKEY_TYPE_strict_btree_checks) \
x(reservation, 7, BKEY_TYPE_strict_btree_checks) \
x(inode, 8, BKEY_TYPE_strict_btree_checks) \
x(inode_generation, 9, BKEY_TYPE_strict_btree_checks) \
x(dirent, 10, BKEY_TYPE_strict_btree_checks) \
x(xattr, 11, BKEY_TYPE_strict_btree_checks) \
x(alloc, 12, BKEY_TYPE_strict_btree_checks) \
x(quota, 13, BKEY_TYPE_strict_btree_checks) \
x(stripe, 14, BKEY_TYPE_strict_btree_checks) \
x(reflink_p, 15, BKEY_TYPE_strict_btree_checks) \
x(reflink_v, 16, BKEY_TYPE_strict_btree_checks) \
x(inline_data, 17, BKEY_TYPE_strict_btree_checks) \
x(btree_ptr_v2, 18, BKEY_TYPE_strict_btree_checks) \
x(indirect_inline_data, 19, BKEY_TYPE_strict_btree_checks) \
x(alloc_v2, 20, BKEY_TYPE_strict_btree_checks) \
x(subvolume, 21, BKEY_TYPE_strict_btree_checks) \
x(snapshot, 22, BKEY_TYPE_strict_btree_checks) \
x(inode_v2, 23, BKEY_TYPE_strict_btree_checks) \
x(alloc_v3, 24, BKEY_TYPE_strict_btree_checks) \
x(set, 25, 0) \
x(lru, 26, BKEY_TYPE_strict_btree_checks) \
x(alloc_v4, 27, BKEY_TYPE_strict_btree_checks) \
x(backpointer, 28, BKEY_TYPE_strict_btree_checks) \
x(inode_v3, 29, BKEY_TYPE_strict_btree_checks) \
x(bucket_gens, 30, BKEY_TYPE_strict_btree_checks) \
x(snapshot_tree, 31, BKEY_TYPE_strict_btree_checks) \
x(logged_op_truncate, 32, BKEY_TYPE_strict_btree_checks) \
x(logged_op_finsert, 33, BKEY_TYPE_strict_btree_checks) \
x(accounting, 34, BKEY_TYPE_strict_btree_checks) \
x(inode_alloc_cursor, 35, BKEY_TYPE_strict_btree_checks)

enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
#define x(name, nr, ...) KEY_TYPE_##name = nr,
BCH_BKEY_TYPES()
#undef x
KEY_TYPE_MAX,
@@ -863,6 +867,7 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4);
LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20);
LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23);

static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
@@ -21,7 +21,7 @@
#include "xattr.h"

const char * const bch2_bkey_types[] = {
#define x(name, nr) #name,
#define x(name, nr, ...) #name,
BCH_BKEY_TYPES()
#undef x
NULL
@@ -115,7 +115,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_
})

const struct bkey_ops bch2_bkey_ops[] = {
#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
#define x(name, nr, ...) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
BCH_BKEY_TYPES()
#undef x
};
@@ -155,6 +155,12 @@ static u64 bch2_key_types_allowed[] = {
#undef x
};

static const enum bch_bkey_type_flags bch2_bkey_type_flags[] = {
#define x(name, nr, flags) [KEY_TYPE_##name] = flags,
BCH_BKEY_TYPES()
#undef x
};

const char *bch2_btree_node_type_str(enum btree_node_type type)
{
return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1);
@@ -177,8 +183,18 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
if (type >= BKEY_TYPE_NR)
return 0;

bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX &&
(type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) &&
enum bch_bkey_type_flags bkey_flags = k.k->type < KEY_TYPE_MAX
? bch2_bkey_type_flags[k.k->type]
: 0;

bool strict_key_type_allowed =
(from.flags & BCH_VALIDATE_commit) ||
type == BKEY_TYPE_btree ||
(from.btree < BTREE_ID_NR &&
(bkey_flags & BKEY_TYPE_strict_btree_checks));

bkey_fsck_err_on(strict_key_type_allowed &&
k.k->type < KEY_TYPE_MAX &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)),
c, bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
@@ -2577,7 +2577,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
struct bpos end)
{
if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) &&
!bkey_eq(iter->pos, POS_MAX)) {
!bkey_eq(iter->pos, POS_MAX) &&
!((iter->flags & BTREE_ITER_is_extents) &&
iter->pos.offset == U64_MAX)) {

/*
* bkey_start_pos(), for extents, is not monotonically
* increasing until after filtering for snapshots:

@@ -2602,7 +2605,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct

bch2_trans_verify_not_unlocked_or_in_restart(trans);
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && iter->pos.inode != end.inode);

int ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (unlikely(ret)) {
@@ -13,8 +13,8 @@

#include <linux/dcache.h>

static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
const struct qstr *str, struct qstr *out_cf)
int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
const struct qstr *str, struct qstr *out_cf)
{
*out_cf = (struct qstr) QSTR_INIT(NULL, 0);

@@ -35,18 +35,6 @@ static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *
#endif
}

static inline int bch2_maybe_casefold(struct btree_trans *trans,
const struct bch_hash_info *info,
const struct qstr *str, struct qstr *out_cf)
{
if (likely(!info->cf_encoding)) {
*out_cf = *str;
return 0;
} else {
return bch2_casefold(trans, info, str, out_cf);
}
}

static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
{
if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
@@ -23,6 +23,21 @@ struct bch_fs;
struct bch_hash_info;
struct bch_inode_info;

int bch2_casefold(struct btree_trans *, const struct bch_hash_info *,
const struct qstr *, struct qstr *);

static inline int bch2_maybe_casefold(struct btree_trans *trans,
const struct bch_hash_info *info,
const struct qstr *str, struct qstr *out_cf)
{
if (likely(!info->cf_encoding)) {
*out_cf = *str;
return 0;
} else {
return bch2_casefold(trans, info, str, out_cf);
}
}

struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);

static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len)
@@ -272,9 +272,6 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c,
{
struct fsck_err_state *s;

if (!test_bit(BCH_FS_fsck_running, &c->flags))
return NULL;

list_for_each_entry(s, &c->fsck_error_msgs, list)
if (s->id == id) {
/*

@@ -639,14 +636,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
return ret;
}

void bch2_flush_fsck_errs(struct bch_fs *c)
static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print)
{
struct fsck_err_state *s, *n;

mutex_lock(&c->fsck_error_msgs_lock);

list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
if (s->ratelimited && s->last_msg)
if (print && s->ratelimited && s->last_msg)
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);

list_del(&s->list);

@@ -657,6 +654,16 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
mutex_unlock(&c->fsck_error_msgs_lock);
}

void bch2_flush_fsck_errs(struct bch_fs *c)
{
__bch2_flush_fsck_errs(c, true);
}

void bch2_free_fsck_errs(struct bch_fs *c)
{
__bch2_flush_fsck_errs(c, false);
}

int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
subvol_inum inum, u64 offset)
{
@@ -93,6 +93,7 @@ int __bch2_fsck_err(struct bch_fs *, struct btree_trans *,
_flags, BCH_FSCK_ERR_##_err_type, __VA_ARGS__)

void bch2_flush_fsck_errs(struct bch_fs *);
void bch2_free_fsck_errs(struct bch_fs *);

#define fsck_err_wrap(_do) \
({ \
@@ -21,206 +21,6 @@
#define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */

struct flags_set {
unsigned mask;
unsigned flags;

unsigned projid;

bool set_projinherit;
bool projinherit;
};

static int bch2_inode_flags_set(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
void *p)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
/*
* We're relying on btree locking here for exclusion with other ioctl
* calls - use the flags in the btree (@bi), not inode->i_flags:
*/
struct flags_set *s = p;
unsigned newflags = s->flags;
unsigned oldflags = bi->bi_flags & s->mask;

if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) &&
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;

if (!S_ISREG(bi->bi_mode) &&
!S_ISDIR(bi->bi_mode) &&
(newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
return -EINVAL;

if ((newflags ^ oldflags) & BCH_INODE_casefolded) {
#ifdef CONFIG_UNICODE
int ret = 0;
/* Not supported on individual files. */
if (!S_ISDIR(bi->bi_mode))
return -EOPNOTSUPP;

/*
* Make sure the dir is empty, as otherwise we'd need to
* rehash everything and update the dirent keys.
*/
ret = bch2_empty_dir_trans(trans, inode_inum(inode));
if (ret < 0)
return ret;

ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
if (ret)
return ret;

bch2_check_set_feature(c, BCH_FEATURE_casefolding);
#else
printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
return -EOPNOTSUPP;
#endif
}

if (s->set_projinherit) {
bi->bi_fields_set &= ~(1 << Inode_opt_project);
bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
}

bi->bi_flags &= ~s->mask;
bi->bi_flags |= newflags;

bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
return 0;
}

static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg)
{
unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags);

return put_user(flags, arg);
}

static int bch2_ioc_setflags(struct bch_fs *c,
struct file *file,
struct bch_inode_info *inode,
void __user *arg)
{
struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) };
unsigned uflags;
int ret;

if (get_user(uflags, (int __user *) arg))
return -EFAULT;

s.flags = map_flags_rev(bch_flags_to_uflags, uflags);
if (uflags)
return -EOPNOTSUPP;

ret = mnt_want_write_file(file);
if (ret)
return ret;

inode_lock(&inode->v);
if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
ret = -EACCES;
goto setflags_out;
}

mutex_lock(&inode->ei_update_lock);
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);

setflags_out:
inode_unlock(&inode->v);
mnt_drop_write_file(file);
return ret;
}

static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
struct fsxattr __user *arg)
{
struct fsxattr fa = { 0 };

fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);

if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
fa.fsx_xflags |= FS_XFLAG_PROJINHERIT;

fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];

if (copy_to_user(arg, &fa, sizeof(fa)))
return -EFAULT;

return 0;
}

static int fssetxattr_inode_update_fn(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
void *p)
{
struct flags_set *s = p;

if (s->projid != bi->bi_project) {
bi->bi_fields_set |= 1U << Inode_opt_project;
bi->bi_project = s->projid;
}

return bch2_inode_flags_set(trans, inode, bi, p);
}

static int bch2_ioc_fssetxattr(struct bch_fs *c,
struct file *file,
struct bch_inode_info *inode,
struct fsxattr __user *arg)
{
struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) };
struct fsxattr fa;
int ret;

if (copy_from_user(&fa, arg, sizeof(fa)))
return -EFAULT;

s.set_projinherit = true;
s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0;
fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT;

s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
if (fa.fsx_xflags)
return -EOPNOTSUPP;

if (fa.fsx_projid >= U32_MAX)
return -EINVAL;

/*
* inode fields accessible via the xattr interface are stored with a +1
* bias, so that 0 means unset:
*/
s.projid = fa.fsx_projid + 1;

ret = mnt_want_write_file(file);
if (ret)
return ret;

inode_lock(&inode->v);
if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
ret = -EACCES;
goto err;
}

mutex_lock(&inode->ei_update_lock);
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
bch2_set_projid(c, inode, fa.fsx_projid) ?:
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
err:
inode_unlock(&inode->v);
mnt_drop_write_file(file);
return ret;
}

static int bch2_reinherit_attrs_fn(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
@@ -558,23 +358,6 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
long ret;

switch (cmd) {
case FS_IOC_GETFLAGS:
ret = bch2_ioc_getflags(inode, (int __user *) arg);
break;

case FS_IOC_SETFLAGS:
ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg);
break;

case FS_IOC_FSGETXATTR:
ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg);
break;

case FS_IOC_FSSETXATTR:
ret = bch2_ioc_fssetxattr(c, file, inode,
(void __user *) arg);
break;

case BCHFS_IOC_REINHERIT_ATTRS:
ret = bch2_ioc_reinherit_attrs(c, file, inode,
(void __user *) arg);
@@ -2,81 +2,6 @@
#ifndef _BCACHEFS_FS_IOCTL_H
#define _BCACHEFS_FS_IOCTL_H

/* Inode flags: */

/* bcachefs inode flags -> vfs inode flags: */
static const __maybe_unused unsigned bch_flags_to_vfs[] = {
[__BCH_INODE_sync] = S_SYNC,
[__BCH_INODE_immutable] = S_IMMUTABLE,
[__BCH_INODE_append] = S_APPEND,
[__BCH_INODE_noatime] = S_NOATIME,
[__BCH_INODE_casefolded] = S_CASEFOLD,
};

/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
static const __maybe_unused unsigned bch_flags_to_uflags[] = {
[__BCH_INODE_sync] = FS_SYNC_FL,
[__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
[__BCH_INODE_append] = FS_APPEND_FL,
[__BCH_INODE_nodump] = FS_NODUMP_FL,
[__BCH_INODE_noatime] = FS_NOATIME_FL,
[__BCH_INODE_casefolded] = FS_CASEFOLD_FL,
};

/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
static const __maybe_unused unsigned bch_flags_to_xflags[] = {
[__BCH_INODE_sync] = FS_XFLAG_SYNC,
[__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
[__BCH_INODE_append] = FS_XFLAG_APPEND,
[__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
[__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
//[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
};

#define set_flags(_map, _in, _out) \
do { \
unsigned _i; \
\
for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
if ((_in) & (1 << _i)) \
(_out) |= _map[_i]; \
else \
(_out) &= ~_map[_i]; \
} while (0)

#define map_flags(_map, _in) \
({ \
unsigned _out = 0; \
\
set_flags(_map, _in, _out); \
_out; \
})

#define map_flags_rev(_map, _in) \
({ \
unsigned _i, _out = 0; \
\
for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
if ((_in) & _map[_i]) { \
(_out) |= 1 << _i; \
(_in) &= ~_map[_i]; \
} \
(_out); \
})

#define map_defined(_map) \
({ \
unsigned _in = ~0; \
\
map_flags_rev(_map, _in); \
})

/* Set VFS inode flags from bcachefs inode: */
static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
{
set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
}

long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long);
long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long);
469
fs/bcachefs/fs.c
@@ -33,6 +33,7 @@
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/exportfs.h>
|
||||
#include <linux/fiemap.h>
|
||||
#include <linux/fileattr.h>
|
||||
#include <linux/fs_context.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pagemap.h>
|
||||
@@ -51,6 +52,22 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
|
||||
struct bch_inode_unpacked *,
|
||||
struct bch_subvolume *);
|
||||
|
||||
/* Set VFS inode flags from bcachefs inode: */
|
||||
static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode)
|
||||
{
|
||||
static const __maybe_unused unsigned bch_flags_to_vfs[] = {
|
||||
[__BCH_INODE_sync] = S_SYNC,
|
||||
[__BCH_INODE_immutable] = S_IMMUTABLE,
|
||||
[__BCH_INODE_append] = S_APPEND,
|
||||
[__BCH_INODE_noatime] = S_NOATIME,
|
||||
};
|
||||
|
||||
set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
|
||||
|
||||
if (bch2_inode_casefold(c, &inode->ei_inode))
|
||||
inode->v.i_flags |= S_CASEFOLD;
|
||||
}
|
||||
|
||||
void bch2_inode_update_after_write(struct btree_trans *trans,
|
||||
struct bch_inode_info *inode,
|
||||
struct bch_inode_unpacked *bi,
|
||||
@@ -79,7 +96,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans,
|
||||
|
||||
inode->ei_inode = *bi;
|
||||
|
||||
bch2_inode_flags_to_vfs(inode);
|
||||
bch2_inode_flags_to_vfs(c, inode);
|
||||
}
|
||||
|
||||
int __must_check bch2_write_inode(struct bch_fs *c,
|
||||
@@ -631,13 +648,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
|
||||
const struct qstr *name)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter dirent_iter = {};
|
||||
subvol_inum inum = {};
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
struct qstr lookup_name;
|
||||
int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
struct btree_iter dirent_iter = {};
|
||||
struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
|
||||
dir_hash_info, dir, name, 0);
|
||||
int ret = bkey_err(k);
|
||||
dir_hash_info, dir, &lookup_name, 0);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
@@ -825,6 +847,11 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
|
||||
*/
|
||||
set_nlink(&inode->v, 0);
|
||||
}
|
||||
|
||||
if (IS_CASEFOLDED(vdir)) {
|
||||
d_invalidate(dentry);
|
||||
d_prune_aliases(&inode->v);
|
||||
}
|
||||
err:
|
||||
bch2_trans_put(trans);
|
||||
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
|
||||
@@ -1235,10 +1262,20 @@ static int bch2_tmpfile(struct mnt_idmap *idmap,
|
||||
return finish_open_simple(file, 0);
|
||||
}
|
||||
|
||||
struct bch_fiemap_extent {
|
||||
struct bkey_buf kbuf;
|
||||
unsigned flags;
|
||||
};
|
||||
|
||||
static int bch2_fill_extent(struct bch_fs *c,
|
||||
struct fiemap_extent_info *info,
|
||||
struct bkey_s_c k, unsigned flags)
|
||||
struct bch_fiemap_extent *fe)
|
||||
{
|
||||
struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k);
|
||||
unsigned flags = fe->flags;
|
||||
|
||||
BUG_ON(!k.k->size);
|
||||
|
||||
if (bkey_extent_is_direct_data(k.k)) {
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
@@ -1291,110 +1328,223 @@ static int bch2_fill_extent(struct bch_fs *c,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Scan a range of an inode for data in pagecache.
|
||||
*
|
||||
* Intended to be retryable, so don't modify the output params until success is
|
||||
* imminent.
|
||||
*/
|
||||
static int
|
||||
bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end,
|
||||
bool nonblock)
|
||||
{
|
||||
loff_t dstart, dend;
|
||||
|
||||
dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock);
|
||||
if (dstart < 0)
|
||||
return dstart;
|
||||
|
||||
if (dstart == *end) {
|
||||
*start = dstart;
|
||||
return 0;
|
||||
}
|
||||
|
||||
dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock);
|
||||
if (dend < 0)
|
||||
return dend;
|
||||
|
||||
/* race */
|
||||
BUG_ON(dstart == dend);
|
||||
|
||||
*start = dstart;
|
||||
*end = dend;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scan a range of pagecache that corresponds to a file mapping hole in the
|
||||
* extent btree. If data is found, fake up an extent key so it looks like a
|
||||
* delalloc extent to the rest of the fiemap processing code.
|
||||
*/
|
||||
static int
|
||||
bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode,
|
||||
u64 start, u64 end, struct bch_fiemap_extent *cur)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_i_extent *delextent;
|
||||
struct bch_extent_ptr ptr = {};
|
||||
loff_t dstart = start << 9, dend = end << 9;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* We hold btree locks here so we cannot block on folio locks without
|
||||
* dropping trans locks first. Run a nonblocking scan for the common
|
||||
* case of no folios over holes and fall back on failure.
|
||||
*
|
||||
* Note that dropping locks like this is technically racy against
|
||||
* writeback inserting to the extent tree, but a non-sync fiemap scan is
|
||||
* fundamentally racy with writeback anyways. Therefore, just report the
|
||||
* range as delalloc regardless of whether we have to cycle trans locks.
|
||||
*/
|
||||
ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true);
|
||||
if (ret == -EAGAIN)
|
||||
ret = drop_locks_do(trans,
|
||||
bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false));
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Create a fake extent key in the buffer. We have to add a dummy extent
|
||||
* pointer for the fill code to add an extent entry. It's explicitly
|
||||
* zeroed to reflect delayed allocation (i.e. phys offset 0).
|
||||
*/
|
||||
bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64));
|
||||
delextent = bkey_extent_init(cur->kbuf.k);
|
||||
delextent->k.p = POS(inode->ei_inum.inum, dend >> 9);
|
||||
delextent->k.size = (dend - dstart) >> 9;
|
||||
bch2_bkey_append_ptr(&delextent->k_i, ptr);
|
||||
|
||||
cur->flags = FIEMAP_EXTENT_DELALLOC;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_next_fiemap_extent(struct btree_trans *trans,
|
||||
struct bch_inode_info *inode,
|
||||
u64 start, u64 end,
|
||||
struct bch_fiemap_extent *cur)
|
||||
{
|
||||
u32 snapshot;
|
||||
int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
struct btree_iter iter;
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
|
||||
SPOS(inode->ei_inum.inum, start, snapshot), 0);
|
||||
|
||||
struct bkey_s_c k =
|
||||
bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end));
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k);
|
||||
|
||||
/*
|
||||
* Does the pagecache or the btree take precedence?
|
||||
*
|
||||
* It _should_ be the pagecache, so that we correctly report delalloc
|
||||
* extents when dirty in the pagecache (we're COW, after all).
|
||||
*
|
||||
* But we'd have to add per-sector writeback tracking to
|
||||
* bch_folio_state, otherwise we report delalloc extents for clean
|
||||
* cached data in the pagecache.
|
||||
*
|
||||
* We should do this, but even then fiemap won't report stable mappings:
|
||||
* on bcachefs data moves around in the background (copygc, rebalance)
|
||||
* and we don't provide a way for userspace to lock that out.
|
||||
*/
|
||||
if (k.k &&
|
||||
bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)),
|
||||
pagecache_start)) {
|
||||
bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k);
|
||||
bch2_cut_front(iter.pos, cur->kbuf.k);
|
||||
bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k);
|
||||
cur->flags = 0;
|
||||
} else if (k.k) {
|
||||
bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k);
|
||||
}
|
||||
|
||||
if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) {
|
||||
unsigned sectors = cur->kbuf.k->k.size;
|
||||
s64 offset_into_extent = 0;
|
||||
enum btree_id data_btree = BTREE_ID_extents;
|
||||
int ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent,
|
||||
&cur->kbuf);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
struct bkey_i *k = cur->kbuf.k;
|
||||
sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent);
|
||||
|
||||
bch2_cut_front(POS(k->k.p.inode,
|
||||
bkey_start_offset(&k->k) + offset_into_extent),
|
||||
k);
|
||||
bch2_key_resize(&k->k, sectors);
|
||||
k->k.p = iter.pos;
|
||||
k->k.p.offset += k->k.size;
|
||||
}
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
struct bch_fs *c = vinode->i_sb->s_fs_info;
|
||||
struct bch_inode_info *ei = to_bch_ei(vinode);
|
||||
struct btree_trans *trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_buf cur, prev;
|
||||
bool have_extent = false;
|
||||
struct bch_fiemap_extent cur, prev;
|
||||
int ret = 0;
|
||||
|
||||
ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
|
||||
ret = fiemap_prep(&ei->v, info, start, &len, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
|
||||
if (start + len < start)
|
||||
return -EINVAL;
|
||||
|
||||
start >>= 9;
|
||||
u64 end = (start + len) >> 9;
|
||||
|
||||
bch2_bkey_buf_init(&cur.kbuf);
|
||||
bch2_bkey_buf_init(&prev.kbuf);
|
||||
bkey_init(&prev.kbuf.k->k);
|
||||
|
||||
bch2_bkey_buf_init(&cur);
|
||||
bch2_bkey_buf_init(&prev);
|
||||
trans = bch2_trans_get(c);
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
|
||||
POS(ei->v.i_ino, start), 0);
|
||||
|
||||
while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
|
||||
enum btree_id data_btree = BTREE_ID_extents;
|
||||
|
||||
bch2_trans_begin(trans);
|
||||
|
||||
u32 snapshot;
|
||||
ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);
|
||||
while (start < end) {
|
||||
ret = lockrestart_do(trans,
|
||||
bch2_next_fiemap_extent(trans, ei, start, end, &cur));
|
||||
if (ret)
|
||||
continue;
|
||||
goto err;
|
||||
|
||||
bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
|
||||
BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start);
|
||||
BUG_ON(cur.kbuf.k->k.p.offset > end);
|
||||
|
||||
k = bch2_btree_iter_peek_max(trans, &iter, end);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
if (!k.k)
|
||||
if (bkey_start_offset(&cur.kbuf.k->k) == end)
|
||||
break;
|
||||
|
||||
if (!bkey_extent_is_data(k.k) &&
|
||||
k.k->type != KEY_TYPE_reservation) {
|
||||
bch2_btree_iter_advance(trans, &iter);
|
||||
continue;
|
||||
}
|
||||
start = cur.kbuf.k->k.p.offset;
|
||||
|
||||
s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
|
||||
unsigned sectors = k.k->size - offset_into_extent;
|
||||
|
||||
bch2_bkey_buf_reassemble(&cur, c, k);
|
||||
|
||||
ret = bch2_read_indirect_extent(trans, &data_btree,
|
||||
&offset_into_extent, &cur);
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
k = bkey_i_to_s_c(cur.k);
|
||||
bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
|
||||
|
||||
sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent);
|
||||
|
||||
bch2_cut_front(POS(k.k->p.inode,
|
||||
bkey_start_offset(k.k) +
|
||||
offset_into_extent),
|
||||
cur.k);
|
||||
bch2_key_resize(&cur.k->k, sectors);
|
||||
cur.k->k.p = iter.pos;
|
||||
cur.k->k.p.offset += cur.k->k.size;
|
||||
|
||||
if (have_extent) {
|
||||
if (!bkey_deleted(&prev.kbuf.k->k)) {
|
||||
bch2_trans_unlock(trans);
|
||||
ret = bch2_fill_extent(c, info,
|
||||
bkey_i_to_s_c(prev.k), 0);
|
||||
ret = bch2_fill_extent(c, info, &prev);
|
||||
if (ret)
|
||||
break;
|
||||
goto err;
|
||||
}
|
||||
|
||||
bkey_copy(prev.k, cur.k);
|
||||
have_extent = true;
|
||||
|
||||
bch2_btree_iter_set_pos(trans, &iter,
|
||||
POS(iter.pos.inode, iter.pos.offset + sectors));
|
||||
bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k);
|
||||
prev.flags = cur.flags;
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
if (!ret && have_extent) {
|
||||
if (!bkey_deleted(&prev.kbuf.k->k)) {
|
||||
bch2_trans_unlock(trans);
|
||||
ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
|
||||
FIEMAP_EXTENT_LAST);
|
||||
prev.flags |= FIEMAP_EXTENT_LAST;
|
||||
ret = bch2_fill_extent(c, info, &prev);
|
||||
}
|
||||
|
||||
err:
|
||||
bch2_trans_put(trans);
|
||||
bch2_bkey_buf_exit(&cur, c);
|
||||
bch2_bkey_buf_exit(&prev, c);
|
||||
return ret < 0 ? ret : 0;
|
||||
bch2_bkey_buf_exit(&cur.kbuf, c);
|
||||
bch2_bkey_buf_exit(&prev.kbuf, c);
|
||||
|
||||
return bch2_err_class(ret < 0 ? ret : 0);
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct bch_vm_ops = {
|
||||
@@ -1449,6 +1599,165 @@ static int bch2_open(struct inode *vinode, struct file *file)
|
||||
return generic_file_open(vinode, file);
|
||||
}
|
||||
|
||||
/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
|
||||
static const __maybe_unused unsigned bch_flags_to_uflags[] = {
|
||||
[__BCH_INODE_sync] = FS_SYNC_FL,
|
||||
[__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
|
||||
[__BCH_INODE_append] = FS_APPEND_FL,
|
||||
[__BCH_INODE_nodump] = FS_NODUMP_FL,
|
||||
[__BCH_INODE_noatime] = FS_NOATIME_FL,
|
||||
};
|
||||
|
||||
/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
|
||||
static const __maybe_unused unsigned bch_flags_to_xflags[] = {
|
||||
[__BCH_INODE_sync] = FS_XFLAG_SYNC,
|
||||
[__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
|
||||
[__BCH_INODE_append] = FS_XFLAG_APPEND,
|
||||
[__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
|
||||
[__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
|
||||
};
|
||||
|
||||
static int bch2_fileattr_get(struct dentry *dentry,
|
||||
struct fileattr *fa)
|
||||
{
|
||||
struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
|
||||
fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags));
|
||||
|
||||
if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
|
||||
fa->fsx_xflags |= FS_XFLAG_PROJINHERIT;
|
||||
|
||||
if (bch2_inode_casefold(c, &inode->ei_inode))
|
||||
fa->flags |= FS_CASEFOLD_FL;
|
||||
|
||||
fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ];
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct flags_set {
|
||||
unsigned mask;
|
||||
unsigned flags;
|
||||
unsigned projid;
|
||||
bool set_project;
|
||||
bool set_casefold;
|
||||
bool casefold;
|
||||
};
|
||||
|
||||
static int fssetxattr_inode_update_fn(struct btree_trans *trans,
|
||||
struct bch_inode_info *inode,
|
||||
struct bch_inode_unpacked *bi,
|
||||
void *p)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct flags_set *s = p;
|
||||
|
||||
/*
|
||||
* We're relying on btree locking here for exclusion with other ioctl
|
||||
* calls - use the flags in the btree (@bi), not inode->i_flags:
|
||||
*/
|
||||
if (!S_ISREG(bi->bi_mode) &&
|
||||
!S_ISDIR(bi->bi_mode) &&
|
||||
(s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags)
|
||||
return -EINVAL;
|
||||
|
||||
if (s->casefold != bch2_inode_casefold(c, bi)) {
|
||||
#ifdef CONFIG_UNICODE
|
||||
int ret = 0;
|
||||
/* Not supported on individual files. */
|
||||
if (!S_ISDIR(bi->bi_mode))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/*
|
||||
* Make sure the dir is empty, as otherwise we'd need to
|
||||
* rehash everything and update the dirent keys.
|
||||
*/
|
||||
ret = bch2_empty_dir_trans(trans, inode_inum(inode));
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_check_set_feature(c, BCH_FEATURE_casefolding);
|
||||
|
||||
bi->bi_casefold = s->casefold + 1;
|
||||
bi->bi_fields_set |= BIT(Inode_opt_casefold);
|
||||
|
||||
#else
|
||||
printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
|
||||
return -EOPNOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (s->set_project) {
|
||||
bi->bi_project = s->projid;
|
||||
bi->bi_fields_set |= BIT(Inode_opt_project);
|
||||
}
|
||||
|
||||
bi->bi_flags &= ~s->mask;
|
||||
bi->bi_flags |= s->flags;
|
||||
|
||||
bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_fileattr_set(struct mnt_idmap *idmap,
|
||||
struct dentry *dentry,
|
||||
struct fileattr *fa)
|
||||
{
|
||||
struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct flags_set s = {};
|
||||
int ret;
|
||||
|
||||
if (fa->fsx_valid) {
|
||||
fa->fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
|
||||
|
||||
s.mask = map_defined(bch_flags_to_xflags);
|
||||
s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags);
|
||||
if (fa->fsx_xflags)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (fa->fsx_projid >= U32_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* inode fields accessible via the xattr interface are stored with a +1
|
||||
* bias, so that 0 means unset:
|
||||
*/
|
||||
if ((inode->ei_inode.bi_project ||
|
||||
fa->fsx_projid) &&
|
||||
inode->ei_inode.bi_project != fa->fsx_projid + 1) {
|
||||
s.projid = fa->fsx_projid + 1;
|
||||
s.set_project = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (fa->flags_valid) {
|
||||
s.mask = map_defined(bch_flags_to_uflags);
|
||||
|
||||
s.set_casefold = true;
|
||||
s.casefold = (fa->flags & FS_CASEFOLD_FL) != 0;
|
||||
fa->flags &= ~FS_CASEFOLD_FL;
|
||||
|
||||
s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags);
|
||||
if (fa->flags)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
mutex_lock(&inode->ei_update_lock);
|
||||
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
|
||||
(s.set_project
|
||||
? bch2_set_projid(c, inode, fa->fsx_projid)
|
||||
: 0) ?:
|
||||
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
|
||||
ATTR_CTIME);
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct file_operations bch_file_operations = {
|
||||
.open = bch2_open,
|
||||
.llseek = bch2_llseek,
|
||||
@@ -1476,6 +1785,8 @@ static const struct inode_operations bch_file_inode_operations = {
|
||||
.get_inode_acl = bch2_get_acl,
|
||||
.set_acl = bch2_set_acl,
|
||||
#endif
|
||||
.fileattr_get = bch2_fileattr_get,
|
||||
.fileattr_set = bch2_fileattr_set,
|
||||
};
|
||||
|
||||
static const struct inode_operations bch_dir_inode_operations = {
|
||||
@@ -1496,6 +1807,8 @@ static const struct inode_operations bch_dir_inode_operations = {
|
||||
.get_inode_acl = bch2_get_acl,
|
||||
.set_acl = bch2_set_acl,
|
||||
#endif
|
||||
.fileattr_get = bch2_fileattr_get,
|
||||
.fileattr_set = bch2_fileattr_set,
|
||||
};
|
||||
|
||||
static const struct file_operations bch_dir_file_operations = {
|
||||
@@ -1518,6 +1831,8 @@ static const struct inode_operations bch_symlink_inode_operations = {
|
||||
.get_inode_acl = bch2_get_acl,
|
||||
.set_acl = bch2_set_acl,
|
||||
#endif
|
||||
.fileattr_get = bch2_fileattr_get,
|
||||
.fileattr_set = bch2_fileattr_set,
|
||||
};
|
||||
|
||||
static const struct inode_operations bch_special_inode_operations = {
|
||||
@@ -1528,6 +1843,8 @@ static const struct inode_operations bch_special_inode_operations = {
|
||||
.get_inode_acl = bch2_get_acl,
|
||||
.set_acl = bch2_set_acl,
|
||||
#endif
|
||||
.fileattr_get = bch2_fileattr_get,
|
||||
.fileattr_set = bch2_fileattr_set,
|
||||
};
|
||||
|
||||
static const struct address_space_operations bch_address_space_operations = {
|
||||
|
||||
@@ -243,6 +243,14 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k)
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi)
|
||||
{
|
||||
/* inode apts are stored with a +1 bias: 0 means "unset, use fs opt" */
|
||||
return bi->bi_casefold
|
||||
? bi->bi_casefold - 1
|
||||
: c->opts.casefold;
|
||||
}
|
||||
|
||||
/* i_nlink: */
|
||||
|
||||
static inline unsigned nlink_bias(umode_t mode)
|
||||
|
||||
@@ -103,7 +103,8 @@ struct bch_inode_generation {
|
||||
x(bi_parent_subvol, 32) \
|
||||
x(bi_nocow, 8) \
|
||||
x(bi_depth, 32) \
|
||||
x(bi_inodes_32bit, 8)
|
||||
x(bi_inodes_32bit, 8) \
|
||||
x(bi_casefold, 8)
|
||||
|
||||
/* subset of BCH_INODE_FIELDS */
|
||||
#define BCH_INODE_OPTS() \
|
||||
@@ -117,7 +118,8 @@ struct bch_inode_generation {
|
||||
x(background_target, 16) \
|
||||
x(erasure_code, 16) \
|
||||
x(nocow, 8) \
|
||||
x(inodes_32bit, 8)
|
||||
x(inodes_32bit, 8) \
|
||||
x(casefold, 8)
|
||||
|
||||
enum inode_opt_id {
|
||||
#define x(name, ...) \
|
||||
@@ -137,8 +139,7 @@ enum inode_opt_id {
|
||||
x(i_sectors_dirty, 6) \
|
||||
x(unlinked, 7) \
|
||||
x(backptr_untrusted, 8) \
|
||||
x(has_child_snapshot, 9) \
|
||||
x(casefolded, 10)
|
||||
x(has_child_snapshot, 9)
|
||||
|
||||
/* bits 20+ reserved for packed fields below: */
|
||||
|
||||
|
||||
@@ -281,7 +281,24 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
|
||||
|
||||
sectors = vstruct_blocks_plus(buf->data, c->block_bits,
|
||||
buf->u64s_reserved) << c->block_bits;
|
||||
BUG_ON(sectors > buf->sectors);
|
||||
if (unlikely(sectors > buf->sectors)) {
|
||||
struct printbuf err = PRINTBUF;
|
||||
err.atomic++;
|
||||
|
||||
prt_printf(&err, "journal entry overran reserved space: %u > %u\n",
|
||||
sectors, buf->sectors);
|
||||
prt_printf(&err, "buf u64s %u u64s reserved %u cur_entry_u64s %u block_bits %u\n",
|
||||
le32_to_cpu(buf->data->u64s), buf->u64s_reserved,
|
||||
j->cur_entry_u64s,
|
||||
c->block_bits);
|
||||
prt_printf(&err, "fatal error - emergency read only");
|
||||
bch2_journal_halt_locked(j);
|
||||
|
||||
bch_err(c, "%s", err.buf);
|
||||
printbuf_exit(&err);
|
||||
return;
|
||||
}
|
||||
|
||||
buf->sectors = sectors;
|
||||
|
||||
/*
|
||||
@@ -1462,8 +1479,6 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
|
||||
j->last_empty_seq = cur_seq - 1; /* to match j->seq */
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
set_bit(JOURNAL_running, &j->flags);
|
||||
j->last_flush_write = jiffies;
|
||||
|
||||
j->reservations.idx = journal_cur_seq(j);
|
||||
@@ -1474,6 +1489,21 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_journal_set_replay_done(struct journal *j)
|
||||
{
|
||||
/*
|
||||
* journal_space_available must happen before setting JOURNAL_running
|
||||
* JOURNAL_running must happen before JOURNAL_replay_done
|
||||
*/
|
||||
spin_lock(&j->lock);
|
||||
bch2_journal_space_available(j);
|
||||
|
||||
set_bit(JOURNAL_need_flush_write, &j->flags);
|
||||
set_bit(JOURNAL_running, &j->flags);
|
||||
set_bit(JOURNAL_replay_done, &j->flags);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
/* init/exit: */
|
||||
|
||||
void bch2_dev_journal_exit(struct bch_dev *ca)
|
||||
|
||||
@@ -437,12 +437,6 @@ static inline int bch2_journal_error(struct journal *j)
|
||||
|
||||
struct bch_dev;
|
||||
|
||||
static inline void bch2_journal_set_replay_done(struct journal *j)
|
||||
{
|
||||
BUG_ON(!test_bit(JOURNAL_running, &j->flags));
|
||||
set_bit(JOURNAL_replay_done, &j->flags);
|
||||
}
|
||||
|
||||
void bch2_journal_unblock(struct journal *);
|
||||
void bch2_journal_block(struct journal *);
|
||||
struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u64, bool *);
|
||||
@@ -459,6 +453,7 @@ void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
|
||||
|
||||
void bch2_fs_journal_stop(struct journal *);
|
||||
int bch2_fs_journal_start(struct journal *, u64);
|
||||
void bch2_journal_set_replay_done(struct journal *);
|
||||
|
||||
void bch2_dev_journal_exit(struct bch_dev *);
|
||||
int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
|
||||
|
||||
@@ -252,7 +252,10 @@ void bch2_journal_space_available(struct journal *j)
|
||||
|
||||
bch2_journal_set_watermark(j);
|
||||
out:
|
||||
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
|
||||
j->cur_entry_sectors = !ret
|
||||
? round_down(j->space[journal_space_discarded].next_entry,
|
||||
block_sectors(c))
|
||||
: 0;
|
||||
j->cur_entry_error = ret;
|
||||
|
||||
if (!ret)
|
||||
|
||||
@@ -356,6 +356,13 @@ static int bch2_copygc_thread(void *arg)
|
||||
|
||||
set_freezable();
|
||||
|
||||
/*
|
||||
* Data move operations can't run until after check_snapshots has
|
||||
* completed, and bch2_snapshot_is_ancestor() is available.
|
||||
*/
|
||||
kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
|
||||
kthread_should_stop());
|
||||
|
||||
bch2_move_stats_init(&move_stats, "copygc");
|
||||
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
|
||||
writepoint_ptr(&c->copygc_write_point),
|
||||
|
||||
@@ -5,6 +5,15 @@
|
||||
unsigned long bch2_copygc_wait_amount(struct bch_fs *);
|
||||
void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
|
||||
|
||||
static inline void bch2_copygc_wakeup(struct bch_fs *c)
|
||||
{
|
||||
rcu_read_lock();
|
||||
struct task_struct *p = rcu_dereference(c->copygc_thread);
|
||||
if (p)
|
||||
wake_up_process(p);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
void bch2_copygc_stop(struct bch_fs *);
|
||||
int bch2_copygc_start(struct bch_fs *);
|
||||
void bch2_fs_copygc_init(struct bch_fs *);
|
||||
|
||||
@@ -47,10 +47,6 @@ int bch2_create_trans(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* Inherit casefold state from parent. */
|
||||
if (S_ISDIR(mode))
|
||||
new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded;
|
||||
|
||||
if (!(flags & BCH_CREATE_SNAPSHOT)) {
|
||||
/* Normal create path - allocate a new inode: */
|
||||
bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
|
||||
|
||||
@@ -228,6 +228,11 @@ enum fsck_err_opts {
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_ERASURE_CODE, false, \
|
||||
NULL, "Enable erasure coding (DO NOT USE YET)") \
|
||||
x(casefold, u8, \
|
||||
OPT_FS|OPT_INODE|OPT_FORMAT, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_CASEFOLD, false, \
|
||||
NULL, "Dirent lookups are casefolded") \
|
||||
x(inodes_32bit, u8, \
|
||||
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
||||
OPT_BOOL(), \
|
||||
|
||||
@@ -262,7 +262,7 @@ int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
|
||||
int ret = bch2_trans_commit_do(c, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_set_rebalance_needs_scan_trans(trans, inum));
|
||||
rebalance_wakeup(c);
|
||||
bch2_rebalance_wakeup(c);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -581,6 +581,13 @@ static int bch2_rebalance_thread(void *arg)
|
||||
|
||||
set_freezable();
|
||||
|
||||
/*
|
||||
* Data move operations can't run until after check_snapshots has
|
||||
* completed, and bch2_snapshot_is_ancestor() is available.
|
||||
*/
|
||||
kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
|
||||
kthread_should_stop());
|
||||
|
||||
bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
|
||||
writepoint_ptr(&c->rebalance_write_point),
|
||||
true);
|
||||
@@ -664,7 +671,7 @@ void bch2_rebalance_stop(struct bch_fs *c)
|
||||
c->rebalance.thread = NULL;
|
||||
|
||||
if (p) {
|
||||
/* for sychronizing with rebalance_wakeup() */
|
||||
/* for sychronizing with bch2_rebalance_wakeup() */
|
||||
synchronize_rcu();
|
||||
|
||||
kthread_stop(p);
|
||||
|
||||
@@ -37,7 +37,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64);
|
||||
int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum);
|
||||
int bch2_set_fs_needs_rebalance(struct bch_fs *);
|
||||
|
||||
static inline void rebalance_wakeup(struct bch_fs *c)
|
||||
static inline void bch2_rebalance_wakeup(struct bch_fs *c)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "journal_seq_blacklist.h"
|
||||
#include "logged_ops.h"
|
||||
#include "move.h"
|
||||
#include "movinggc.h"
|
||||
#include "namei.h"
|
||||
#include "quota.h"
|
||||
#include "rebalance.h"
|
||||
@@ -1129,13 +1130,13 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
set_bit(BCH_FS_accounting_replay_done, &c->flags);
|
||||
bch2_journal_set_replay_done(&c->journal);
|
||||
|
||||
ret = bch2_fs_read_write_early(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
set_bit(BCH_FS_accounting_replay_done, &c->flags);
|
||||
bch2_journal_set_replay_done(&c->journal);
|
||||
|
||||
for_each_member_device(c, ca) {
|
||||
ret = bch2_dev_usage_init(ca, false);
|
||||
if (ret) {
|
||||
@@ -1194,6 +1195,9 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
|
||||
c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
|
||||
|
||||
bch2_copygc_wakeup(c);
|
||||
bch2_rebalance_wakeup(c);
|
||||
|
||||
if (enabled_qtypes(c)) {
|
||||
ret = bch2_fs_quota_read(c);
|
||||
if (ret)
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "journal.h"
|
||||
#include "lru.h"
|
||||
#include "logged_ops.h"
|
||||
#include "movinggc.h"
|
||||
#include "rebalance.h"
|
||||
#include "recovery.h"
|
||||
#include "recovery_passes.h"
|
||||
@@ -262,49 +263,52 @@ int bch2_run_recovery_passes(struct bch_fs *c)
|
||||
*/
|
||||
c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
|
||||
|
||||
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
|
||||
c->next_recovery_pass = c->curr_recovery_pass + 1;
|
||||
spin_lock_irq(&c->recovery_pass_lock);
|
||||
|
||||
spin_lock_irq(&c->recovery_pass_lock);
|
||||
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
|
||||
unsigned prev_done = c->recovery_pass_done;
|
||||
unsigned pass = c->curr_recovery_pass;
|
||||
|
||||
c->next_recovery_pass = pass + 1;
|
||||
|
||||
if (c->opts.recovery_pass_last &&
|
||||
c->curr_recovery_pass > c->opts.recovery_pass_last) {
|
||||
spin_unlock_irq(&c->recovery_pass_lock);
|
||||
c->curr_recovery_pass > c->opts.recovery_pass_last)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!should_run_recovery_pass(c, pass)) {
|
||||
c->curr_recovery_pass++;
|
||||
c->recovery_pass_done = max(c->recovery_pass_done, pass);
|
||||
if (should_run_recovery_pass(c, pass)) {
|
||||
spin_unlock_irq(&c->recovery_pass_lock);
|
||||
continue;
|
||||
ret = bch2_run_recovery_pass(c, pass) ?:
|
||||
bch2_journal_flush(&c->journal);
|
||||
|
||||
if (!ret && !test_bit(BCH_FS_error, &c->flags))
|
||||
bch2_clear_recovery_pass_required(c, pass);
|
||||
spin_lock_irq(&c->recovery_pass_lock);
|
||||
|
||||
if (c->next_recovery_pass < c->curr_recovery_pass) {
|
||||
/*
|
||||
* bch2_run_explicit_recovery_pass() was called: we
|
||||
* can't always catch -BCH_ERR_restart_recovery because
|
||||
* it may have been called from another thread (btree
|
||||
* node read completion)
|
||||
*/
|
||||
ret = 0;
|
||||
c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
|
||||
} else {
|
||||
c->recovery_passes_complete |= BIT_ULL(pass);
|
||||
c->recovery_pass_done = max(c->recovery_pass_done, pass);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&c->recovery_pass_lock);
|
||||
|
||||
ret = bch2_run_recovery_pass(c, pass) ?:
|
||||
bch2_journal_flush(&c->journal);
|
||||
|
||||
if (!ret && !test_bit(BCH_FS_error, &c->flags))
|
||||
bch2_clear_recovery_pass_required(c, pass);
|
||||
|
||||
spin_lock_irq(&c->recovery_pass_lock);
|
||||
if (c->next_recovery_pass < c->curr_recovery_pass) {
|
||||
/*
|
||||
* bch2_run_explicit_recovery_pass() was called: we
|
||||
* can't always catch -BCH_ERR_restart_recovery because
|
||||
* it may have been called from another thread (btree
|
||||
* node read completion)
|
||||
*/
|
||||
ret = 0;
|
||||
c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
|
||||
} else {
|
||||
c->recovery_passes_complete |= BIT_ULL(pass);
|
||||
c->recovery_pass_done = max(c->recovery_pass_done, pass);
|
||||
}
|
||||
c->curr_recovery_pass = c->next_recovery_pass;
|
||||
spin_unlock_irq(&c->recovery_pass_lock);
|
||||
|
||||
if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
|
||||
c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) {
|
||||
bch2_copygc_wakeup(c);
|
||||
bch2_rebalance_wakeup(c);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irq(&c->recovery_pass_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -396,7 +396,7 @@ u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
|
||||
u32 subvol = 0, s;
|
||||
|
||||
rcu_read_lock();
|
||||
while (id) {
|
||||
while (id && bch2_snapshot_exists(c, id)) {
|
||||
s = snapshot_t(c, id)->subvol;
|
||||
|
||||
if (s && (!subvol || s < subvol))
|
||||
|
||||
@@ -33,7 +33,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
|
||||
|
||||
struct bch_hash_info {
|
||||
u8 type;
|
||||
struct unicode_map *cf_encoding;
|
||||
struct unicode_map *cf_encoding;
|
||||
/*
|
||||
* For crc32 or crc64 string hashes the first key value of
|
||||
* the siphash_key (k0) is used as the key.
|
||||
@@ -44,11 +44,10 @@ struct bch_hash_info {
|
||||
static inline struct bch_hash_info
|
||||
bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
|
||||
{
|
||||
/* XXX ick */
|
||||
struct bch_hash_info info = {
|
||||
.type = INODE_STR_HASH(bi),
|
||||
#ifdef CONFIG_UNICODE
|
||||
.cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL,
|
||||
.cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
|
||||
#endif
|
||||
.siphash_key = { .k0 = bi->bi_hash_seed }
|
||||
};
|
||||
|
||||
@@ -1102,7 +1102,8 @@ int bch2_write_super(struct bch_fs *c)
|
||||
prt_str(&buf, ")");
|
||||
bch2_fs_fatal_error(c, ": %s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -BCH_ERR_sb_not_downgraded;
|
||||
ret = -BCH_ERR_sb_not_downgraded;
|
||||
goto out;
|
||||
}
|
||||
|
||||
darray_for_each(online_devices, ca) {
|
||||
|
||||
@@ -418,32 +418,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
	return ret;
}

static int bch2_fs_read_write_late(struct bch_fs *c)
{
	int ret;

	/*
	 * Data move operations can't run until after check_snapshots has
	 * completed, and bch2_snapshot_is_ancestor() is available.
	 *
	 * Ideally we'd start copygc/rebalance earlier instead of waiting for
	 * all of recovery/fsck to complete:
	 */
	ret = bch2_copygc_start(c);
	if (ret) {
		bch_err(c, "error starting copygc thread");
		return ret;
	}

	ret = bch2_rebalance_start(c);
	if (ret) {
		bch_err(c, "error starting rebalance thread");
		return ret;
	}

	return 0;
}

static int __bch2_fs_read_write(struct bch_fs *c, bool early)
{
	int ret;
@@ -466,29 +440,28 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)

	clear_bit(BCH_FS_clean_shutdown, &c->flags);

	/*
	 * First journal write must be a flush write: after a clean shutdown we
	 * don't read the journal, so the first journal write may end up
	 * overwriting whatever was there previously, and there must always be
	 * at least one non-flush write in the journal or recovery will fail:
	 */
	set_bit(JOURNAL_need_flush_write, &c->journal.flags);
	set_bit(JOURNAL_running, &c->journal.flags);

	__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
		bch2_dev_allocator_add(c, ca);
		percpu_ref_reinit(&ca->io_ref[WRITE]);
	}
	bch2_recalc_capacity(c);

	/*
	 * First journal write must be a flush write: after a clean shutdown we
	 * don't read the journal, so the first journal write may end up
	 * overwriting whatever was there previously, and there must always be
	 * at least one non-flush write in the journal or recovery will fail:
	 */
	spin_lock(&c->journal.lock);
	set_bit(JOURNAL_need_flush_write, &c->journal.flags);
	set_bit(JOURNAL_running, &c->journal.flags);
	bch2_journal_space_available(&c->journal);
	spin_unlock(&c->journal.lock);

	ret = bch2_fs_mark_dirty(c);
	if (ret)
		goto err;

	spin_lock(&c->journal.lock);
	bch2_journal_space_available(&c->journal);
	spin_unlock(&c->journal.lock);

	ret = bch2_journal_reclaim_start(&c->journal);
	if (ret)
		goto err;
@@ -504,10 +477,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
		atomic_long_inc(&c->writes[i]);
	}
#endif
	if (!early) {
		ret = bch2_fs_read_write_late(c);
		if (ret)
			goto err;

		ret = bch2_copygc_start(c);
		if (ret) {
			bch_err_msg(c, ret, "error starting copygc thread");
			goto err;
		}

		ret = bch2_rebalance_start(c);
		if (ret) {
			bch_err_msg(c, ret, "error starting rebalance thread");
			goto err;
		}

	bch2_do_discards(c);
@@ -553,6 +533,7 @@ static void __bch2_fs_free(struct bch_fs *c)

	bch2_find_btree_nodes_exit(&c->found_btree_nodes);
	bch2_free_pending_node_rewrites(c);
	bch2_free_fsck_errs(c);
	bch2_fs_accounting_exit(c);
	bch2_fs_sb_errors_exit(c);
	bch2_fs_counters_exit(c);
@@ -1023,6 +1004,40 @@ static void print_mount_opts(struct bch_fs *c)
	printbuf_exit(&p);
}

static bool bch2_fs_may_start(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned i, flags = 0;

	if (c->opts.very_degraded)
		flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;

	if (c->opts.degraded)
		flags |= BCH_FORCE_IF_DEGRADED;

	if (!c->opts.degraded &&
	    !c->opts.very_degraded) {
		mutex_lock(&c->sb_lock);

		for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
			if (!bch2_member_exists(c->disk_sb.sb, i))
				continue;

			ca = bch2_dev_locked(c, i);

			if (!bch2_dev_is_online(ca) &&
			    (ca->mi.state == BCH_MEMBER_STATE_rw ||
			     ca->mi.state == BCH_MEMBER_STATE_ro)) {
				mutex_unlock(&c->sb_lock);
				return false;
			}
		}
		mutex_unlock(&c->sb_lock);
	}

	return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
}

int bch2_fs_start(struct bch_fs *c)
{
	time64_t now = ktime_get_real_seconds();
@@ -1030,6 +1045,9 @@ int bch2_fs_start(struct bch_fs *c)

	print_mount_opts(c);

	if (!bch2_fs_may_start(c))
		return -BCH_ERR_insufficient_devices_to_start;

	down_write(&c->state_lock);
	mutex_lock(&c->sb_lock);

@@ -1082,13 +1100,10 @@ int bch2_fs_start(struct bch_fs *c)
	wake_up(&c->ro_ref_wait);

	down_write(&c->state_lock);
	if (c->opts.read_only) {
	if (c->opts.read_only)
		bch2_fs_read_only(c);
	} else {
		ret = !test_bit(BCH_FS_rw, &c->flags)
			? bch2_fs_read_write(c)
			: bch2_fs_read_write_late(c);
	}
	else if (!test_bit(BCH_FS_rw, &c->flags))
		ret = bch2_fs_read_write(c);
	up_write(&c->state_lock);

err:
@@ -1500,7 +1515,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)

	printbuf_exit(&name);

	rebalance_wakeup(c);
	bch2_rebalance_wakeup(c);
	return 0;
}

@@ -1559,40 +1574,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
	}
}

static bool bch2_fs_may_start(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned i, flags = 0;

	if (c->opts.very_degraded)
		flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;

	if (c->opts.degraded)
		flags |= BCH_FORCE_IF_DEGRADED;

	if (!c->opts.degraded &&
	    !c->opts.very_degraded) {
		mutex_lock(&c->sb_lock);

		for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
			if (!bch2_member_exists(c->disk_sb.sb, i))
				continue;

			ca = bch2_dev_locked(c, i);

			if (!bch2_dev_is_online(ca) &&
			    (ca->mi.state == BCH_MEMBER_STATE_rw ||
			     ca->mi.state == BCH_MEMBER_STATE_ro)) {
				mutex_unlock(&c->sb_lock);
				return false;
			}
		}
		mutex_unlock(&c->sb_lock);
	}

	return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
}

static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
{
	bch2_dev_io_ref_stop(ca, WRITE);
@@ -1646,7 +1627,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
	if (new_state == BCH_MEMBER_STATE_rw)
		__bch2_dev_read_write(c, ca);

	rebalance_wakeup(c);
	bch2_rebalance_wakeup(c);

	return ret;
}
@@ -2228,11 +2209,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
	}
	up_write(&c->state_lock);

	if (!bch2_fs_may_start(c)) {
		ret = -BCH_ERR_insufficient_devices_to_start;
		goto err_print;
	}

	if (!c->opts.nostart) {
		ret = bch2_fs_start(c);
		if (ret)

@@ -654,11 +654,10 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
		bch2_set_rebalance_needs_scan(c, 0);

	if (v && id == Opt_rebalance_enabled)
		rebalance_wakeup(c);
		bch2_rebalance_wakeup(c);

	if (v && id == Opt_copygc_enabled &&
	    c->copygc_thread)
		wake_up_process(c->copygc_thread);
	if (v && id == Opt_copygc_enabled)
		bch2_copygc_wakeup(c);

	if (id == Opt_discard && !ca) {
		mutex_lock(&c->sb_lock);

@@ -342,6 +342,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 */
static int test_peek_end(struct bch_fs *c, u64 nr)
{
	delete_test_keys(c);

	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	struct bkey_s_c k;
@@ -362,6 +364,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr)

static int test_peek_end_extents(struct bch_fs *c, u64 nr)
{
	delete_test_keys(c);

	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	struct bkey_s_c k;

@@ -739,4 +739,42 @@ static inline void memcpy_swab(void *_dst, void *_src, size_t len)
		*--dst = *src++;
}

#define set_flags(_map, _in, _out) \
do { \
	unsigned _i; \
 \
	for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
		if ((_in) & (1 << _i)) \
			(_out) |= _map[_i]; \
		else \
			(_out) &= ~_map[_i]; \
} while (0)

#define map_flags(_map, _in) \
({ \
	unsigned _out = 0; \
 \
	set_flags(_map, _in, _out); \
	_out; \
})

#define map_flags_rev(_map, _in) \
({ \
	unsigned _i, _out = 0; \
 \
	for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
		if ((_in) & _map[_i]) { \
			(_out) |= 1 << _i; \
			(_in) &= ~_map[_i]; \
		} \
	(_out); \
})

#define map_defined(_map) \
({ \
	unsigned _in = ~0; \
 \
	map_flags_rev(_map, _in); \
})

#endif /* _BCACHEFS_UTIL_H */

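The new helpers translate between two flag layouts, bit for bit, through a lookup table: set_flags()/map_flags() turn dense bit positions into the mapped flag values, map_flags_rev() translates back (clearing the bits it recognizes from its input), and map_defined() yields the mask of dense bits the table covers. A hypothetical usage sketch, assuming the macros above and the kernel's ARRAY_SIZE() are in scope; the flag values and names are invented for illustration:

/* invented external flag values, e.g. an ioctl-visible layout */
#define X_FLAG_SYNC		0x0008
#define X_FLAG_IMMUTABLE	0x0010
#define X_FLAG_APPEND		0x0020

static const unsigned example_flags_map[] = {
	X_FLAG_SYNC,		/* dense bit 0 */
	X_FLAG_IMMUTABLE,	/* dense bit 1 */
	X_FLAG_APPEND,		/* dense bit 2 */
};

static unsigned example_roundtrip(unsigned dense)
{
	/* dense bit i -> example_flags_map[i] */
	unsigned external = map_flags(example_flags_map, dense);

	/* and back; map_flags_rev() clears the bits it consumed from 'external' */
	return map_flags_rev(example_flags_map, external);
}

/* map_defined(example_flags_map) == 0x7: dense bits 0..2 are covered */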