From 8d939f4bd7b225d8b157b1329881d2719c0ecb29 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:47 -0800 Subject: [PATCH 01/28] xfs: constify the xfs_sb predicates Change the xfs_sb predicates to take a const struct xfs_sb pointer because they do not change the superblock. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_format.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index e1bfee0c3b1a..a24ab46aaebc 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -278,7 +278,7 @@ struct xfs_dsb { #define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) -static inline bool xfs_sb_is_v5(struct xfs_sb *sbp) +static inline bool xfs_sb_is_v5(const struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } @@ -287,12 +287,12 @@ static inline bool xfs_sb_is_v5(struct xfs_sb *sbp) * Detect a mismatched features2 field. Older kernels read/wrote * this into the wrong slot, so to be safe we keep them in sync. 
*/ -static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp) +static inline bool xfs_sb_has_mismatched_features2(const struct xfs_sb *sbp) { return sbp->sb_bad_features2 != sbp->sb_features2; } -static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp) +static inline bool xfs_sb_version_hasmorebits(const struct xfs_sb *sbp) { return xfs_sb_is_v5(sbp) || (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); @@ -342,8 +342,8 @@ static inline void xfs_sb_version_addprojid32(struct xfs_sb *sbp) #define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL static inline bool xfs_sb_has_compat_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_compat & feature) != 0; } @@ -360,8 +360,8 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_ro_compat & feature) != 0; } @@ -387,8 +387,8 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool xfs_sb_has_incompat_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_incompat & feature) != 0; } @@ -399,8 +399,8 @@ xfs_sb_has_incompat_feature( #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL static inline bool xfs_sb_has_incompat_log_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_log_incompat & feature) != 0; } @@ -420,7 +420,7 @@ xfs_sb_add_incompat_log_features( sbp->sb_features_log_incompat |= features; } -static inline bool xfs_sb_version_haslogxattrs(struct xfs_sb *sbp) +static inline bool xfs_sb_version_haslogxattrs(const struct xfs_sb *sbp) { return xfs_sb_is_v5(sbp) && (sbp->sb_features_log_incompat 
& XFS_SB_FEAT_INCOMPAT_LOG_XATTRS); From fdf5703b61101eb29747f7ed23ad57192cf277fd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:48 -0800 Subject: [PATCH 02/28] xfs: constify the xfs_inode predicates Change the xfs_inode predicates to take a const struct xfs_inode pointer because they do not change the inode. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_inode.h | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 693770f9bb09..a8430f30d6df 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3040,7 +3040,7 @@ xfs_inode_alloc_unitsize( /* Should we always be using copy on write for file writes? */ bool xfs_is_always_cow_inode( - struct xfs_inode *ip) + const struct xfs_inode *ip) { return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 03944b6c5fba..41444a557576 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -100,7 +100,7 @@ static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip) return ip->i_prev_unlinked != 0; } -static inline bool xfs_inode_has_attr_fork(struct xfs_inode *ip) +static inline bool xfs_inode_has_attr_fork(const struct xfs_inode *ip) { return ip->i_forkoff > 0; } @@ -271,7 +271,7 @@ xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned long flags) return ret; } -static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) +static inline bool xfs_is_reflink_inode(const struct xfs_inode *ip) { return ip->i_diflags2 & XFS_DIFLAG2_REFLINK; } @@ -285,9 +285,9 @@ static inline bool xfs_is_metadata_inode(const struct xfs_inode *ip) xfs_is_quota_inode(&mp->m_sb, ip->i_ino); } -bool xfs_is_always_cow_inode(struct xfs_inode *ip); +bool xfs_is_always_cow_inode(const struct xfs_inode *ip); -static inline bool xfs_is_cow_inode(struct xfs_inode *ip) +static inline bool 
xfs_is_cow_inode(const struct xfs_inode *ip) { return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip); } @@ -301,17 +301,17 @@ static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip) * Check if an inode has any data in the COW fork. This might be often false * even for inodes with the reflink flag when there is no pending COW operation. */ -static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip) +static inline bool xfs_inode_has_cow_data(const struct xfs_inode *ip) { return ip->i_cowfp && ip->i_cowfp->if_bytes; } -static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip) +static inline bool xfs_inode_has_bigtime(const struct xfs_inode *ip) { return ip->i_diflags2 & XFS_DIFLAG2_BIGTIME; } -static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) +static inline bool xfs_inode_has_large_extent_counts(const struct xfs_inode *ip) { return ip->i_diflags2 & XFS_DIFLAG2_NREXT64; } @@ -320,7 +320,7 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) * Decide if this file is a realtime file whose data allocation unit is larger * than a single filesystem block. */ -static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip) +static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip) { return XFS_IS_REALTIME_INODE(ip) && ip->i_mount->m_sb.sb_rextsize > 1; } @@ -625,9 +625,9 @@ void xfs_sort_inodes(struct xfs_inode **i_tab, unsigned int num_inodes); static inline bool xfs_inode_unlinked_incomplete( - struct xfs_inode *ip) + const struct xfs_inode *ip) { - return VFS_I(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip); + return VFS_IC(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip); } int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip); int xfs_inode_reload_unlinked(struct xfs_inode *ip); From 4d272929a5258074328dae206c935634e0fd1a54 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Sun, 3 Nov 2024 20:18:48 -0800 Subject: [PATCH 03/28] xfs: rename metadata inode predicates The predicate xfs_internal_inum tells us if an inumber refers to one of the inodes rooted in the superblock. Soon we're going to have internal inodes in a metadata directory tree, so this helper should be renamed to capture its limited scope. Ondisk inodes will soon have a flag to indicate that they're metadata inodes. Head off some confusion by renaming the xfs_is_metadata_inode predicate to xfs_is_internal_inode. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_types.c | 4 ++-- fs/xfs/libxfs/xfs_types.h | 2 +- fs/xfs/scrub/common.c | 2 +- fs/xfs/scrub/inode.c | 2 +- fs/xfs/scrub/inode_repair.c | 2 +- fs/xfs/scrub/orphanage.c | 2 +- fs/xfs/scrub/parent.c | 2 +- fs/xfs/scrub/refcount_repair.c | 2 +- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_itable.c | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index c91db4f51407..1cbfe57e971d 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -111,7 +111,7 @@ xfs_verify_ino( /* Is this an internal inode number? 
*/ inline bool -xfs_internal_inum( +xfs_is_sb_inum( struct xfs_mount *mp, xfs_ino_t ino) { @@ -129,7 +129,7 @@ xfs_verify_dir_ino( struct xfs_mount *mp, xfs_ino_t ino) { - if (xfs_internal_inum(mp, ino)) + if (xfs_is_sb_inum(mp, ino)) return false; return xfs_verify_ino(mp, ino); } diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index d3cb6ff3b913..25053a66c225 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -230,7 +230,7 @@ bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno, xfs_fsblock_t len); bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); -bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); +bool xfs_is_sb_inum(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); bool xfs_verify_rtbext(struct xfs_mount *mp, xfs_rtblock_t rtbno, diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index e8b5e73bab60..777959f8ec72 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -948,7 +948,7 @@ xchk_iget_for_scrubbing( return xchk_install_live_inode(sc, ip_in); /* Reject internal metadata files and obviously bad inode numbers. */ - if (xfs_internal_inum(mp, sc->sm->sm_ino)) + if (xfs_is_sb_inum(mp, sc->sm->sm_ino)) return -ENOENT; if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino)) return -ENOENT; diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index d32716fb2fec..4a8637afb0e2 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -95,7 +95,7 @@ xchk_setup_inode( } /* Reject internal metadata files and obviously bad inode numbers. 
*/ - if (xfs_internal_inum(mp, sc->sm->sm_ino)) + if (xfs_is_sb_inum(mp, sc->sm->sm_ino)) return -ENOENT; if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino)) return -ENOENT; diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 5da9e1a387a8..9085d6d11aeb 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -1762,7 +1762,7 @@ xrep_inode_pptr( * Metadata inodes are rooted in the superblock and do not have any * parents. */ - if (xfs_is_metadata_inode(ip)) + if (xfs_is_internal_inode(ip)) return 0; /* Inode already has an attr fork; no further work possible here. */ diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 7148d8362db8..5f0d42392608 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -295,7 +295,7 @@ xrep_orphanage_can_adopt( return false; if (sc->ip == sc->orphanage) return false; - if (xfs_internal_inum(sc->mp, sc->ip->i_ino)) + if (xfs_is_sb_inum(sc->mp, sc->ip->i_ino)) return false; return true; } diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 91e7b51ce068..20711a68a874 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -910,7 +910,7 @@ xchk_pptr_looks_zapped( * any parents. Hence the attr fork will not be initialized, but * there are no parent pointers that might have been zapped. */ - if (xfs_is_metadata_inode(ip)) + if (xfs_is_internal_inode(ip)) return false; /* diff --git a/fs/xfs/scrub/refcount_repair.c b/fs/xfs/scrub/refcount_repair.c index 4240fff459cb..4e572b81c986 100644 --- a/fs/xfs/scrub/refcount_repair.c +++ b/fs/xfs/scrub/refcount_repair.c @@ -215,7 +215,7 @@ xrep_refc_rmap_shareable( return false; /* Metadata in files are never shareable */ - if (xfs_internal_inum(mp, rmap->rm_owner)) + if (xfs_is_sb_inum(mp, rmap->rm_owner)) return false; /* Metadata and unwritten file blocks are not shareable. 
*/ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a8430f30d6df..046554601055 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1295,7 +1295,7 @@ xfs_inode_needs_inactive( return false; /* Metadata inodes require explicit resource cleanup. */ - if (xfs_is_metadata_inode(ip)) + if (xfs_is_internal_inode(ip)) return false; /* Want to clean out the cow blocks if there are any. */ @@ -1388,7 +1388,7 @@ xfs_inactive( goto out; /* Metadata inodes require explicit resource cleanup. */ - if (xfs_is_metadata_inode(ip)) + if (xfs_is_internal_inode(ip)) goto out; /* Try to clean out the cow blocks if there are any. */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 41444a557576..df7262f4674f 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -276,7 +276,7 @@ static inline bool xfs_is_reflink_inode(const struct xfs_inode *ip) return ip->i_diflags2 & XFS_DIFLAG2_REFLINK; } -static inline bool xfs_is_metadata_inode(const struct xfs_inode *ip) +static inline bool xfs_is_internal_inode(const struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c0757ab99495..37c2b50d877e 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -69,7 +69,7 @@ xfs_bulkstat_one_int( vfsuid_t vfsuid; vfsgid_t vfsgid; - if (xfs_internal_inum(mp, ino)) + if (xfs_is_sb_inum(mp, ino)) goto out_advance; error = xfs_iget(mp, tp, ino, From ecc8065dfa18b5f6d35e0e2eff96e7378071307b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:49 -0800 Subject: [PATCH 04/28] xfs: standardize EXPERIMENTAL warning generation Refactor the open-coded warnings about EXPERIMENTAL feature use into a standard helper before we go adding more experimental features. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/scrub.c | 3 +-- fs/xfs/xfs_fsops.c | 4 +--- fs/xfs/xfs_message.c | 47 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_message.h | 19 ++++++++++++------ fs/xfs/xfs_mount.h | 20 +++++++++++++------ fs/xfs/xfs_pnfs.c | 3 +-- fs/xfs/xfs_super.c | 10 +++------- fs/xfs/xfs_xattr.c | 3 +-- 8 files changed, 81 insertions(+), 28 deletions(-) diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 4cbcf7a86dbe..8a5c3af4cfda 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -605,8 +605,7 @@ xfs_scrub_metadata( if (error) goto out; - xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB, - "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SCRUB); sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS); if (!sc) { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 82812a458cf1..28dde215c899 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -162,9 +162,7 @@ xfs_growfs_data_private( error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, delta, last_pag, &lastag_extended); } else { - xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SHRINK, - "EXPERIMENTAL online shrink feature in use. 
Use at your own risk!"); - + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SHRINK); error = xfs_ag_shrink_space(last_pag, &tp, -delta); } xfs_perag_put(last_pag); diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 8f495cc23903..c7aa16af6f09 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -131,3 +131,50 @@ xfs_buf_alert_ratelimited( __xfs_printk(KERN_ALERT, mp, &vaf); va_end(args); } + +void +xfs_warn_experimental( + struct xfs_mount *mp, + enum xfs_experimental_feat feat) +{ + static const struct { + const char *name; + long opstate; + } features[] = { + [XFS_EXPERIMENTAL_PNFS] = { + .opstate = XFS_OPSTATE_WARNED_PNFS, + .name = "pNFS", + }, + [XFS_EXPERIMENTAL_SCRUB] = { + .opstate = XFS_OPSTATE_WARNED_SCRUB, + .name = "online scrub", + }, + [XFS_EXPERIMENTAL_SHRINK] = { + .opstate = XFS_OPSTATE_WARNED_SHRINK, + .name = "online shrink", + }, + [XFS_EXPERIMENTAL_LARP] = { + .opstate = XFS_OPSTATE_WARNED_LARP, + .name = "logged extended attributes", + }, + [XFS_EXPERIMENTAL_LBS] = { + .opstate = XFS_OPSTATE_WARNED_LBS, + .name = "large block size", + }, + [XFS_EXPERIMENTAL_EXCHRANGE] = { + .opstate = XFS_OPSTATE_WARNED_EXCHRANGE, + .name = "exchange range", + }, + [XFS_EXPERIMENTAL_PPTR] = { + .opstate = XFS_OPSTATE_WARNED_PPTR, + .name = "parent pointer", + }, + }; + ASSERT(feat >= 0 && feat < XFS_EXPERIMENTAL_MAX); + BUILD_BUG_ON(ARRAY_SIZE(features) != XFS_EXPERIMENTAL_MAX); + + if (xfs_should_warn(mp, features[feat].opstate)) + xfs_warn(mp, + "EXPERIMENTAL %s feature enabled. Use at your own risk!", + features[feat].name); +} diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index cc323775a12c..5be8be72f225 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -75,12 +75,6 @@ do { \ #define xfs_debug_ratelimited(dev, fmt, ...) \ xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__) -#define xfs_warn_mount(mp, warntag, fmt, ...) 
\ -do { \ - if (xfs_should_warn((mp), (warntag))) \ - xfs_warn((mp), (fmt), ##__VA_ARGS__); \ -} while (0) - #define xfs_warn_once(dev, fmt, ...) \ xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__) #define xfs_notice_once(dev, fmt, ...) \ @@ -96,4 +90,17 @@ extern void xfs_hex_dump(const void *p, int length); void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg, const char *fmt, ...); +enum xfs_experimental_feat { + XFS_EXPERIMENTAL_PNFS, + XFS_EXPERIMENTAL_SCRUB, + XFS_EXPERIMENTAL_SHRINK, + XFS_EXPERIMENTAL_LARP, + XFS_EXPERIMENTAL_LBS, + XFS_EXPERIMENTAL_EXCHRANGE, + XFS_EXPERIMENTAL_PPTR, + + XFS_EXPERIMENTAL_MAX, +}; +void xfs_warn_experimental(struct xfs_mount *mp, enum xfs_experimental_feat f); + #endif /* __XFS_MESSAGE_H */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1b698878f40c..b82977f654a5 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -467,18 +467,26 @@ __XFS_HAS_FEAT(nouuid, NOUUID) */ #define XFS_OPSTATE_BLOCKGC_ENABLED 6 +/* Kernel has logged a warning about pNFS being used on this fs. */ +#define XFS_OPSTATE_WARNED_PNFS 7 /* Kernel has logged a warning about online fsck being used on this fs. */ -#define XFS_OPSTATE_WARNED_SCRUB 7 +#define XFS_OPSTATE_WARNED_SCRUB 8 /* Kernel has logged a warning about shrink being used on this fs. */ -#define XFS_OPSTATE_WARNED_SHRINK 8 +#define XFS_OPSTATE_WARNED_SHRINK 9 /* Kernel has logged a warning about logged xattr updates being used. */ -#define XFS_OPSTATE_WARNED_LARP 9 +#define XFS_OPSTATE_WARNED_LARP 10 /* Mount time quotacheck is running */ -#define XFS_OPSTATE_QUOTACHECK_RUNNING 10 +#define XFS_OPSTATE_QUOTACHECK_RUNNING 11 /* Do we want to clear log incompat flags? */ -#define XFS_OPSTATE_UNSET_LOG_INCOMPAT 11 +#define XFS_OPSTATE_UNSET_LOG_INCOMPAT 12 /* Filesystem can use logged extended attributes */ -#define XFS_OPSTATE_USE_LARP 12 +#define XFS_OPSTATE_USE_LARP 13 +/* Kernel has logged a warning about blocksize > pagesize on this fs. 
*/ +#define XFS_OPSTATE_WARNED_LBS 14 +/* Kernel has logged a warning about exchange-range being used on this fs. */ +#define XFS_OPSTATE_WARNED_EXCHRANGE 15 +/* Kernel has logged a warning about parent pointers being used on this fs. */ +#define XFS_OPSTATE_WARNED_PPTR 16 #define __XFS_IS_OPSTATE(name, NAME) \ static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 23d16186e1a3..6f4479deac6d 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -58,8 +58,7 @@ xfs_fs_get_uuid( { struct xfs_mount *mp = XFS_M(sb); - xfs_notice_once(mp, -"Using experimental pNFS feature, use at your own risk!"); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PNFS); if (*len < sizeof(uuid_t)) return -EINVAL; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 457c2d70968d..b7091728791b 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1657,9 +1657,7 @@ xfs_fs_fill_super( goto out_free_sb; } - xfs_warn(mp, -"EXPERIMENTAL: V5 Filesystem with Large Block Size (%d bytes) enabled.", - mp->m_sb.sb_blocksize); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS); } /* Ensure this filesystem fits in the page cache limits */ @@ -1755,12 +1753,10 @@ xfs_fs_fill_super( } if (xfs_has_exchange_range(mp)) - xfs_warn(mp, - "EXPERIMENTAL exchange-range feature enabled. Use at your own risk!"); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_EXCHRANGE); if (xfs_has_parent(mp)) - xfs_warn(mp, - "EXPERIMENTAL parent pointer feature enabled. Use at your own risk!"); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PPTR); error = xfs_mountfs(mp); if (error) diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index eaf849260bd6..0f641a9091ec 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -51,8 +51,7 @@ xfs_attr_grab_log_assist( return error; xfs_set_using_logged_xattrs(mp); - xfs_warn_mount(mp, XFS_OPSTATE_WARNED_LARP, - "EXPERIMENTAL logged extended attributes feature in use. 
Use at your own risk!"); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LARP); return 0; } From 4f3d4dd1b04b2ba0bf236fbaa3c3c0c669aa5a47 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:49 -0800 Subject: [PATCH 05/28] xfs: define the on-disk format for the metadir feature Define the on-disk layout and feature flags for the metadata inode directory feature. Add a xfs_sb_version_hasmetadir for benefit of xfs_repair, which needs to know where the new end of the superblock lies. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_format.h | 95 ++++++++++++++++++++++++++++----- fs/xfs/libxfs/xfs_inode_buf.c | 20 +++++-- fs/xfs/libxfs/xfs_inode_util.c | 2 + fs/xfs/libxfs/xfs_log_format.h | 2 +- fs/xfs/libxfs/xfs_ondisk.h | 2 +- fs/xfs/libxfs/xfs_sb.c | 10 ++++ fs/xfs/scrub/inode.c | 2 +- fs/xfs/scrub/inode_repair.c | 9 ++-- fs/xfs/xfs_inode.h | 14 +++++ fs/xfs/xfs_inode_item.c | 7 ++- fs/xfs/xfs_inode_item_recover.c | 2 +- fs/xfs/xfs_message.c | 4 ++ fs/xfs/xfs_message.h | 1 + fs/xfs/xfs_mount.h | 4 ++ fs/xfs/xfs_super.c | 3 ++ 15 files changed, 152 insertions(+), 25 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index a24ab46aaebc..616f81045921 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -174,6 +174,8 @@ typedef struct xfs_sb { xfs_lsn_t sb_lsn; /* last write sequence */ uuid_t sb_meta_uuid; /* metadata file system unique id */ + xfs_ino_t sb_metadirino; /* metadata directory tree root */ + /* must be padded to 64 bit alignment */ } xfs_sb_t; @@ -259,6 +261,8 @@ struct xfs_dsb { __be64 sb_lsn; /* last write sequence */ uuid_t sb_meta_uuid; /* metadata file system unique id */ + __be64 sb_metadirino; /* metadata directory tree root */ + /* must be padded to 64 bit alignment */ }; @@ -374,6 +378,7 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */ #define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6) 
/* exchangerange supported */ #define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */ +#define XFS_SB_FEAT_INCOMPAT_METADIR (1 << 8) /* metadata dir tree */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE | \ XFS_SB_FEAT_INCOMPAT_SPINODES | \ @@ -790,6 +795,27 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds) return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET; } +enum xfs_metafile_type { + XFS_METAFILE_UNKNOWN, /* unknown */ + XFS_METAFILE_DIR, /* metadir directory */ + XFS_METAFILE_USRQUOTA, /* user quota */ + XFS_METAFILE_GRPQUOTA, /* group quota */ + XFS_METAFILE_PRJQUOTA, /* project quota */ + XFS_METAFILE_RTBITMAP, /* rt bitmap */ + XFS_METAFILE_RTSUMMARY, /* rt summary */ + + XFS_METAFILE_MAX +} __packed; + +#define XFS_METAFILE_TYPE_STR \ + { XFS_METAFILE_UNKNOWN, "unknown" }, \ + { XFS_METAFILE_DIR, "dir" }, \ + { XFS_METAFILE_USRQUOTA, "usrquota" }, \ + { XFS_METAFILE_GRPQUOTA, "grpquota" }, \ + { XFS_METAFILE_PRJQUOTA, "prjquota" }, \ + { XFS_METAFILE_RTBITMAP, "rtbitmap" }, \ + { XFS_METAFILE_RTSUMMARY, "rtsummary" } + /* * On-disk inode structure. * @@ -812,7 +838,7 @@ struct xfs_dinode { __be16 di_mode; /* mode and type of file */ __u8 di_version; /* inode version */ __u8 di_format; /* format of di_c data */ - __be16 di_onlink; /* old number of links to file */ + __be16 di_metatype; /* XFS_METAFILE_*; was di_onlink */ __be32 di_uid; /* owner's user id */ __be32 di_gid; /* owner's group id */ __be32 di_nlink; /* number of links to file */ @@ -1088,21 +1114,60 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) * Values for di_flags2 These start by being exposed to userspace in the upper * 16 bits of the XFS_XFLAG_s range. 
*/ -#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ -#define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ -#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ -#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */ -#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */ +/* use DAX for this inode */ +#define XFS_DIFLAG2_DAX_BIT 0 -#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) -#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) -#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) -#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT) -#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT) +/* file's blocks may be shared */ +#define XFS_DIFLAG2_REFLINK_BIT 1 + +/* copy on write extent size hint */ +#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 + +/* big timestamps */ +#define XFS_DIFLAG2_BIGTIME_BIT 3 + +/* large extent counters */ +#define XFS_DIFLAG2_NREXT64_BIT 4 + +/* + * The inode contains filesystem metadata and can be found through the metadata + * directory tree. Metadata inodes must satisfy the following constraints: + * + * - V5 filesystem (and ftype) are enabled; + * - The only valid modes are regular files and directories; + * - The access bits must be zero; + * - DMAPI event and state masks are zero; + * - The user and group IDs must be zero; + * - The project ID can be used as a u32 annotation; + * - The immutable, sync, noatime, nodump, nodefrag flags must be set. + * - The dax flag must not be set. + * - Directories must have nosymlinks set. + * + * These requirements are chosen defensively to minimize the ability of + * userspace to read or modify the contents, should a metadata file ever + * escape to userspace. + * + * There are further constraints on the directory tree itself: + * + * - Metadata inodes must never be resolvable through the root directory; + * - They must never be accessed by userspace; + * - Metadata directory entries must have correct ftype. 
+ * + * Superblock-rooted metadata files must have the METADATA iflag set even + * though they do not have a parent directory. + */ +#define XFS_DIFLAG2_METADATA_BIT 5 + +#define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT) +#define XFS_DIFLAG2_REFLINK (1ULL << XFS_DIFLAG2_REFLINK_BIT) +#define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT) +#define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT) +#define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT) +#define XFS_DIFLAG2_METADATA (1ULL << XFS_DIFLAG2_METADATA_BIT) #define XFS_DIFLAG2_ANY \ (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ - XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64) + XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA) static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) { @@ -1117,6 +1182,12 @@ static inline bool xfs_dinode_has_large_extent_counts( (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_NREXT64)); } +static inline bool xfs_dinode_is_metadir(const struct xfs_dinode *dip) +{ + return dip->di_version >= 3 && + (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA)); +} + /* * Inode number format: * low inopblog bits - offset in block diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 79babeac9d75..78febaa0d692 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -209,12 +209,15 @@ xfs_inode_from_disk( * They will also be unconditionally written back to disk as v2 inodes. 
*/ if (unlikely(from->di_version == 1)) { - set_nlink(inode, be16_to_cpu(from->di_onlink)); + /* di_metatype used to be di_onlink */ + set_nlink(inode, be16_to_cpu(from->di_metatype)); ip->i_projid = 0; } else { set_nlink(inode, be32_to_cpu(from->di_nlink)); ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | be16_to_cpu(from->di_projid_lo); + if (xfs_dinode_is_metadir(from)) + ip->i_metatype = be16_to_cpu(from->di_metatype); } i_uid_write(inode, be32_to_cpu(from->di_uid)); @@ -315,7 +318,10 @@ xfs_inode_to_disk( struct inode *inode = VFS_I(ip); to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); - to->di_onlink = 0; + if (xfs_is_metadir_inode(ip)) + to->di_metatype = cpu_to_be16(ip->i_metatype); + else + to->di_metatype = 0; to->di_format = xfs_ifork_format(&ip->i_df); to->di_uid = cpu_to_be32(i_uid_read(inode)); @@ -523,8 +529,11 @@ xfs_dinode_verify( * di_nlink==0 on a V1 inode. V2/3 inodes would get written out with * di_onlink==0, so we can check that. */ - if (dip->di_version >= 2) { - if (dip->di_onlink) + if (dip->di_version == 2) { + if (dip->di_metatype) + return __this_address; + } else if (dip->di_version >= 3) { + if (!xfs_dinode_is_metadir(dip) && dip->di_metatype) return __this_address; } @@ -546,7 +555,8 @@ xfs_dinode_verify( if (dip->di_nlink) return __this_address; } else { - if (dip->di_onlink) + /* di_metatype used to be di_onlink */ + if (dip->di_metatype) return __this_address; } } diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index ec64eda3bbe2..deb0b7c00a1f 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -224,6 +224,8 @@ xfs_inode_inherit_flags2( } if (pip->i_diflags2 & XFS_DIFLAG2_DAX) ip->i_diflags2 |= XFS_DIFLAG2_DAX; + if (xfs_is_metadir_inode(pip)) + ip->i_diflags2 |= XFS_DIFLAG2_METADATA; /* Don't let invalid cowextsize hints propagate. 
*/ failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 3e6682ed656b..ace7384a275b 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -404,7 +404,7 @@ struct xfs_log_dinode { uint16_t di_mode; /* mode and type of file */ int8_t di_version; /* inode version */ int8_t di_format; /* format of di_c data */ - uint8_t di_pad3[2]; /* unused in v2/3 inodes */ + uint16_t di_metatype; /* metadata type, if DIFLAG2_METADATA */ uint32_t di_uid; /* owner's user id */ uint32_t di_gid; /* owner's group id */ uint32_t di_nlink; /* number of links to file */ diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index 23c133fd36f5..8bca86e350fd 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -37,7 +37,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_dinode, 176); XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot, 104); XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk, 136); - XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 264); + XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 272); XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4); XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index f7a07e61341d..19fa999b4032 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -180,6 +180,8 @@ xfs_sb_version_to_features( features |= XFS_FEAT_EXCHANGE_RANGE; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT) features |= XFS_FEAT_PARENT; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) + features |= XFS_FEAT_METADIR; return features; } @@ -689,6 +691,11 @@ __xfs_sb_from_disk( /* Convert on-disk flags to in-memory flags? 
*/ if (convert_xquota) xfs_sb_quota_from_disk(to); + + if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) + to->sb_metadirino = be64_to_cpu(from->sb_metadirino); + else + to->sb_metadirino = NULLFSINO; } void @@ -836,6 +843,9 @@ xfs_sb_to_disk( to->sb_lsn = cpu_to_be64(from->sb_lsn); if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); + + if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) + to->sb_metadirino = cpu_to_be64(from->sb_metadirino); } /* diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 4a8637afb0e2..a7ac7a4125ff 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -421,7 +421,7 @@ xchk_dinode( break; case 2: case 3: - if (dip->di_onlink != 0) + if (!xfs_dinode_is_metadir(dip) && dip->di_metatype) xchk_ino_set_corrupt(sc, ino); if (dip->di_mode == 0 && sc->ip) diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 9085d6d11aeb..1eec5c6eb110 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -521,10 +521,13 @@ STATIC void xrep_dinode_nlinks( struct xfs_dinode *dip) { - if (dip->di_version > 1) - dip->di_onlink = 0; - else + if (dip->di_version < 2) { dip->di_nlink = 0; + return; + } + + if (!xfs_dinode_is_metadir(dip)) + dip->di_metatype = 0; } /* Fix any conflicting flags that the verifiers complain about. */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index df7262f4674f..b6e959563547 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -65,6 +65,7 @@ typedef struct xfs_inode { uint16_t i_flushiter; /* incremented on flush */ }; uint8_t i_forkoff; /* attr fork offset >> 3 */ + enum xfs_metafile_type i_metatype; /* XFS_METAFILE_* */ uint16_t i_diflags; /* XFS_DIFLAG_... */ uint64_t i_diflags2; /* XFS_DIFLAG2_... 
*/ struct timespec64 i_crtime; /* time created */ @@ -276,10 +277,23 @@ static inline bool xfs_is_reflink_inode(const struct xfs_inode *ip) return ip->i_diflags2 & XFS_DIFLAG2_REFLINK; } +static inline bool xfs_is_metadir_inode(const struct xfs_inode *ip) +{ + return ip->i_diflags2 & XFS_DIFLAG2_METADATA; +} + static inline bool xfs_is_internal_inode(const struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; + /* Any file in the metadata directory tree is a metadata inode. */ + if (xfs_has_metadir(mp)) + return xfs_is_metadir_inode(ip); + + /* + * Before metadata directories, the only metadata inodes were the + * three quota files, the realtime bitmap, and the realtime summary. + */ return ip->i_ino == mp->m_sb.sb_rbmino || ip->i_ino == mp->m_sb.sb_rsumino || xfs_is_quota_inode(&mp->m_sb, ip->i_ino); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index b509cbd191f4..912f0b1bc3cb 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -556,7 +556,6 @@ xfs_inode_to_log_dinode( to->di_projid_lo = ip->i_projid & 0xffff; to->di_projid_hi = ip->i_projid >> 16; - memset(to->di_pad3, 0, sizeof(to->di_pad3)); to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode_get_atime(inode)); to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode_get_mtime(inode)); to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode_get_ctime(inode)); @@ -590,10 +589,16 @@ xfs_inode_to_log_dinode( /* dummy value for initialisation */ to->di_crc = 0; + + if (xfs_is_metadir_inode(ip)) + to->di_metatype = ip->i_metatype; + else + to->di_metatype = 0; } else { to->di_version = 2; to->di_flushiter = ip->i_flushiter; memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); + to->di_metatype = 0; } xfs_inode_to_log_dinode_iext_counters(ip, to); diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index dbdab4ce7c44..e70d2611456b 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -175,7 +175,7 @@ xfs_log_dinode_to_disk( 
to->di_mode = cpu_to_be16(from->di_mode); to->di_version = from->di_version; to->di_format = from->di_format; - to->di_onlink = 0; + to->di_metatype = cpu_to_be16(from->di_metatype); to->di_uid = cpu_to_be32(from->di_uid); to->di_gid = cpu_to_be32(from->di_gid); to->di_nlink = cpu_to_be32(from->di_nlink); diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index c7aa16af6f09..6ed485ff2756 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -169,6 +169,10 @@ xfs_warn_experimental( .opstate = XFS_OPSTATE_WARNED_PPTR, .name = "parent pointer", }, + [XFS_EXPERIMENTAL_METADIR] = { + .opstate = XFS_OPSTATE_WARNED_METADIR, + .name = "metadata directory tree", + }, }; ASSERT(feat >= 0 && feat < XFS_EXPERIMENTAL_MAX); BUILD_BUG_ON(ARRAY_SIZE(features) != XFS_EXPERIMENTAL_MAX); diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index 5be8be72f225..7fb36ced9df7 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -98,6 +98,7 @@ enum xfs_experimental_feat { XFS_EXPERIMENTAL_LBS, XFS_EXPERIMENTAL_EXCHRANGE, XFS_EXPERIMENTAL_PPTR, + XFS_EXPERIMENTAL_METADIR, XFS_EXPERIMENTAL_MAX, }; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b82977f654a5..6aaacfc0487e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -332,6 +332,7 @@ typedef struct xfs_mount { #define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */ #define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */ #define XFS_FEAT_EXCHANGE_RANGE (1ULL << 27) /* exchange range */ +#define XFS_FEAT_METADIR (1ULL << 28) /* metadata directory tree */ /* Mount features */ #define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */ @@ -387,6 +388,7 @@ __XFS_HAS_FEAT(bigtime, BIGTIME) __XFS_HAS_FEAT(needsrepair, NEEDSREPAIR) __XFS_HAS_FEAT(large_extent_counts, NREXT64) __XFS_HAS_FEAT(exchange_range, EXCHANGE_RANGE) +__XFS_HAS_FEAT(metadir, METADIR) /* * Some features are always on for v5 file systems, allow the compiler to @@ -487,6 +489,8 @@ 
__XFS_HAS_FEAT(nouuid, NOUUID) #define XFS_OPSTATE_WARNED_EXCHRANGE 15 /* Kernel has logged a warning about parent pointers being used on this fs. */ #define XFS_OPSTATE_WARNED_PPTR 16 +/* Kernel has logged a warning about metadata dirs being used on this fs. */ +#define XFS_OPSTATE_WARNED_METADIR 17 #define __XFS_IS_OPSTATE(name, NAME) \ static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index b7091728791b..be493d392960 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1731,6 +1731,9 @@ xfs_fs_fill_super( mp->m_features &= ~XFS_FEAT_DISCARD; } + if (xfs_has_metadir(mp)) + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR); + if (xfs_has_reflink(mp)) { if (mp->m_sb.sb_rblocks) { xfs_alert(mp, From dcf606914334c640fd90853ae86e275b21ba0309 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:50 -0800 Subject: [PATCH 06/28] xfs: iget for metadata inodes Create a xfs_trans_metafile_iget function for metadata inodes to ensure that when we try to iget a metadata file, the inode is allocated and its file mode matches the metadata file type the caller expects. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_metafile.h | 16 +++++++++ fs/xfs/xfs_icache.c | 65 ++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_inode.c | 1 + fs/xfs/xfs_qm.c | 23 +++++++++++-- fs/xfs/xfs_rtalloc.c | 38 ++++++++++++--------- 5 files changed, 125 insertions(+), 18 deletions(-) create mode 100644 fs/xfs/libxfs/xfs_metafile.h diff --git a/fs/xfs/libxfs/xfs_metafile.h b/fs/xfs/libxfs/xfs_metafile.h new file mode 100644 index 000000000000..60fe18906112 --- /dev/null +++ b/fs/xfs/libxfs/xfs_metafile.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. 
Wong + */ +#ifndef __XFS_METAFILE_H__ +#define __XFS_METAFILE_H__ + +/* Code specific to kernel/userspace; must be provided externally. */ + +int xfs_trans_metafile_iget(struct xfs_trans *tp, xfs_ino_t ino, + enum xfs_metafile_type metafile_type, struct xfs_inode **ipp); +int xfs_metafile_iget(struct xfs_mount *mp, xfs_ino_t ino, + enum xfs_metafile_type metafile_type, struct xfs_inode **ipp); + +#endif /* __XFS_METAFILE_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 383c24548202..aa645e357812 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -25,6 +25,9 @@ #include "xfs_ag.h" #include "xfs_log_priv.h" #include "xfs_health.h" +#include "xfs_da_format.h" +#include "xfs_dir2.h" +#include "xfs_metafile.h" #include @@ -828,6 +831,68 @@ xfs_iget( return error; } +/* + * Get a metadata inode. + * + * The metafile type must match the file mode exactly. + */ +int +xfs_trans_metafile_iget( + struct xfs_trans *tp, + xfs_ino_t ino, + enum xfs_metafile_type metafile_type, + struct xfs_inode **ipp) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *ip; + umode_t mode; + int error; + + error = xfs_iget(mp, tp, ino, 0, 0, &ip); + if (error == -EFSCORRUPTED) + goto whine; + if (error) + return error; + + if (VFS_I(ip)->i_nlink == 0) + goto bad_rele; + + if (metafile_type == XFS_METAFILE_DIR) + mode = S_IFDIR; + else + mode = S_IFREG; + if (inode_wrong_type(VFS_I(ip), mode)) + goto bad_rele; + + *ipp = ip; + return 0; +bad_rele: + xfs_irele(ip); +whine: + xfs_err(mp, "metadata inode 0x%llx is corrupt", ino); + return -EFSCORRUPTED; +} + +/* Grab a metadata file if the caller doesn't already have a transaction. 
*/ +int +xfs_metafile_iget( + struct xfs_mount *mp, + xfs_ino_t ino, + enum xfs_metafile_type metafile_type, + struct xfs_inode **ipp) +{ + struct xfs_trans *tp; + int error; + + error = xfs_trans_alloc_empty(mp, &tp); + if (error) + return error; + + error = xfs_trans_metafile_iget(tp, ino, metafile_type, ipp); + xfs_trans_cancel(tp); + return error; +} + /* * Grab the inode for reclaim exclusively. * diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 046554601055..12c5ff151edf 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -43,6 +43,7 @@ #include "xfs_parent.h" #include "xfs_xattr.h" #include "xfs_inode_util.h" +#include "xfs_metafile.h" struct kmem_cache *xfs_inode_cache; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 7e2307921deb..d0674d84af3e 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -27,6 +27,8 @@ #include "xfs_ialloc.h" #include "xfs_log_priv.h" #include "xfs_health.h" +#include "xfs_da_format.h" +#include "xfs_metafile.h" /* * The global quota manager. 
There is only one of these for the entire @@ -733,6 +735,17 @@ xfs_qm_destroy_quotainfo( mp->m_quotainfo = NULL; } +static inline enum xfs_metafile_type +xfs_qm_metafile_type( + unsigned int flags) +{ + if (flags & XFS_QMOPT_UQUOTA) + return XFS_METAFILE_USRQUOTA; + else if (flags & XFS_QMOPT_GQUOTA) + return XFS_METAFILE_GRPQUOTA; + return XFS_METAFILE_PRJQUOTA; +} + /* * Create an inode and return with a reference already taken, but unlocked * This is how we create quota inodes @@ -744,6 +757,7 @@ xfs_qm_qino_alloc( unsigned int flags) { struct xfs_trans *tp; + enum xfs_metafile_type metafile_type = xfs_qm_metafile_type(flags); int error; bool need_alloc = true; @@ -777,9 +791,10 @@ xfs_qm_qino_alloc( } } if (ino != NULLFSINO) { - error = xfs_iget(mp, NULL, ino, 0, 0, ipp); + error = xfs_metafile_iget(mp, ino, metafile_type, ipp); if (error) return error; + mp->m_sb.sb_gquotino = NULLFSINO; mp->m_sb.sb_pquotino = NULLFSINO; need_alloc = false; @@ -1553,16 +1568,20 @@ xfs_qm_qino_load( struct xfs_inode **ipp) { xfs_ino_t ino = NULLFSINO; + enum xfs_metafile_type metafile_type = XFS_METAFILE_UNKNOWN; switch (type) { case XFS_DQTYPE_USER: ino = mp->m_sb.sb_uquotino; + metafile_type = XFS_METAFILE_USRQUOTA; break; case XFS_DQTYPE_GROUP: ino = mp->m_sb.sb_gquotino; + metafile_type = XFS_METAFILE_GRPQUOTA; break; case XFS_DQTYPE_PROJ: ino = mp->m_sb.sb_pquotino; + metafile_type = XFS_METAFILE_PRJQUOTA; break; default: ASSERT(0); @@ -1572,7 +1591,7 @@ xfs_qm_qino_load( if (ino == NULLFSINO) return -ENOENT; - return xfs_iget(mp, NULL, ino, 0, 0, ipp); + return xfs_metafile_iget(mp, ino, metafile_type, ipp); } /* diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 3a2005a1e673..46a920b192d1 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -25,6 +25,8 @@ #include "xfs_quota.h" #include "xfs_log_priv.h" #include "xfs_health.h" +#include "xfs_da_format.h" +#include "xfs_metafile.h" /* * Return whether there are any free extents in the size range 
given @@ -1101,16 +1103,12 @@ xfs_rtalloc_reinit_frextents( */ static inline int xfs_rtmount_iread_extents( + struct xfs_trans *tp, struct xfs_inode *ip, unsigned int lock_class) { - struct xfs_trans *tp; int error; - error = xfs_trans_alloc_empty(ip->i_mount, &tp); - if (error) - return error; - xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class); error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); @@ -1125,7 +1123,6 @@ xfs_rtmount_iread_extents( out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class); - xfs_trans_cancel(tp); return error; } @@ -1133,45 +1130,54 @@ xfs_rtmount_iread_extents( * Get the bitmap and summary inodes and the summary cache into the mount * structure at mount time. */ -int /* error */ +int xfs_rtmount_inodes( - xfs_mount_t *mp) /* file system mount structure */ + struct xfs_mount *mp) { - int error; /* error return value */ - xfs_sb_t *sbp; + struct xfs_trans *tp; + struct xfs_sb *sbp = &mp->m_sb; + int error; - sbp = &mp->m_sb; - error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip); + error = xfs_trans_alloc_empty(mp, &tp); + if (error) + return error; + + error = xfs_trans_metafile_iget(tp, mp->m_sb.sb_rbmino, + XFS_METAFILE_RTBITMAP, &mp->m_rbmip); if (xfs_metadata_is_sick(error)) xfs_rt_mark_sick(mp, XFS_SICK_RT_BITMAP); if (error) - return error; + goto out_trans; ASSERT(mp->m_rbmip != NULL); - error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP); + error = xfs_rtmount_iread_extents(tp, mp->m_rbmip, XFS_ILOCK_RTBITMAP); if (error) goto out_rele_bitmap; - error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip); + error = xfs_trans_metafile_iget(tp, mp->m_sb.sb_rsumino, + XFS_METAFILE_RTSUMMARY, &mp->m_rsumip); if (xfs_metadata_is_sick(error)) xfs_rt_mark_sick(mp, XFS_SICK_RT_SUMMARY); if (error) goto out_rele_bitmap; ASSERT(mp->m_rsumip != NULL); - error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM); + error = xfs_rtmount_iread_extents(tp, mp->m_rsumip, XFS_ILOCK_RTSUM); if (error) goto 
out_rele_summary; error = xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks); if (error) goto out_rele_summary; + xfs_trans_cancel(tp); return 0; out_rele_summary: xfs_irele(mp->m_rsumip); out_rele_bitmap: xfs_irele(mp->m_rbmip); +out_trans: + xfs_trans_cancel(tp); return error; } From c555dd9b8c2d8f09ee31b17fc3ce059bacb4e359 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:51 -0800 Subject: [PATCH 07/28] xfs: load metadata directory root at mount time Load the metadata directory root inode into memory at mount time and release it at unmount time. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_mount.c | 31 +++++++++++++++++++++++++++++-- fs/xfs/xfs_mount.h | 1 + 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 25bbcc3f4ee0..2dd2606fc7e3 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -35,6 +35,7 @@ #include "xfs_trace.h" #include "xfs_ag.h" #include "xfs_rtbitmap.h" +#include "xfs_metafile.h" #include "scrub/stats.h" static DEFINE_MUTEX(xfs_uuid_table_mutex); @@ -620,6 +621,22 @@ xfs_mount_setup_inode_geom( xfs_ialloc_setup_geometry(mp); } +/* Mount the metadata directory tree root. */ +STATIC int +xfs_mount_setup_metadir( + struct xfs_mount *mp) +{ + int error; + + /* Load the metadata directory root inode into memory. */ + error = xfs_metafile_iget(mp, mp->m_sb.sb_metadirino, XFS_METAFILE_DIR, + &mp->m_metadirip); + if (error) + xfs_warn(mp, "Failed to load metadir root directory, error %d", + error); + return error; +} + /* Compute maximum possible height for per-AG btree types for this fs. */ static inline void xfs_agbtree_compute_maxlevels( @@ -866,6 +883,12 @@ xfs_mountfs( mp->m_features |= XFS_FEAT_ATTR2; } + if (xfs_has_metadir(mp)) { + error = xfs_mount_setup_metadir(mp); + if (error) + goto out_free_metadir; + } + /* * Get and sanity-check the root inode. * Save the pointer to it in the mount structure. 
@@ -876,7 +899,7 @@ xfs_mountfs( xfs_warn(mp, "Failed to read root inode 0x%llx, error %d", sbp->sb_rootino, -error); - goto out_log_dealloc; + goto out_free_metadir; } ASSERT(rip != NULL); @@ -1018,6 +1041,9 @@ xfs_mountfs( xfs_irele(rip); /* Clean out dquots that might be in memory after quotacheck. */ xfs_qm_unmount(mp); + out_free_metadir: + if (mp->m_metadirip) + xfs_irele(mp->m_metadirip); /* * Inactivate all inodes that might still be in memory after a log @@ -1039,7 +1065,6 @@ xfs_mountfs( * quota inodes. */ xfs_unmount_flush_inodes(mp); - out_log_dealloc: xfs_log_mount_cancel(mp); out_inodegc_shrinker: shrinker_free(mp->m_inodegc_shrinker); @@ -1091,6 +1116,8 @@ xfs_unmountfs( xfs_qm_unmount_quotas(mp); xfs_rtunmount_inodes(mp); xfs_irele(mp->m_rootip); + if (mp->m_metadirip) + xfs_irele(mp->m_metadirip); xfs_unmount_flush_inodes(mp); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 6aaacfc0487e..71ed04ddd737 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -127,6 +127,7 @@ typedef struct xfs_mount { struct xfs_inode *m_rbmip; /* pointer to bitmap inode */ struct xfs_inode *m_rsumip; /* pointer to summary inode */ struct xfs_inode *m_rootip; /* pointer to root directory */ + struct xfs_inode *m_metadirip; /* ptr to metadata directory */ struct xfs_quotainfo *m_quotainfo; /* disk quota information */ struct xfs_buftarg *m_ddev_targp; /* data device */ struct xfs_buftarg *m_logdev_targp;/* log device */ From 7297fd0bebbd70efd12f72632a0f3ac49a8f59fe Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:51 -0800 Subject: [PATCH 08/28] xfs: enforce metadata inode flag Add checks for the metadata inode flag so that we don't ever leak metadata inodes out to userspace, and we don't ever try to read a regular inode as metadata. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_inode_buf.c | 70 +++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_inode_buf.h | 3 ++ fs/xfs/libxfs/xfs_metafile.h | 11 ++++++ fs/xfs/scrub/common.c | 10 ++++- fs/xfs/scrub/inode.c | 26 ++++++++++++- fs/xfs/scrub/inode_repair.c | 10 +++++ fs/xfs/xfs_icache.c | 12 +++++- fs/xfs/xfs_inode.c | 11 ++++++ 8 files changed, 147 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 78febaa0d692..424861fbf1bd 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -19,6 +19,7 @@ #include "xfs_ialloc.h" #include "xfs_dir2.h" #include "xfs_health.h" +#include "xfs_metafile.h" #include @@ -489,6 +490,69 @@ xfs_dinode_verify_nrext64( return NULL; } +/* + * Validate all the picky requirements we have for a file that claims to be + * filesystem metadata. + */ +xfs_failaddr_t +xfs_dinode_verify_metadir( + struct xfs_mount *mp, + struct xfs_dinode *dip, + uint16_t mode, + uint16_t flags, + uint64_t flags2) +{ + if (!xfs_has_metadir(mp)) + return __this_address; + + /* V5 filesystem only */ + if (dip->di_version < 3) + return __this_address; + + if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) + return __this_address; + + /* V3 inode fields that are always zero */ + if ((flags2 & XFS_DIFLAG2_NREXT64) && dip->di_nrext64_pad) + return __this_address; + if (!(flags2 & XFS_DIFLAG2_NREXT64) && dip->di_flushiter) + return __this_address; + + /* Metadata files can only be directories or regular files */ + if (!S_ISDIR(mode) && !S_ISREG(mode)) + return __this_address; + + /* They must have zero access permissions */ + if (mode & 0777) + return __this_address; + + /* DMAPI event and state masks are zero */ + if (dip->di_dmevmask || dip->di_dmstate) + return __this_address; + + /* + * User and group IDs must be zero. The project ID is used for + * grouping inodes. Metadata inodes are never accounted to quotas. 
+ */ + if (dip->di_uid || dip->di_gid) + return __this_address; + + /* Mandatory inode flags must be set */ + if (S_ISDIR(mode)) { + if ((flags & XFS_METADIR_DIFLAGS) != XFS_METADIR_DIFLAGS) + return __this_address; + } else { + if ((flags & XFS_METAFILE_DIFLAGS) != XFS_METAFILE_DIFLAGS) + return __this_address; + } + + /* dax flags2 must not be set */ + if (flags2 & XFS_DIFLAG2_DAX) + return __this_address; + + return NULL; +} + xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, @@ -673,6 +737,12 @@ xfs_dinode_verify( !xfs_has_bigtime(mp)) return __this_address; + if (flags2 & XFS_DIFLAG2_METADATA) { + fa = xfs_dinode_verify_metadir(mp, dip, mode, flags, flags2); + if (fa) + return fa; + } + return NULL; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 585ed5a110af..8d43d2641c73 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -28,6 +28,9 @@ int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, struct xfs_dinode *dip); +xfs_failaddr_t xfs_dinode_verify_metadir(struct xfs_mount *mp, + struct xfs_dinode *dip, uint16_t mode, uint16_t flags, + uint64_t flags2); xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, uint32_t extsize, uint16_t mode, uint16_t flags); xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_metafile.h b/fs/xfs/libxfs/xfs_metafile.h index 60fe18906112..c66b0c51b461 100644 --- a/fs/xfs/libxfs/xfs_metafile.h +++ b/fs/xfs/libxfs/xfs_metafile.h @@ -6,6 +6,17 @@ #ifndef __XFS_METAFILE_H__ #define __XFS_METAFILE_H__ +/* All metadata files must have these flags set. */ +#define XFS_METAFILE_DIFLAGS (XFS_DIFLAG_IMMUTABLE | \ + XFS_DIFLAG_SYNC | \ + XFS_DIFLAG_NOATIME | \ + XFS_DIFLAG_NODUMP | \ + XFS_DIFLAG_NODEFRAG) + +/* All metadata directories must have these flags set. 
*/ +#define XFS_METADIR_DIFLAGS (XFS_METAFILE_DIFLAGS | \ + XFS_DIFLAG_NOSYMLINKS) + /* Code specific to kernel/userspace; must be provided externally. */ int xfs_trans_metafile_iget(struct xfs_trans *tp, xfs_ino_t ino, diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 777959f8ec72..001af49b2988 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -947,9 +947,15 @@ xchk_iget_for_scrubbing( if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) return xchk_install_live_inode(sc, ip_in); - /* Reject internal metadata files and obviously bad inode numbers. */ - if (xfs_is_sb_inum(mp, sc->sm->sm_ino)) + /* + * On pre-metadir filesystems, reject internal metadata files. For + * metadir filesystems, limited scrubbing of any file in the metadata + * directory tree by handle is allowed, because that is the only way to + * validate the lack of parent pointers in the sb-root metadata inodes. + */ + if (!xfs_has_metadir(mp) && xfs_is_sb_inum(mp, sc->sm->sm_ino)) return -ENOENT; + /* Reject obviously bad inode numbers. */ if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino)) return -ENOENT; diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index a7ac7a4125ff..ac5c56416533 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -60,6 +60,22 @@ xchk_install_handle_iscrub( if (error) return error; + /* + * Don't allow scrubbing by handle of any non-directory inode records + * in the metadata directory tree. We don't know if any of the scans + * launched by this scrubber will end up indirectly trying to lock this + * file. + * + * Scrubbers of inode-rooted metadata files (e.g. quota files) will + * attach all the resources needed to scrub the inode and call + * xchk_inode directly. Userspace cannot call this directly. 
+ */ + if (xfs_is_metadir_inode(ip) && !S_ISDIR(VFS_I(ip)->i_mode)) { + xchk_irele(sc, ip); + sc->ip = NULL; + return -ENOENT; + } + return xchk_prepare_iscrub(sc); } @@ -94,9 +110,15 @@ xchk_setup_inode( return xchk_prepare_iscrub(sc); } - /* Reject internal metadata files and obviously bad inode numbers. */ - if (xfs_is_sb_inum(mp, sc->sm->sm_ino)) + /* + * On pre-metadir filesystems, reject internal metadata files. For + * metadir filesystems, limited scrubbing of any file in the metadata + * directory tree by handle is allowed, because that is the only way to + * validate the lack of parent pointers in the sb-root metadata inodes. + */ + if (!xfs_has_metadir(mp) && xfs_is_sb_inum(mp, sc->sm->sm_ino)) return -ENOENT; + /* Reject obviously bad inode numbers. */ if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino)) return -ENOENT; diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 1eec5c6eb110..486cedbc40bb 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -568,6 +568,16 @@ xrep_dinode_flags( dip->di_nrext64_pad = 0; else if (dip->di_version >= 3) dip->di_v3_pad = 0; + + if (flags2 & XFS_DIFLAG2_METADATA) { + xfs_failaddr_t fa; + + fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags, + flags2); + if (fa) + flags2 &= ~XFS_DIFLAG2_METADATA; + } + dip->di_flags = cpu_to_be16(flags); dip->di_flags2 = cpu_to_be64(flags2); } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index aa645e357812..48543bf0f5ce 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -834,7 +834,8 @@ xfs_iget( /* * Get a metadata inode. * - * The metafile type must match the file mode exactly. + * The metafile type must match the file mode exactly, and for files in the + * metadata directory tree, it must match the inode's metatype exactly. 
*/ int xfs_trans_metafile_iget( @@ -863,13 +864,20 @@ xfs_trans_metafile_iget( mode = S_IFREG; if (inode_wrong_type(VFS_I(ip), mode)) goto bad_rele; + if (xfs_has_metadir(mp)) { + if (!xfs_is_metadir_inode(ip)) + goto bad_rele; + if (metafile_type != ip->i_metatype) + goto bad_rele; + } *ipp = ip; return 0; bad_rele: xfs_irele(ip); whine: - xfs_err(mp, "metadata inode 0x%llx is corrupt", ino); + xfs_err(mp, "metadata inode 0x%llx type %u is corrupt", ino, + metafile_type); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 12c5ff151edf..ae94583ea3bb 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -555,8 +555,19 @@ xfs_lookup( if (error) goto out_free_name; + /* + * Fail if a directory entry in the regular directory tree points to + * a metadata file. + */ + if (XFS_IS_CORRUPT(dp->i_mount, xfs_is_metadir_inode(*ipp))) { + error = -EFSCORRUPTED; + goto out_irele; + } + return 0; +out_irele: + xfs_irele(*ipp); out_free_name: if (ci_name) kfree(ci_name->name); From 5d9b54a4ef34380aeba844a59e60abb7c65a7ff7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:52 -0800 Subject: [PATCH 09/28] xfs: read and write metadata inode directory tree Plumb in the bits we need to load metadata inodes from a named entry in a metadir directory, create (or hardlink) inodes into a metadir directory, create metadir directories, and flag inodes as being metadata files. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 4 +- fs/xfs/libxfs/xfs_metadir.c | 474 +++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_metadir.h | 47 ++++ fs/xfs/libxfs/xfs_metafile.c | 52 ++++ fs/xfs/libxfs/xfs_metafile.h | 4 + fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_trace.c | 2 + fs/xfs/xfs_trace.h | 102 ++++++++ 8 files changed, 685 insertions(+), 2 deletions(-) create mode 100644 fs/xfs/libxfs/xfs_metadir.c create mode 100644 fs/xfs/libxfs/xfs_metadir.h create mode 100644 fs/xfs/libxfs/xfs_metafile.c diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 94cb8ca9f9da..ba418a40aeb5 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,6 +16,7 @@ xfs-y += xfs_trace.o xfs-y += $(addprefix libxfs/, \ xfs_group.o \ xfs_ag.o \ + xfs_ag_resv.o \ xfs_alloc.o \ xfs_alloc_btree.o \ xfs_attr.o \ @@ -43,7 +44,8 @@ xfs-y += $(addprefix libxfs/, \ xfs_inode_buf.o \ xfs_inode_util.o \ xfs_log_rlimit.o \ - xfs_ag_resv.o \ + xfs_metadir.o \ + xfs_metafile.o \ xfs_parent.o \ xfs_rmap.o \ xfs_rmap_btree.o \ diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c new file mode 100644 index 000000000000..0a61316b4f52 --- /dev/null +++ b/fs/xfs/libxfs/xfs_metadir.c @@ -0,0 +1,474 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. 
Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_trans.h" +#include "xfs_metafile.h" +#include "xfs_metadir.h" +#include "xfs_trace.h" +#include "xfs_inode.h" +#include "xfs_quota.h" +#include "xfs_ialloc.h" +#include "xfs_bmap_btree.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_trans_space.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_parent.h" + +/* + * Metadata Directory Tree + * ======================= + * + * These functions provide an abstraction layer for looking up, creating, and + * deleting metadata inodes that live within a special metadata directory tree. + * + * This code does not manage the five existing metadata inodes: real time + * bitmap & summary; and the user, group, and project quotas. All other metadata + * inodes must use only the xfs_meta{dir,file}_* functions. + * + * Callers wishing to create or hardlink a metadata inode must create an + * xfs_metadir_update structure, call the appropriate xfs_metadir* function, + * and then call xfs_metadir_commit or xfs_metadir_cancel to commit or cancel + * the update. Files in the metadata directory tree currently cannot be + * unlinked. + * + * When the metadir feature is enabled, all metadata inodes must have the + * "metadata" inode flag set to prevent them from being exposed to the outside + * world. + * + * Callers must take the ILOCK of any inode in the metadata directory tree to + * synchronize access to that inode. It is never necessary to take the IOLOCK + * or the MMAPLOCK since metadata inodes must not be exposed to user space.
+ */ + +static inline void +xfs_metadir_set_xname( + struct xfs_name *xname, + const char *path, + unsigned char ftype) +{ + xname->name = (const unsigned char *)path; + xname->len = strlen(path); + xname->type = ftype; +} + +/* + * Given a parent directory @dp and a metadata inode path component @xname, + * look up the inode number in the directory, returning it in @ino. + * @xname.type must be XFS_DIR3_FT_UNKNOWN or match the entry's ftype. + * + * Caller must hold ILOCK_EXCL. + */ +static inline int +xfs_metadir_lookup( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_name *xname, + xfs_ino_t *ino) +{ + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_args args = { + .trans = tp, + .dp = dp, + .geo = mp->m_dir_geo, + .name = xname->name, + .namelen = xname->len, + .hashval = xfs_dir2_hashname(mp, xname), + .whichfork = XFS_DATA_FORK, + .op_flags = XFS_DA_OP_OKNOENT, + .owner = dp->i_ino, + }; + int error; + + if (!S_ISDIR(VFS_I(dp)->i_mode)) + return -EFSCORRUPTED; + if (xfs_is_shutdown(mp)) + return -EIO; + + error = xfs_dir_lookup_args(&args); + if (error) + return error; + + if (!xfs_verify_ino(mp, args.inumber)) + return -EFSCORRUPTED; + if (xname->type != XFS_DIR3_FT_UNKNOWN && xname->type != args.filetype) + return -EFSCORRUPTED; + + trace_xfs_metadir_lookup(dp, xname, args.inumber); + *ino = args.inumber; + return 0; +} + +/* + * Look up and read a metadata inode from the metadata directory. If the path + * component doesn't exist, return -ENOENT.
+ */ +int +xfs_metadir_load( + struct xfs_trans *tp, + struct xfs_inode *dp, + const char *path, + enum xfs_metafile_type metafile_type, + struct xfs_inode **ipp) +{ + struct xfs_name xname; + xfs_ino_t ino; + int error; + + xfs_metadir_set_xname(&xname, path, XFS_DIR3_FT_UNKNOWN); + + xfs_ilock(dp, XFS_ILOCK_EXCL); + error = xfs_metadir_lookup(tp, dp, &xname, &ino); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + if (error) + return error; + return xfs_trans_metafile_iget(tp, ino, metafile_type, ipp); +} + +/* + * Unlock and release resources after committing (or cancelling) a metadata + * directory tree operation. The caller retains its reference to @upd->ip + * and must release it explicitly. + */ +static inline void +xfs_metadir_teardown( + struct xfs_metadir_update *upd, + int error) +{ + trace_xfs_metadir_teardown(upd, error); + + if (upd->ppargs) { + xfs_parent_finish(upd->dp->i_mount, upd->ppargs); + upd->ppargs = NULL; + } + + if (upd->ip) { + if (upd->ip_locked) + xfs_iunlock(upd->ip, XFS_ILOCK_EXCL); + upd->ip_locked = false; + } + + if (upd->dp_locked) + xfs_iunlock(upd->dp, XFS_ILOCK_EXCL); + upd->dp_locked = false; +} + +/* + * Begin the process of creating a metadata file by allocating transactions + * and taking whatever resources we're going to need. + */ +int +xfs_metadir_start_create( + struct xfs_metadir_update *upd) +{ + struct xfs_mount *mp = upd->dp->i_mount; + int error; + + ASSERT(upd->dp != NULL); + ASSERT(upd->ip == NULL); + ASSERT(xfs_has_metadir(mp)); + ASSERT(upd->metafile_type != XFS_METAFILE_UNKNOWN); + + error = xfs_parent_start(mp, &upd->ppargs); + if (error) + return error; + + /* + * If we ever need the ability to create rt metadata files on a + * pre-metadir filesystem, we'll need to dqattach the parent here. + * Currently we assume that mkfs will create the files and quotacheck + * will account for them. 
+ */ + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create, + xfs_create_space_res(mp, MAXNAMELEN), 0, 0, &upd->tp); + if (error) + goto out_teardown; + + /* + * Lock the parent directory if there is one. We can't ijoin it to + * the transaction until after the child file has been created. + */ + xfs_ilock(upd->dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); + upd->dp_locked = true; + + trace_xfs_metadir_start_create(upd); + return 0; +out_teardown: + xfs_metadir_teardown(upd, error); + return error; +} + +/* + * Create a metadata inode with the given @mode, and insert it into the + * metadata directory tree at the given @upd->path. The path up to the final + * component must already exist. The final path component must not exist. + * + * The new metadata inode will be attached to the update structure @upd->ip, + * with the ILOCK held until the caller releases it. + * + * NOTE: This function may return a new inode to the caller even if it returns + * a negative error code. If an inode is passed back, the caller must finish + * setting up the inode before releasing it. + */ +int +xfs_metadir_create( + struct xfs_metadir_update *upd, + umode_t mode) +{ + struct xfs_icreate_args args = { + .pip = upd->dp, + .mode = mode, + }; + struct xfs_name xname; + struct xfs_dir_update du = { + .dp = upd->dp, + .name = &xname, + .ppargs = upd->ppargs, + }; + struct xfs_mount *mp = upd->dp->i_mount; + xfs_ino_t ino; + unsigned int resblks; + int error; + + xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL); + + /* Check that the name does not already exist in the directory. */ + xfs_metadir_set_xname(&xname, upd->path, XFS_DIR3_FT_UNKNOWN); + error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino); + switch (error) { + case -ENOENT: + break; + case 0: + error = -EEXIST; + fallthrough; + default: + return error; + } + + /* + * A newly created regular or special file just has one directory + * entry pointing to it, but a directory also has the "." entry + * pointing to itself.
+ */ + error = xfs_dialloc(&upd->tp, &args, &ino); + if (error) + return error; + error = xfs_icreate(upd->tp, ino, &args, &upd->ip); + if (error) + return error; + du.ip = upd->ip; + xfs_metafile_set_iflag(upd->tp, upd->ip, upd->metafile_type); + upd->ip_locked = true; + + /* + * Join the directory inode to the transaction. We do not do it + * earlier because xfs_dialloc rolls the transaction. + */ + xfs_trans_ijoin(upd->tp, upd->dp, 0); + + /* Create the entry. */ + if (S_ISDIR(args.mode)) + resblks = xfs_mkdir_space_res(mp, xname.len); + else + resblks = xfs_create_space_res(mp, xname.len); + xname.type = xfs_mode_to_ftype(args.mode); + + trace_xfs_metadir_try_create(upd); + + error = xfs_dir_create_child(upd->tp, resblks, &du); + if (error) + return error; + + /* Metadir files are not accounted to quota. */ + + trace_xfs_metadir_create(upd); + + return 0; +} + +#ifndef __KERNEL__ +/* + * Begin the process of linking a metadata file by allocating transactions + * and locking whatever resources we're going to need. + */ +int +xfs_metadir_start_link( + struct xfs_metadir_update *upd) +{ + struct xfs_mount *mp = upd->dp->i_mount; + unsigned int resblks; + int nospace_error = 0; + int error; + + ASSERT(upd->dp != NULL); + ASSERT(upd->ip != NULL); + ASSERT(xfs_has_metadir(mp)); + + error = xfs_parent_start(mp, &upd->ppargs); + if (error) + return error; + + resblks = xfs_link_space_res(mp, MAXNAMELEN); + error = xfs_trans_alloc_dir(upd->dp, &M_RES(mp)->tr_link, upd->ip, + &resblks, &upd->tp, &nospace_error); + if (error) + goto out_teardown; + if (!resblks) { + /* We don't allow reservationless updates. 
*/ + xfs_trans_cancel(upd->tp); + upd->tp = NULL; + xfs_iunlock(upd->dp, XFS_ILOCK_EXCL); + xfs_iunlock(upd->ip, XFS_ILOCK_EXCL); + error = nospace_error; + goto out_teardown; + } + + upd->dp_locked = true; + upd->ip_locked = true; + + trace_xfs_metadir_start_link(upd); + return 0; +out_teardown: + xfs_metadir_teardown(upd, error); + return error; +} + +/* + * Link the metadata directory given by @path to the inode @upd->ip. + * The path (up to the final component) must already exist, but the final + * component must not already exist. + */ +int +xfs_metadir_link( + struct xfs_metadir_update *upd) +{ + struct xfs_name xname; + struct xfs_dir_update du = { + .dp = upd->dp, + .name = &xname, + .ip = upd->ip, + .ppargs = upd->ppargs, + }; + struct xfs_mount *mp = upd->dp->i_mount; + xfs_ino_t ino; + unsigned int resblks; + int error; + + xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL); + xfs_assert_ilocked(upd->ip, XFS_ILOCK_EXCL); + + /* Look up the name in the current directory. */ + xfs_metadir_set_xname(&xname, upd->path, + xfs_mode_to_ftype(VFS_I(upd->ip)->i_mode)); + error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino); + switch (error) { + case -ENOENT: + break; + case 0: + error = -EEXIST; + fallthrough; + default: + return error; + } + + resblks = xfs_link_space_res(mp, xname.len); + error = xfs_dir_add_child(upd->tp, resblks, &du); + if (error) + return error; + + trace_xfs_metadir_link(upd); + + return 0; +} +#endif /* ! __KERNEL__ */ + +/* Commit a metadir update and unlock/drop all resources. */ +int +xfs_metadir_commit( + struct xfs_metadir_update *upd) +{ + int error; + + trace_xfs_metadir_commit(upd); + + error = xfs_trans_commit(upd->tp); + upd->tp = NULL; + + xfs_metadir_teardown(upd, error); + return error; +} + +/* Cancel a metadir update and unlock/drop all resources. 
+ */ +void +xfs_metadir_cancel( + struct xfs_metadir_update *upd, + int error) +{ + trace_xfs_metadir_cancel(upd); + + xfs_trans_cancel(upd->tp); + upd->tp = NULL; + + xfs_metadir_teardown(upd, error); +} + +/* Create a metadata directory for the last component of the path. */ +int +xfs_metadir_mkdir( + struct xfs_inode *dp, + const char *path, + struct xfs_inode **ipp) +{ + struct xfs_metadir_update upd = { + .dp = dp, + .path = path, + .metafile_type = XFS_METAFILE_DIR, + }; + int error; + + if (xfs_is_shutdown(dp->i_mount)) + return -EIO; + + /* Allocate a transaction to create the last directory. */ + error = xfs_metadir_start_create(&upd); + if (error) + return error; + + /* Create the subdirectory and take our reference. */ + error = xfs_metadir_create(&upd, S_IFDIR); + if (error) + goto out_cancel; + + error = xfs_metadir_commit(&upd); + if (error) + goto out_irele; + + xfs_finish_inode_setup(upd.ip); + *ipp = upd.ip; + return 0; + +out_cancel: + xfs_metadir_cancel(&upd, error); +out_irele: + /* Have to finish setting up the inode to ensure it's deleted. */ + if (upd.ip) { + xfs_finish_inode_setup(upd.ip); + xfs_irele(upd.ip); + } + return error; +} diff --git a/fs/xfs/libxfs/xfs_metadir.h b/fs/xfs/libxfs/xfs_metadir.h new file mode 100644 index 000000000000..bfecac7d3d14 --- /dev/null +++ b/fs/xfs/libxfs/xfs_metadir.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_METADIR_H__ +#define __XFS_METADIR_H__ + +/* Cleanup widget for metadata inode creation and deletion. 
*/ +struct xfs_metadir_update { + /* Parent directory */ + struct xfs_inode *dp; + + /* Path to metadata file */ + const char *path; + + /* Parent pointer update context */ + struct xfs_parent_args *ppargs; + + /* Child metadata file */ + struct xfs_inode *ip; + + struct xfs_trans *tp; + + enum xfs_metafile_type metafile_type; + + unsigned int dp_locked:1; + unsigned int ip_locked:1; +}; + +int xfs_metadir_load(struct xfs_trans *tp, struct xfs_inode *dp, + const char *path, enum xfs_metafile_type metafile_type, + struct xfs_inode **ipp); + +int xfs_metadir_start_create(struct xfs_metadir_update *upd); +int xfs_metadir_create(struct xfs_metadir_update *upd, umode_t mode); + +int xfs_metadir_start_link(struct xfs_metadir_update *upd); +int xfs_metadir_link(struct xfs_metadir_update *upd); + +int xfs_metadir_commit(struct xfs_metadir_update *upd); +void xfs_metadir_cancel(struct xfs_metadir_update *upd, int error); + +int xfs_metadir_mkdir(struct xfs_inode *dp, const char *path, + struct xfs_inode **ipp); + +#endif /* __XFS_METADIR_H__ */ diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c new file mode 100644 index 000000000000..adeb25d1a444 --- /dev/null +++ b/fs/xfs/libxfs/xfs_metafile.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_trans.h" +#include "xfs_metafile.h" +#include "xfs_trace.h" +#include "xfs_inode.h" + +/* Set up an inode to be recognized as a metadata directory inode. 
*/ +void +xfs_metafile_set_iflag( + struct xfs_trans *tp, + struct xfs_inode *ip, + enum xfs_metafile_type metafile_type) +{ + VFS_I(ip)->i_mode &= ~0777; + VFS_I(ip)->i_uid = GLOBAL_ROOT_UID; + VFS_I(ip)->i_gid = GLOBAL_ROOT_GID; + if (S_ISDIR(VFS_I(ip)->i_mode)) + ip->i_diflags |= XFS_METADIR_DIFLAGS; + else + ip->i_diflags |= XFS_METAFILE_DIFLAGS; + ip->i_diflags2 &= ~XFS_DIFLAG2_DAX; + ip->i_diflags2 |= XFS_DIFLAG2_METADATA; + ip->i_metatype = metafile_type; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +} + +/* Clear the metadata directory inode flag. */ +void +xfs_metafile_clear_iflag( + struct xfs_trans *tp, + struct xfs_inode *ip) +{ + ASSERT(xfs_is_metadir_inode(ip)); + ASSERT(VFS_I(ip)->i_nlink == 0); + + ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +} diff --git a/fs/xfs/libxfs/xfs_metafile.h b/fs/xfs/libxfs/xfs_metafile.h index c66b0c51b461..acec400123db 100644 --- a/fs/xfs/libxfs/xfs_metafile.h +++ b/fs/xfs/libxfs/xfs_metafile.h @@ -17,6 +17,10 @@ #define XFS_METADIR_DIFLAGS (XFS_METAFILE_DIFLAGS | \ XFS_DIFLAG_NOSYMLINKS) +void xfs_metafile_set_iflag(struct xfs_trans *tp, struct xfs_inode *ip, + enum xfs_metafile_type metafile_type); +void xfs_metafile_clear_iflag(struct xfs_trans *tp, struct xfs_inode *ip); + /* Code specific to kernel/userspace; must be provided externally. 
*/ int xfs_trans_metafile_iget(struct xfs_trans *tp, xfs_ino_t ino, diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 48543bf0f5ce..5171ad93fc40 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -850,7 +850,7 @@ xfs_trans_metafile_iget( int error; error = xfs_iget(mp, tp, ino, 0, 0, &ip); - if (error == -EFSCORRUPTED) + if (error == -EFSCORRUPTED || error == -EINVAL) goto whine; if (error) return error; diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 17164b2d0472..1b9d75a54c5e 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -46,6 +46,8 @@ #include "xfs_parent.h" #include "xfs_rmap.h" #include "xfs_refcount.h" +#include "xfs_metafile.h" +#include "xfs_metadir.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 8aa6af5c9c01..e2db13ed08b5 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -95,6 +95,7 @@ struct xfs_attrlist_cursor_kern; struct xfs_extent_free_item; struct xfs_rmap_intent; struct xfs_refcount_intent; +struct xfs_metadir_update; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ @@ -5352,6 +5353,107 @@ DEFINE_EVENT(xfs_getparents_class, name, \ DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_begin); DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_end); +DECLARE_EVENT_CLASS(xfs_metadir_update_class, + TP_PROTO(const struct xfs_metadir_update *upd), + TP_ARGS(upd), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dp_ino) + __field(xfs_ino_t, ino) + __string(fname, upd->path) + ), + TP_fast_assign( + __entry->dev = upd->dp->i_mount->m_super->s_dev; + __entry->dp_ino = upd->dp->i_ino; + __entry->ino = upd->ip ? 
upd->ip->i_ino : NULLFSINO; + __assign_str(fname); + ), + TP_printk("dev %d:%d dp 0x%llx fname '%s' ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dp_ino, + __get_str(fname), + __entry->ino) +) + +#define DEFINE_METADIR_UPDATE_EVENT(name) \ +DEFINE_EVENT(xfs_metadir_update_class, name, \ + TP_PROTO(const struct xfs_metadir_update *upd), \ + TP_ARGS(upd)) +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_start_create); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_start_link); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_commit); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_cancel); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_try_create); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_create); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_link); + +DECLARE_EVENT_CLASS(xfs_metadir_update_error_class, + TP_PROTO(const struct xfs_metadir_update *upd, int error), + TP_ARGS(upd, error), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dp_ino) + __field(xfs_ino_t, ino) + __field(int, error) + __string(fname, upd->path) + ), + TP_fast_assign( + __entry->dev = upd->dp->i_mount->m_super->s_dev; + __entry->dp_ino = upd->dp->i_ino; + __entry->ino = upd->ip ? 
upd->ip->i_ino : NULLFSINO; + __entry->error = error; + __assign_str(fname); + ), + TP_printk("dev %d:%d dp 0x%llx fname '%s' ino 0x%llx error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dp_ino, + __get_str(fname), + __entry->ino, + __entry->error) +) + +#define DEFINE_METADIR_UPDATE_ERROR_EVENT(name) \ +DEFINE_EVENT(xfs_metadir_update_error_class, name, \ + TP_PROTO(const struct xfs_metadir_update *upd, int error), \ + TP_ARGS(upd, error)) +DEFINE_METADIR_UPDATE_ERROR_EVENT(xfs_metadir_teardown); + +DECLARE_EVENT_CLASS(xfs_metadir_class, + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name, + xfs_ino_t ino), + TP_ARGS(dp, name, ino), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dp_ino) + __field(xfs_ino_t, ino) + __field(int, ftype) + __field(int, namelen) + __dynamic_array(char, name, name->len) + ), + TP_fast_assign( + __entry->dev = VFS_I(dp)->i_sb->s_dev; + __entry->dp_ino = dp->i_ino; + __entry->ino = ino, + __entry->ftype = name->type; + __entry->namelen = name->len; + memcpy(__get_str(name), name->name, name->len); + ), + TP_printk("dev %d:%d dir 0x%llx type %s name '%.*s' ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dp_ino, + __print_symbolic(__entry->ftype, XFS_DIR3_FTYPE_STR), + __entry->namelen, + __get_str(name), + __entry->ino) +) + +#define DEFINE_METADIR_EVENT(name) \ +DEFINE_EVENT(xfs_metadir_class, name, \ + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name, \ + xfs_ino_t ino), \ + TP_ARGS(dp, name, ino)) +DEFINE_METADIR_EVENT(xfs_metadir_lookup); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH From 8651b410ae781cc607159c51dbb0b317b23543b1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:52 -0800 Subject: [PATCH 10/28] xfs: disable the agi rotor for metadata inodes Ideally, we'd put all the metadata inodes in one place if we could, so that the metadata all stay reasonably close together instead of spreading out over the disk. 
Furthermore, if the log is internal we'd probably prefer to keep the metadata near the log. Therefore, disable AGI rotoring for metadata inode allocations. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_ialloc.c | 60 ++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index f0261c4d9106..8b84e2cf711b 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1841,6 +1841,40 @@ xfs_dialloc_try_ag( return error; } +/* + * Pick an AG for the new inode. + * + * Directories, symlinks, and regular files frequently allocate at least one + * block, so factor that potential expansion when we examine whether an AG has + * enough space for file creation. Try to keep metadata files all in the same + * AG. + */ +static inline xfs_agnumber_t +xfs_dialloc_pick_ag( + struct xfs_mount *mp, + struct xfs_inode *dp, + umode_t mode) +{ + xfs_agnumber_t start_agno; + + if (!dp) + return 0; + if (xfs_is_metadir_inode(dp)) { + if (mp->m_sb.sb_logstart) + return XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); + return 0; + } + + if (S_ISDIR(mode)) + return (atomic_inc_return(&mp->m_agirotor) - 1) % mp->m_maxagi; + + start_agno = XFS_INO_TO_AGNO(mp, dp->i_ino); + if (start_agno >= mp->m_maxagi) + start_agno = 0; + + return start_agno; +} + /* * Allocate an on-disk inode. * @@ -1856,31 +1890,19 @@ xfs_dialloc( xfs_ino_t *new_ino) { struct xfs_mount *mp = (*tpp)->t_mountp; - xfs_ino_t parent = args->pip ? args->pip->i_ino : 0; - umode_t mode = args->mode & S_IFMT; - xfs_agnumber_t agno; - int error = 0; - xfs_agnumber_t start_agno; struct xfs_perag *pag; struct xfs_ino_geometry *igeo = M_IGEO(mp); + xfs_ino_t ino = NULLFSINO; + xfs_ino_t parent = args->pip ? 
args->pip->i_ino : 0; + xfs_agnumber_t agno; + xfs_agnumber_t start_agno; + umode_t mode = args->mode & S_IFMT; bool ok_alloc = true; bool low_space = false; int flags; - xfs_ino_t ino = NULLFSINO; + int error = 0; - /* - * Directories, symlinks, and regular files frequently allocate at least - * one block, so factor that potential expansion when we examine whether - * an AG has enough space for file creation. - */ - if (S_ISDIR(mode)) - start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) % - mp->m_maxagi; - else { - start_agno = XFS_INO_TO_AGNO(mp, parent); - if (start_agno >= mp->m_maxagi) - start_agno = 0; - } + start_agno = xfs_dialloc_pick_ag(mp, args->pip, mode); /* * If we have already hit the ceiling of inode blocks then clear From bb6cdd5529ff67081466ef7257000b04204aea23 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:53 -0800 Subject: [PATCH 11/28] xfs: hide metadata inodes from everyone because they are special Metadata inodes are private files and therefore cannot be exposed to userspace. This means no bulkstat, no open-by-handle, no linking them into the directory tree, and no feeding them to LSMs. As such, we mark them S_PRIVATE, which stops all that. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/tempfile.c | 8 ++++++++ fs/xfs/xfs_iops.c | 15 ++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index 177f922acfaf..3c5a1d77fefa 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -844,6 +844,14 @@ xrep_is_tempfile( const struct xfs_inode *ip) { const struct inode *inode = &ip->i_vnode; + struct xfs_mount *mp = ip->i_mount; + + /* + * Files in the metadata directory tree also have S_PRIVATE set and + * IOP_XATTR unset, so we must distinguish them separately. 
+ */ + if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA)) + return false; if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR)) return true; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ee79cf161312..66a726a5fbbb 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -42,7 +42,9 @@ * held. For regular files, the lock order is the other way around - the * mmap_lock is taken during the page fault, and then we lock the ilock to do * block mapping. Hence we need a different class for the directory ilock so - * that lockdep can tell them apart. + * that lockdep can tell them apart. Directories in the metadata directory + * tree get a separate class so that lockdep reports will warn us if someone + * ever tries to lock regular directories after locking metadata directories. */ static struct lock_class_key xfs_nondir_ilock_class; static struct lock_class_key xfs_dir_ilock_class; @@ -1289,6 +1291,7 @@ xfs_setup_inode( { struct inode *inode = &ip->i_vnode; gfp_t gfp_mask; + bool is_meta = xfs_is_internal_inode(ip); inode->i_ino = ip->i_ino; inode->i_state |= I_NEW; @@ -1300,6 +1303,16 @@ xfs_setup_inode( i_size_write(inode, ip->i_disk_size); xfs_diflags_to_iflags(ip, true); + /* + * Mark our metadata files as private so that LSMs and the ACL code + * don't try to add their own metadata or reason about these files, + * and users cannot ever obtain file handles to them. + */ + if (is_meta) { + inode->i_flags |= S_PRIVATE; + inode->i_opflags &= ~IOP_XATTR; + } + if (S_ISDIR(inode->i_mode)) { /* * We set the i_rwsem class here to avoid potential races with From 688828d8f8cdf8b1b917de938a1ce86a93fdbba9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:53 -0800 Subject: [PATCH 12/28] xfs: advertise metadata directory feature Advertise the existence of the metadata directory feature; this will be used by scrub to decide if it needs to scan the metadir too. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 1 + fs/xfs/libxfs/xfs_sb.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 860284064c5a..a42c1a33691c 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -242,6 +242,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */ #define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24) /* exchange range */ #define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */ +#define XFS_FSOP_GEOM_FLAGS_METADIR (1 << 26) /* metadata directories */ /* * Minimum and maximum sizes need for growth checks. diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 19fa999b4032..4516824e3b99 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1295,6 +1295,8 @@ xfs_fs_geometry( geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64; if (xfs_has_exchange_range(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE; + if (xfs_has_metadir(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR; geo->rtsectsize = sbp->sb_blocksize; geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); From df866c538ff098baa210b407b822818a415a6e7e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:54 -0800 Subject: [PATCH 13/28] xfs: allow bulkstat to return metadata directories Allow the V5 bulkstat ioctl to return information about metadata directory files so that xfs_scrub can find and scrub them, since they are otherwise ordinary directories. (Metadata files of course require per-file scrub code and hence do not need exposure.) Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 10 +++++++++- fs/xfs/xfs_ioctl.c | 7 +++++++ fs/xfs/xfs_itable.c | 33 +++++++++++++++++++++++++++++---- fs/xfs/xfs_itable.h | 3 +++ 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index a42c1a33691c..499bea4ea806 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -490,9 +490,17 @@ struct xfs_bulk_ireq { */ #define XFS_BULK_IREQ_NREXT64 (1U << 2) +/* + * Allow bulkstat to return information about metadata directories. This + * enables xfs_scrub to find them for scanning, as they are otherwise ordinary + * directories. + */ +#define XFS_BULK_IREQ_METADIR (1U << 3) + #define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \ XFS_BULK_IREQ_SPECIAL | \ - XFS_BULK_IREQ_NREXT64) + XFS_BULK_IREQ_NREXT64 | \ + XFS_BULK_IREQ_METADIR) /* Operate on the root directory inode. */ #define XFS_BULK_IREQ_SPECIAL_ROOT (1) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 2567fd2a0994..f36fd8db388c 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -233,6 +233,10 @@ xfs_bulk_ireq_setup( if (hdr->flags & XFS_BULK_IREQ_NREXT64) breq->flags |= XFS_IBULK_NREXT64; + /* Caller wants to see metadata directories in bulkstat output. 
*/ + if (hdr->flags & XFS_BULK_IREQ_METADIR) + breq->flags |= XFS_IBULK_METADIR; + return 0; } @@ -323,6 +327,9 @@ xfs_ioc_inumbers( if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) return -EFAULT; + if (hdr.flags & XFS_BULK_IREQ_METADIR) + return -EINVAL; + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers); if (error == -ECANCELED) goto out_teardown; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 37c2b50d877e..1fa1c0564b0c 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -36,6 +36,14 @@ struct xfs_bstat_chunk { struct xfs_bulkstat *buf; }; +static inline bool +want_metadir_file( + struct xfs_inode *ip, + struct xfs_ibulk *breq) +{ + return xfs_is_metadir_inode(ip) && (breq->flags & XFS_IBULK_METADIR); +} + /* * Fill out the bulkstat info for a single inode and report it somewhere. * @@ -69,9 +77,6 @@ xfs_bulkstat_one_int( vfsuid_t vfsuid; vfsgid_t vfsgid; - if (xfs_is_sb_inum(mp, ino)) - goto out_advance; - error = xfs_iget(mp, tp, ino, (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), XFS_ILOCK_SHARED, &ip); @@ -97,8 +102,28 @@ xfs_bulkstat_one_int( vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid = i_gid_into_vfsgid(idmap, inode); + /* + * If caller wants files from the metadata directories, push out the + * bare minimum information for enabling scrub. + */ + if (want_metadir_file(ip, bc->breq)) { + memset(buf, 0, sizeof(*buf)); + buf->bs_ino = ino; + buf->bs_gen = inode->i_generation; + buf->bs_mode = inode->i_mode & S_IFMT; + xfs_bulkstat_health(ip, buf); + buf->bs_version = XFS_BULKSTAT_VERSION_V5; + xfs_iunlock(ip, XFS_ILOCK_SHARED); + xfs_irele(ip); + + error = bc->formatter(bc->breq, buf); + if (!error || error == -ECANCELED) + goto out_advance; + goto out; + } + /* If this is a private inode, don't leak its details to userspace. 
*/ - if (IS_PRIVATE(inode)) { + if (IS_PRIVATE(inode) || xfs_is_sb_inum(mp, ino)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_irele(ip); error = -EINVAL; diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 1659f13f17a8..f10e8f8f2335 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -22,6 +22,9 @@ struct xfs_ibulk { /* Fill out the bs_extents64 field if set. */ #define XFS_IBULK_NREXT64 (1U << 1) +/* Signal that we can return metadata directories. */ +#define XFS_IBULK_METADIR (1U << 2) + /* * Advance the user buffer pointer by one record of the given size. If the * buffer is now full, return the appropriate error code. From 382e275f0e8d09c886ad4cf949e89208463f1ff0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:55 -0800 Subject: [PATCH 14/28] xfs: don't count metadata directory files to quota Files in the metadata directory tree are internal to the filesystem. Don't count the inodes or the blocks they use in the root dquot because users do not need to know about their resource usage. This will also quiet down complaints about dquot usage not matching du output. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_dquot.c | 1 + fs/xfs/xfs_qm.c | 11 +++++++++++ fs/xfs/xfs_quota.h | 5 +++++ fs/xfs/xfs_trans_dquot.c | 6 ++++++ 4 files changed, 23 insertions(+) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index c1b211c260a9..3bf47458c517 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -983,6 +983,7 @@ xfs_qm_dqget_inode( xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(xfs_inode_dquot(ip, type) == NULL); + ASSERT(!xfs_is_metadir_inode(ip)); id = xfs_qm_id_for_quotatype(ip, type); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index d0674d84af3e..ec983cca9ada 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -304,6 +304,8 @@ xfs_qm_need_dqattach( return false; if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return false; + if (xfs_is_metadir_inode(ip)) + return false; return true; } @@ -326,6 +328,7 @@ xfs_qm_dqattach_locked( return 0; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); + ASSERT(!xfs_is_metadir_inode(ip)); if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) { error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER, @@ -1204,6 +1207,10 @@ xfs_qm_dqusage_adjust( } } + /* Metadata directory files are not accounted to user-visible quotas. 
*/ + if (xfs_is_metadir_inode(ip)) + goto error0; + ASSERT(ip->i_delayed_blks == 0); if (XFS_IS_REALTIME_INODE(ip)) { @@ -1754,6 +1761,8 @@ xfs_qm_vop_dqalloc( if (!XFS_IS_QUOTA_ON(mp)) return 0; + ASSERT(!xfs_is_metadir_inode(ip)); + lockflags = XFS_ILOCK_EXCL; xfs_ilock(ip, lockflags); @@ -1883,6 +1892,7 @@ xfs_qm_vop_chown( xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(XFS_IS_QUOTA_ON(ip->i_mount)); + ASSERT(!xfs_is_metadir_inode(ip)); /* old dquot */ prevdq = *IO_olddq; @@ -1970,6 +1980,7 @@ xfs_qm_vop_create_dqattach( return; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); + ASSERT(!xfs_is_metadir_inode(ip)); if (udqp && XFS_IS_UQUOTA_ON(mp)) { ASSERT(ip->i_udquot == NULL); diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 23d71a55bbc0..645761997bf2 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -29,6 +29,11 @@ struct xfs_buf; (XFS_IS_GQUOTA_ON(mp) && (ip)->i_gdquot == NULL) || \ (XFS_IS_PQUOTA_ON(mp) && (ip)->i_pdquot == NULL)) +#define XFS_IS_DQDETACHED(ip) \ + ((ip)->i_udquot == NULL && \ + (ip)->i_gdquot == NULL && \ + (ip)->i_pdquot == NULL) + #define XFS_QM_NEED_QUOTACHECK(mp) \ ((XFS_IS_UQUOTA_ON(mp) && \ (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index b368e13424c4..ca7df018290e 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -156,6 +156,8 @@ xfs_trans_mod_ino_dquot( unsigned int field, int64_t delta) { + ASSERT(!xfs_is_metadir_inode(ip) || XFS_IS_DQDETACHED(ip)); + xfs_trans_mod_dquot(tp, dqp, field, delta); if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) { @@ -247,6 +249,8 @@ xfs_trans_mod_dquot_byino( xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return; + ASSERT(!xfs_is_metadir_inode(ip) || XFS_IS_DQDETACHED(ip)); + if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) xfs_trans_mod_ino_dquot(tp, ip, ip->i_udquot, field, delta); if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) @@ -962,6 +966,8 @@ xfs_trans_reserve_quota_nblks( if (!XFS_IS_QUOTA_ON(mp)) 
return 0; + if (xfs_is_metadir_inode(ip)) + return 0; ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino)); xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); From cc0cf84aa7fe249f8c1ff5f6cecf69de9d07b582 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:55 -0800 Subject: [PATCH 15/28] xfs: mark quota inodes as metadata files When we're creating quota files at mount time, make sure to mark them as metadir inodes if appropriate. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_qm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index ec983cca9ada..b94d6f192e72 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -824,6 +824,8 @@ xfs_qm_qino_alloc( xfs_trans_cancel(tp); return error; } + if (xfs_has_metadir(mp)) + xfs_metafile_set_iflag(tp, *ipp, metafile_type); } /* From 61b6bdb30a4bee1f3417081aedfe9e346538f897 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:56 -0800 Subject: [PATCH 16/28] xfs: adjust xfs_bmap_add_attrfork for metadir Online repair might use xfs_bmap_add_attrfork to repair a file in the metadata directory tree if (say) the metadata file lacks the correct parent pointers. In that case, it is not correct to check that the file is dqattached -- metadata files must not have /any/ dquot attached at all. Adjust the assertions appropriately. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 5 ++++- fs/xfs/libxfs/xfs_bmap.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index c63da14eee04..17875ad865f5 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1004,7 +1004,10 @@ xfs_attr_add_fork( unsigned int blks; /* space reservation */ int error; /* error return value */ - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + if (xfs_is_metadir_inode(ip)) + ASSERT(XFS_IS_DQDETACHED(ip)); + else + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); blks = XFS_ADDAFORK_SPACE_RES(mp); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 5eda036cf9bf..7805a36e98c4 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1042,7 +1042,10 @@ xfs_bmap_add_attrfork( int error; /* error return value */ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + if (xfs_is_metadir_inode(ip)) + ASSERT(XFS_IS_DQDETACHED(ip)); + else + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); ASSERT(!xfs_inode_has_attr_fork(ip)); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); From be42fc1393d66024eb6415c92f45fab5d1878c3e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:57 -0800 Subject: [PATCH 17/28] xfs: record health problems with the metadata directory Make a report to the health monitoring subsystem any time we encounter something in the metadata directory tree that looks like corruption. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 1 + fs/xfs/libxfs/xfs_health.h | 4 +++- fs/xfs/libxfs/xfs_metadir.c | 13 ++++++++++--- fs/xfs/xfs_health.c | 1 + fs/xfs/xfs_icache.c | 1 + fs/xfs/xfs_inode.c | 1 + 6 files changed, 17 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 499bea4ea806..b05e6fb14703 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -198,6 +198,7 @@ struct xfs_fsop_geom { #define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */ #define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */ #define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */ +#define XFS_FSOP_GEOM_SICK_METADIR (1 << 8) /* metadata directory */ /* Output for XFS_FS_COUNTS */ typedef struct xfs_fsop_counts { diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 13301420a2f6..f90e8dfc0500 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -62,6 +62,7 @@ struct xfs_da_args; #define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */ #define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */ #define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */ +#define XFS_SICK_FS_METADIR (1 << 6) /* metadata directory tree */ /* Observable health issues for realtime volume metadata. 
*/ #define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */ @@ -105,7 +106,8 @@ struct xfs_da_args; XFS_SICK_FS_GQUOTA | \ XFS_SICK_FS_PQUOTA | \ XFS_SICK_FS_QUOTACHECK | \ - XFS_SICK_FS_NLINKS) + XFS_SICK_FS_NLINKS | \ + XFS_SICK_FS_METADIR) #define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \ XFS_SICK_RT_SUMMARY) diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c index 0a61316b4f52..bae7377c0f22 100644 --- a/fs/xfs/libxfs/xfs_metadir.c +++ b/fs/xfs/libxfs/xfs_metadir.c @@ -28,6 +28,7 @@ #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_parent.h" +#include "xfs_health.h" /* * Metadata Directory Tree @@ -94,8 +95,10 @@ xfs_metadir_lookup( }; int error; - if (!S_ISDIR(VFS_I(dp)->i_mode)) + if (!S_ISDIR(VFS_I(dp)->i_mode)) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; + } if (xfs_is_shutdown(mp)) return -EIO; @@ -103,10 +106,14 @@ xfs_metadir_lookup( if (error) return error; - if (!xfs_verify_ino(mp, args.inumber)) + if (!xfs_verify_ino(mp, args.inumber)) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; - if (xname->type != XFS_DIR3_FT_UNKNOWN && xname->type != args.filetype) + } + if (xname->type != XFS_DIR3_FT_UNKNOWN && xname->type != args.filetype) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; + } trace_xfs_metadir_lookup(dp, xname, args.inumber); *ino = args.inumber; diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index f45f125a669d..238258701450 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -380,6 +380,7 @@ static const struct ioctl_sick_map fs_map[] = { { XFS_SICK_FS_PQUOTA, XFS_FSOP_GEOM_SICK_PQUOTA }, { XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK }, { XFS_SICK_FS_NLINKS, XFS_FSOP_GEOM_SICK_NLINKS }, + { XFS_SICK_FS_METADIR, XFS_FSOP_GEOM_SICK_METADIR }, { 0, 0 }, }; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 5171ad93fc40..7b6c026d01a1 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -878,6 +878,7 
@@ xfs_trans_metafile_iget( whine: xfs_err(mp, "metadata inode 0x%llx type %u is corrupt", ino, metafile_type); + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ae94583ea3bb..103cf8b2af24 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -560,6 +560,7 @@ xfs_lookup( * a metadata file. */ if (XFS_IS_CORRUPT(dp->i_mount, xfs_is_metadir_inode(*ipp))) { + xfs_fs_mark_sick(dp->i_mount, XFS_SICK_FS_METADIR); error = -EFSCORRUPTED; goto out_irele; } From 679b098b59cf6d0fc10f2f66c68af4202686cbf9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:57 -0800 Subject: [PATCH 18/28] xfs: refactor directory tree root predicates Metadata directory trees make reasoning about the parent of a file more difficult. Traditionally, user files are children of sb_rootino, and metadata files are "children" of the superblock. Now, we add a third possibility -- some metadata files can be children of sb_metadirino, but the classic ones (rt free space data and quotas) are left alone. Let's add some helper functions (instead of open-coding the logic everywhere) to make scrub logic easier to understand. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/common.c | 29 +++++++++++++++++++++++++++++ fs/xfs/scrub/common.h | 4 ++++ fs/xfs/scrub/dir.c | 2 +- fs/xfs/scrub/dir_repair.c | 2 +- fs/xfs/scrub/dirtree.c | 15 ++++++++++++++- fs/xfs/scrub/dirtree.h | 12 +----------- fs/xfs/scrub/findparent.c | 15 +++++++++------ fs/xfs/scrub/inode_repair.c | 11 ++--------- fs/xfs/scrub/nlinks.c | 4 ++-- fs/xfs/scrub/nlinks_repair.c | 4 +--- fs/xfs/scrub/orphanage.c | 4 +++- fs/xfs/scrub/parent.c | 17 ++++++++--------- fs/xfs/scrub/parent_repair.c | 2 +- 13 files changed, 76 insertions(+), 45 deletions(-) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 001af49b2988..3ca3173c5a54 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1452,3 +1452,32 @@ xchk_inode_is_allocated( rcu_read_unlock(); return error; } + +/* Is this inode a root directory for either tree? */ +bool +xchk_inode_is_dirtree_root(const struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + return ip == mp->m_rootip || + (xfs_has_metadir(mp) && ip == mp->m_metadirip); +} + +/* Does the superblock point down to this inode? */ +bool +xchk_inode_is_sb_rooted(const struct xfs_inode *ip) +{ + return xchk_inode_is_dirtree_root(ip) || + xfs_is_sb_inum(ip->i_mount, ip->i_ino); +} + +/* What is the root directory inumber for this inode? 
*/ +xfs_ino_t +xchk_inode_rootdir_inum(const struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + if (xfs_is_metadir_inode(ip)) + return mp->m_metadirip->i_ino; + return mp->m_rootip->i_ino; +} diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index f3db628b14e1..b419adc6e7cf 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -242,4 +242,8 @@ void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks); int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino, bool *inuse); +bool xchk_inode_is_dirtree_root(const struct xfs_inode *ip); +bool xchk_inode_is_sb_rooted(const struct xfs_inode *ip); +xfs_ino_t xchk_inode_rootdir_inum(const struct xfs_inode *ip); + #endif /* __XFS_SCRUB_COMMON_H__ */ diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index bf9199e8df63..6b719c8885ef 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -253,7 +253,7 @@ xchk_dir_actor( * If this is ".." in the root inode, check that the inum * matches this dir. */ - if (dp->i_ino == mp->m_sb.sb_rootino && ino != dp->i_ino) + if (xchk_inode_is_dirtree_root(dp) && ino != dp->i_ino) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); } diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 64679fe08446..0c2cd42b3110 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -1270,7 +1270,7 @@ xrep_dir_scan_dirtree( int error; /* Roots of directory trees are their own parents. */ - if (sc->ip == sc->mp->m_rootip) + if (xchk_inode_is_dirtree_root(sc->ip)) xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino); /* diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c index bde58fb561ea..e43840733de9 100644 --- a/fs/xfs/scrub/dirtree.c +++ b/fs/xfs/scrub/dirtree.c @@ -917,7 +917,7 @@ xchk_dirtree( * scan, because the hook doesn't detach until after sc->ip gets * released during teardown. 
*/ - dl->root_ino = sc->mp->m_rootip->i_ino; + dl->root_ino = xchk_inode_rootdir_inum(sc->ip); dl->scan_ino = sc->ip->i_ino; trace_xchk_dirtree_start(sc->ip, sc->sm, 0); @@ -983,3 +983,16 @@ xchk_dirtree( trace_xchk_dirtree_done(sc->ip, sc->sm, error); return error; } + +/* Does the directory targeted by this scrub have no parents? */ +bool +xchk_dirtree_parentless(const struct xchk_dirtree *dl) +{ + struct xfs_scrub *sc = dl->sc; + + if (xchk_inode_is_dirtree_root(sc->ip)) + return true; + if (VFS_I(sc->ip)->i_nlink == 0) + return true; + return false; +} diff --git a/fs/xfs/scrub/dirtree.h b/fs/xfs/scrub/dirtree.h index 1e1686365c61..9e5d95492717 100644 --- a/fs/xfs/scrub/dirtree.h +++ b/fs/xfs/scrub/dirtree.h @@ -156,17 +156,7 @@ struct xchk_dirtree { #define xchk_dirtree_for_each_path(dl, path) \ list_for_each_entry((path), &(dl)->path_list, list) -static inline bool -xchk_dirtree_parentless(const struct xchk_dirtree *dl) -{ - struct xfs_scrub *sc = dl->sc; - - if (sc->ip == sc->mp->m_rootip) - return true; - if (VFS_I(sc->ip)->i_nlink == 0) - return true; - return false; -} +bool xchk_dirtree_parentless(const struct xchk_dirtree *dl); int xchk_dirtree_find_paths_to_root(struct xchk_dirtree *dl); int xchk_dirpath_append(struct xchk_dirtree *dl, struct xfs_inode *ip, diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c index 01766041ba2c..153d185190d8 100644 --- a/fs/xfs/scrub/findparent.c +++ b/fs/xfs/scrub/findparent.c @@ -362,15 +362,18 @@ xrep_findparent_confirm( }; int error; - /* - * The root directory always points to itself. Unlinked dirs can point - * anywhere, so we point them at the root dir too. - */ - if (sc->ip == sc->mp->m_rootip || VFS_I(sc->ip)->i_nlink == 0) { + /* The root directory always points to itself. */ + if (sc->ip == sc->mp->m_rootip) { *parent_ino = sc->mp->m_sb.sb_rootino; return 0; } + /* Unlinked dirs can point anywhere; point them up to the root dir.
*/ + if (VFS_I(sc->ip)->i_nlink == 0) { + *parent_ino = xchk_inode_rootdir_inum(sc->ip); + return 0; + } + /* Reject garbage parent inode numbers and self-referential parents. */ if (*parent_ino == NULLFSINO) return 0; @@ -413,7 +416,7 @@ xrep_findparent_self_reference( return sc->mp->m_sb.sb_rootino; if (VFS_I(sc->ip)->i_nlink == 0) - return sc->mp->m_sb.sb_rootino; + return xchk_inode_rootdir_inum(sc->ip); return NULLFSINO; } diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 486cedbc40bb..eaa1e1afe3a4 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -1767,15 +1767,8 @@ xrep_inode_pptr( if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) return 0; - /* The root directory doesn't have a parent pointer. */ - if (ip == mp->m_rootip) - return 0; - - /* - * Metadata inodes are rooted in the superblock and do not have any - * parents. - */ - if (xfs_is_internal_inode(ip)) + /* Children of the superblock do not have parent pointers. */ + if (xchk_inode_is_sb_rooted(ip)) return 0; /* Inode already has an attr fork; no further work possible here. */ diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index 80aee30886c4..4a47d0aabf73 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -279,7 +279,7 @@ xchk_nlinks_collect_dirent( * determine the backref count. */ if (dotdot) { - if (dp == sc->mp->m_rootip) + if (xchk_inode_is_dirtree_root(dp)) error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); else if (!xfs_has_parent(sc->mp)) error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0); @@ -735,7 +735,7 @@ xchk_nlinks_compare_inode( } } - if (ip == sc->mp->m_rootip) { + if (xchk_inode_is_dirtree_root(ip)) { /* * For the root of a directory tree, both the '.' and '..' * entries should point to the root directory. 
The dotdot diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c index b3e707f47b7b..4ebdee095428 100644 --- a/fs/xfs/scrub/nlinks_repair.c +++ b/fs/xfs/scrub/nlinks_repair.c @@ -60,11 +60,9 @@ xrep_nlinks_is_orphaned( unsigned int actual_nlink, const struct xchk_nlink *obs) { - struct xfs_mount *mp = ip->i_mount; - if (obs->parents != 0) return false; - if (ip == mp->m_rootip || ip == sc->orphanage) + if (xchk_inode_is_dirtree_root(ip) || ip == sc->orphanage) return false; return actual_nlink != 0; } diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 5f0d42392608..c287c755f2c5 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -295,7 +295,9 @@ xrep_orphanage_can_adopt( return false; if (sc->ip == sc->orphanage) return false; - if (xfs_is_sb_inum(sc->mp, sc->ip->i_ino)) + if (xchk_inode_is_sb_rooted(sc->ip)) + return false; + if (xfs_is_internal_inode(sc->ip)) return false; return true; } diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 20711a68a874..582536076433 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -300,7 +300,7 @@ xchk_parent_pptr_and_dotdot( } /* Is this the root dir? Then '..' must point to itself. 
*/ - if (sc->ip == sc->mp->m_rootip) { + if (xchk_inode_is_dirtree_root(sc->ip)) { if (sc->ip->i_ino != pp->parent_ino) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; @@ -711,7 +711,7 @@ xchk_parent_count_pptrs( } if (S_ISDIR(VFS_I(sc->ip)->i_mode)) { - if (sc->ip == sc->mp->m_rootip) + if (xchk_inode_is_dirtree_root(sc->ip)) pp->pptrs_found++; if (VFS_I(sc->ip)->i_nlink == 0 && pp->pptrs_found > 0) @@ -885,10 +885,9 @@ bool xchk_pptr_looks_zapped( struct xfs_inode *ip) { - struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); - ASSERT(xfs_has_parent(mp)); + ASSERT(xfs_has_parent(ip->i_mount)); /* * Temporary files that cannot be linked into the directory tree do not @@ -902,15 +901,15 @@ xchk_pptr_looks_zapped( * of a parent pointer scan is always the empty set. It's safe to scan * them even if the attr fork was zapped. */ - if (ip == mp->m_rootip) + if (xchk_inode_is_dirtree_root(ip)) return false; /* - * Metadata inodes are all rooted in the superblock and do not have - * any parents. Hence the attr fork will not be initialized, but - * there are no parent pointers that might have been zapped. + * Metadata inodes that are rooted in the superblock do not have any + * parents. Hence the attr fork will not be initialized, but there are + * no parent pointers that might have been zapped. */ - if (xfs_is_internal_inode(ip)) + if (xchk_inode_is_sb_rooted(ip)) return false; /* diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index 7b42b7f65a0b..f4e4845b7ec0 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -1334,7 +1334,7 @@ xrep_parent_rebuild_pptrs( * so that we can decide if we're moving this file to the orphanage. * For this purpose, root directories are their own parents. 
*/ - if (sc->ip == sc->mp->m_rootip) { + if (xchk_inode_is_dirtree_root(sc->ip)) { xrep_findparent_scan_found(&rp->pscan, sc->ip->i_ino); } else { error = xrep_parent_lookup_pptrs(sc, &parent_ino); From 13af229ee0dc348519202504961a178a2ed48102 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:58 -0800 Subject: [PATCH 19/28] xfs: do not count metadata directory files when doing online quotacheck Previously, we stated that files in the metadata directory tree are not counted in the dquot information. Fix the online quotacheck code to reflect this. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/quotacheck.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c index c77eb2de8df7..dc4033b91e44 100644 --- a/fs/xfs/scrub/quotacheck.c +++ b/fs/xfs/scrub/quotacheck.c @@ -398,10 +398,13 @@ xqcheck_collect_inode( bool isreg = S_ISREG(VFS_I(ip)->i_mode); int error = 0; - if (xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) { + if (xfs_is_metadir_inode(ip) || + xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) { /* * Quota files are never counted towards quota, so we do not - * need to take the lock. + * need to take the lock. Files do not switch between the + * metadata and regular directory trees without a reallocation, + * so we do not need to ILOCK them either. */ xchk_iscan_mark_visited(&xqc->iscan, ip); return 0; From 91fb4232be87caf89edddcf66c6d029552f06bb9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:58 -0800 Subject: [PATCH 20/28] xfs: metadata files can have xattrs if metadir is enabled If parent pointers are enabled, then metadata files will store parent pointers in xattrs, just like files in the user visible directory tree. Therefore, scrub and repair need to handle attr forks for metadata files on metadir filesystems. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/common.c | 21 +++++++++++++++------ fs/xfs/scrub/repair.c | 14 +++++++++++--- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 3ca3173c5a54..6d955580f608 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1245,12 +1245,6 @@ xchk_metadata_inode_forks( return 0; } - /* They also should never have extended attributes. */ - if (xfs_inode_hasattr(sc->ip)) { - xchk_ino_set_corrupt(sc, sc->ip->i_ino); - return 0; - } - /* Invoke the data fork scrubber. */ error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD); if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) @@ -1267,6 +1261,21 @@ xchk_metadata_inode_forks( xchk_ino_set_corrupt(sc, sc->ip->i_ino); } + /* + * Metadata files can only have extended attributes on metadir + * filesystems, either for parent pointers or for actual xattr data. + */ + if (xfs_inode_hasattr(sc->ip)) { + if (!xfs_has_metadir(sc->mp)) { + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + return 0; + } + + error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA); + if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + return error; + } + return 0; } diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 646ac8ade88d..f80000d77552 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -1082,7 +1082,12 @@ xrep_metadata_inode_forks( if (error) return error; - /* Make sure the attr fork looks ok before we delete it. */ + /* + * Metadata files can only have extended attributes on metadir + * filesystems, either for parent pointers or for actual xattr data. + * For a non-metadir filesystem, make sure the attr fork looks ok + * before we delete it. + */ if (xfs_inode_hasattr(sc->ip)) { error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA); if (error) @@ -1098,8 +1103,11 @@ xrep_metadata_inode_forks( return error; } - /* Clear the attr forks since metadata shouldn't have that. 
*/ - if (xfs_inode_hasattr(sc->ip)) { + /* + * Metadata files on non-metadir filesystems cannot have attr forks, + * so clear them now. + */ + if (xfs_inode_hasattr(sc->ip) && !xfs_has_metadir(sc->mp)) { if (!dirty) { dirty = true; xfs_trans_ijoin(sc->tp, sc->ip, 0); From aec2eb7da8f777998164a8ce4e38b84fd0136f97 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:59 -0800 Subject: [PATCH 21/28] xfs: adjust parent pointer scrubber for sb-rooted metadata files Starting with the metadata directory feature, we're allowed to call the directory and parent pointer scrubbers for every metadata file, including the ones that are children of the superblock. For these children, checking the link count against the number of parent pointers is a bit funny -- there's no such thing as a parent pointer for a child of the superblock since there's no corresponding dirent. For purposes of validating nlink, we pretend that there is a parent pointer. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/parent.c | 8 ++++++++ fs/xfs/scrub/parent_repair.c | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 582536076433..d8ea393f5059 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -720,6 +720,14 @@ xchk_parent_count_pptrs( pp->pptrs_found == 0) xchk_ino_set_corrupt(sc, sc->ip->i_ino); } else { + /* + * Starting with metadir, we allow checking of parent pointers + * of non-directory files that are children of the superblock. + * Pretend that we found a parent pointer attr. 
+ */ + if (xfs_has_metadir(sc->mp) && xchk_inode_is_sb_rooted(sc->ip)) + pp->pptrs_found++; + if (VFS_I(sc->ip)->i_nlink != pp->pptrs_found) xchk_ino_set_corrupt(sc, sc->ip->i_ino); } diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index f4e4845b7ec0..31bfe10be22a 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -1354,21 +1354,40 @@ STATIC int xrep_parent_rebuild_tree( struct xrep_parent *rp) { + struct xfs_scrub *sc = rp->sc; + bool try_adoption; int error; - if (xfs_has_parent(rp->sc->mp)) { + if (xfs_has_parent(sc->mp)) { error = xrep_parent_rebuild_pptrs(rp); if (error) return error; } - if (rp->pscan.parent_ino == NULLFSINO) { - if (xrep_orphanage_can_adopt(rp->sc)) + /* + * Any file with no parent could be adopted. This check happens after + * rebuilding the parent pointer structure because we might have cycled + * the ILOCK during that process. + */ + try_adoption = rp->pscan.parent_ino == NULLFSINO; + + /* + * Starting with metadir, we allow checking of parent pointers + * of non-directory files that are children of the superblock. + * Lack of parent is ok here. + */ + if (try_adoption && xfs_has_metadir(sc->mp) && + xchk_inode_is_sb_rooted(sc->ip)) + try_adoption = false; + + if (try_adoption) { + if (xrep_orphanage_can_adopt(sc)) return xrep_parent_move_to_orphanage(rp); return -EFSCORRUPTED; + } - if (S_ISDIR(VFS_I(rp->sc->ip)->i_mode)) + if (S_ISDIR(VFS_I(sc->ip)->i_mode)) return xrep_parent_reset_dotdot(rp); return 0; @@ -1422,6 +1441,14 @@ xrep_parent_set_nondir_nlink( if (error) return error; + /* + * Starting with metadir, we allow checking of parent pointers of + * non-directory files that are children of the superblock. Pretend + * that we found a parent pointer attr. 
+ */ + if (xfs_has_metadir(sc->mp) && xchk_inode_is_sb_rooted(sc->ip)) + rp->parents++; + if (rp->parents > 0 && xfs_inode_on_unlinked_list(ip)) { xfs_trans_ijoin(sc->tp, sc->ip, 0); joined = true; From 5dab2daa8aa1a127523f2babaf9611d91c28acea Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:18:59 -0800 Subject: [PATCH 22/28] xfs: fix di_metatype field of inodes that won't load Make sure that the di_metatype field is at least set plausibly so that later scrubbers could set the real type. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/inode.c | 9 +++++++-- fs/xfs/scrub/inode_repair.c | 6 +++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index ac5c56416533..25ee66e7649d 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -443,8 +443,13 @@ xchk_dinode( break; case 2: case 3: - if (!xfs_dinode_is_metadir(dip) && dip->di_metatype) - xchk_ino_set_corrupt(sc, ino); + if (xfs_dinode_is_metadir(dip)) { + if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) + xchk_ino_set_corrupt(sc, ino); + } else { + if (dip->di_metatype != 0) + xchk_ino_set_corrupt(sc, ino); + } if (dip->di_mode == 0 && sc->ip) xchk_ino_set_corrupt(sc, ino); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index eaa1e1afe3a4..5a58ddd27bd2 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -526,8 +526,12 @@ xrep_dinode_nlinks( return; } - if (!xfs_dinode_is_metadir(dip)) + if (xfs_dinode_is_metadir(dip)) { + if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) + dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN); + } else { dip->di_metatype = 0; + } } /* Fix any conflicting flags that the verifiers complain about. */ From 3d2c34111144a9fd3207ab914a7cd807cbe6a613 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Sun, 3 Nov 2024 20:19:00 -0800 Subject: [PATCH 23/28] xfs: scrub metadata directories Teach online scrub about the metadata directory tree. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/dir.c | 8 ++++++++ fs/xfs/scrub/dir_repair.c | 6 ++++++ fs/xfs/scrub/dirtree.c | 17 ++++++++++++++--- fs/xfs/scrub/findparent.c | 13 +++++++++++++ fs/xfs/scrub/parent.c | 14 ++++++++++++++ fs/xfs/scrub/trace.h | 1 + 6 files changed, 56 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 6b719c8885ef..c877bde71e62 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -100,6 +100,14 @@ xchk_dir_check_ftype( if (xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + + /* + * Metadata and regular inodes cannot cross trees. This property + * cannot change without a full inode free and realloc cycle, so it's + * safe to check this without holding locks. + */ + if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(sc->ip)) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); } /* diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 0c2cd42b3110..2456cf1cb744 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -415,6 +415,12 @@ xrep_dir_salvage_entry( if (error) return 0; + /* Don't mix metadata and regular directory trees. 
*/ + if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(rd->sc->ip)) { + xchk_irele(sc, ip); + return 0; + } + xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode); xchk_irele(sc, ip); diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c index e43840733de9..3a9cdf8738b6 100644 --- a/fs/xfs/scrub/dirtree.c +++ b/fs/xfs/scrub/dirtree.c @@ -362,7 +362,8 @@ xchk_dirpath_set_outcome( STATIC int xchk_dirpath_step_up( struct xchk_dirtree *dl, - struct xchk_dirpath *path) + struct xchk_dirpath *path, + bool is_metadir) { struct xfs_scrub *sc = dl->sc; struct xfs_inode *dp; @@ -435,6 +436,14 @@ xchk_dirpath_step_up( goto out_scanlock; } + /* Parent must be in the same directory tree. */ + if (is_metadir != xfs_is_metadir_inode(dp)) { + trace_xchk_dirpath_crosses_tree(dl->sc, dp, path->path_nr, + path->nr_steps, &dl->xname, &dl->pptr_rec); + error = -EFSCORRUPTED; + goto out_scanlock; + } + /* * If the extended attributes look as though they has been zapped by * the inode record repair code, we cannot scan for parent pointers. @@ -508,6 +517,7 @@ xchk_dirpath_walk_upwards( struct xchk_dirpath *path) { struct xfs_scrub *sc = dl->sc; + bool is_metadir; int error; ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL); @@ -538,6 +548,7 @@ xchk_dirpath_walk_upwards( * ILOCK state is no longer tracked in the scrub context. Hence we * must drop @sc->ip's ILOCK during the walk. */ + is_metadir = xfs_is_metadir_inode(sc->ip); mutex_unlock(&dl->lock); xchk_iunlock(sc, XFS_ILOCK_EXCL); @@ -547,7 +558,7 @@ xchk_dirpath_walk_upwards( * If we see any kind of error here (including corruptions), the parent * pointer of @sc->ip is corrupt. Stop the whole scan. */ - error = xchk_dirpath_step_up(dl, path); + error = xchk_dirpath_step_up(dl, path, is_metadir); if (error) { xchk_ilock(sc, XFS_ILOCK_EXCL); mutex_lock(&dl->lock); @@ -560,7 +571,7 @@ xchk_dirpath_walk_upwards( * *somewhere* in the path, but we don't need to stop scanning. 
*/ while (!error && path->outcome == XCHK_DIRPATH_SCANNING) - error = xchk_dirpath_step_up(dl, path); + error = xchk_dirpath_step_up(dl, path, is_metadir); /* Retake the locks we had, mark paths, etc. */ xchk_ilock(sc, XFS_ILOCK_EXCL); diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c index 153d185190d8..84487072b6dd 100644 --- a/fs/xfs/scrub/findparent.c +++ b/fs/xfs/scrub/findparent.c @@ -172,6 +172,10 @@ xrep_findparent_walk_directory( */ lock_mode = xfs_ilock_data_map_shared(dp); + /* Don't mix metadata and regular directory trees. */ + if (xfs_is_metadir_inode(dp) != xfs_is_metadir_inode(sc->ip)) + goto out_unlock; + /* * If this directory is known to be sick, we cannot scan it reliably * and must abort. @@ -368,6 +372,12 @@ xrep_findparent_confirm( return 0; } + /* The metadata root directory always points to itself. */ + if (sc->ip == sc->mp->m_metadirip) { + *parent_ino = sc->mp->m_sb.sb_metadirino; + return 0; + } + /* Unlinked dirs can point anywhere; point them up to the root dir. */ if (VFS_I(sc->ip)->i_nlink == 0) { *parent_ino = xchk_inode_rootdir_inum(sc->ip); @@ -415,6 +425,9 @@ xrep_findparent_self_reference( if (sc->ip->i_ino == sc->mp->m_sb.sb_rootino) return sc->mp->m_sb.sb_rootino; + if (sc->ip->i_ino == sc->mp->m_sb.sb_metadirino) + return sc->mp->m_sb.sb_metadirino; + if (VFS_I(sc->ip)->i_nlink == 0) return xchk_inode_rootdir_inum(sc->ip); diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index d8ea393f5059..3b692c4acc1e 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -132,6 +132,14 @@ xchk_parent_validate( return 0; } + /* Is this the metadata root dir? Then '..' must point to itself. */ + if (sc->ip == mp->m_metadirip) { + if (sc->ip->i_ino != mp->m_sb.sb_metadirino || + sc->ip->i_ino != parent_ino) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + return 0; + } + /* '..' must not point to ourselves. 
*/ if (sc->ip->i_ino == parent_ino) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); @@ -185,6 +193,12 @@ xchk_parent_validate( goto out_unlock; } + /* Metadata and regular inodes cannot cross trees. */ + if (xfs_is_metadir_inode(dp) != xfs_is_metadir_inode(sc->ip)) { + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + goto out_unlock; + } + /* Look for a directory entry in the parent pointing to the child. */ error = xchk_dir_walk(sc, dp, xchk_parent_actor, &spc); if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 58cc61f2ed53..bc246d86a5c8 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1753,6 +1753,7 @@ DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_badgen); DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_nondir_parent); DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_unlinked_parent); DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_found_next_step); +DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_crosses_tree); TRACE_DEFINE_ENUM(XCHK_DIRPATH_SCANNING); TRACE_DEFINE_ENUM(XCHK_DIRPATH_DELETE); From dcde94bdeeb94d04b3d8156345c79b9cdfcc4a0d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:19:00 -0800 Subject: [PATCH 24/28] xfs: check the metadata directory inumber in superblocks When metadata directories are enabled, make sure that the secondary superblocks point to the metadata directory. This isn't strictly required because the secondaries are only used to recover damaged filesystems, and the metadir root inumber is fixed. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/agheader.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index f8e5b67128d2..cad997f38a42 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -144,6 +144,11 @@ xchk_superblock( if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino)) xchk_block_set_preen(sc, bp); + if (xfs_has_metadir(sc->mp)) { + if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino)) + xchk_block_set_preen(sc, bp); + } + if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino)) xchk_block_set_preen(sc, bp); From 9dc31acb01a1c7649c35b7954562a9a33b817c33 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:19:01 -0800 Subject: [PATCH 25/28] xfs: move repair temporary files to the metadata directory tree Due to resource acquisition rules, we have to create the ondisk temporary files used to stage a filesystem repair before we can acquire a reference to the inode that we actually want to repair. Therefore, we do not know at tempfile creation time whether the tempfile will belong to the regular directory tree or the metadata directory tree. This distinction becomes important when the swapext code tries to figure out the quota accounting of the two files whose mappings are being swapped. The swapext code assumes that accounting updates are required for a file if dqattach attaches dquots. Metadir files are never accounted in quota, which means that swapext must not update the quota accounting when swapping in a repaired directory/xattr/rtbitmap structure. Prior to the swapext call, therefore, both files must be marked as METADIR for dqattach so that dqattach will ignore them. Add support for a repair tempfile to be switched to the metadir tree and switched back before being released so that ifree will just free the file. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/common.c | 5 +++ fs/xfs/scrub/tempfile.c | 97 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/tempfile.h | 3 ++ 3 files changed, 105 insertions(+) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 6d955580f608..c26a3314237a 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -39,6 +39,7 @@ #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/health.h" +#include "scrub/tempfile.h" /* Common code for the metadata scrubbers. */ @@ -1090,6 +1091,10 @@ xchk_setup_inode_contents( if (error) return error; + error = xrep_tempfile_adjust_directory_tree(sc); + if (error) + return error; + /* Lock the inode so the VFS cannot touch this file. */ xchk_ilock(sc, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index 3c5a1d77fefa..4b7f7860e37e 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -22,6 +22,7 @@ #include "xfs_exchmaps.h" #include "xfs_defer.h" #include "xfs_symlink_remote.h" +#include "xfs_metafile.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/repair.h" @@ -182,6 +183,101 @@ xrep_tempfile_create( return error; } +/* + * Temporary files have to be created before we even know which inode we're + * going to scrub, so we assume that they will be part of the regular directory + * tree. If it turns out that we're actually scrubbing a file from the + * metadata directory tree, we have to subtract the temp file from the root + * dquots and detach the dquots. 
+ */ +int +xrep_tempfile_adjust_directory_tree( + struct xfs_scrub *sc) +{ + int error; + + if (!sc->tempip) + return 0; + + ASSERT(sc->tp == NULL); + ASSERT(!xfs_is_metadir_inode(sc->tempip)); + + if (!sc->ip || !xfs_is_metadir_inode(sc->ip)) + return 0; + + xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); + sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; + + error = xchk_trans_alloc(sc, 0); + if (error) + goto out_iolock; + + xrep_tempfile_ilock(sc); + xfs_trans_ijoin(sc->tp, sc->tempip, 0); + + /* Metadir files are not accounted in quota, so drop icount */ + xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, -1L); + xfs_metafile_set_iflag(sc->tp, sc->tempip, XFS_METAFILE_UNKNOWN); + + error = xrep_trans_commit(sc); + if (error) + goto out_ilock; + + xfs_qm_dqdetach(sc->tempip); +out_ilock: + xrep_tempfile_iunlock(sc); +out_iolock: + xrep_tempfile_iounlock(sc); + return error; +} + +/* + * Remove this temporary file from the metadata directory tree so that it can + * be inactivated the normal way. 
+ */ +STATIC int +xrep_tempfile_remove_metadir( + struct xfs_scrub *sc) +{ + int error; + + if (!sc->tempip || !xfs_is_metadir_inode(sc->tempip)) + return 0; + + ASSERT(sc->tp == NULL); + + xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); + sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; + + error = xchk_trans_alloc(sc, 0); + if (error) + goto out_iolock; + + xrep_tempfile_ilock(sc); + xfs_trans_ijoin(sc->tp, sc->tempip, 0); + + xfs_metafile_clear_iflag(sc->tp, sc->tempip); + + /* Non-metadir files are accounted in quota, so bump bcount/icount */ + error = xfs_qm_dqattach_locked(sc->tempip, false); + if (error) + goto out_cancel; + + xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, 1L); + xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_BCOUNT, + sc->tempip->i_nblocks); + error = xrep_trans_commit(sc); + goto out_ilock; + +out_cancel: + xchk_trans_cancel(sc); +out_ilock: + xrep_tempfile_iunlock(sc); +out_iolock: + xrep_tempfile_iounlock(sc); + return error; +} + /* Take IOLOCK_EXCL on the temporary file, maybe. 
*/ bool xrep_tempfile_iolock_nowait( @@ -290,6 +386,7 @@ xrep_tempfile_rele( sc->temp_ilock_flags = 0; } + xrep_tempfile_remove_metadir(sc); xchk_irele(sc, sc->tempip); sc->tempip = NULL; } diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h index e51399f595fe..71c1b54599c3 100644 --- a/fs/xfs/scrub/tempfile.h +++ b/fs/xfs/scrub/tempfile.h @@ -10,6 +10,8 @@ int xrep_tempfile_create(struct xfs_scrub *sc, uint16_t mode); void xrep_tempfile_rele(struct xfs_scrub *sc); +int xrep_tempfile_adjust_directory_tree(struct xfs_scrub *sc); + bool xrep_tempfile_iolock_nowait(struct xfs_scrub *sc); int xrep_tempfile_iolock_polled(struct xfs_scrub *sc); void xrep_tempfile_iounlock(struct xfs_scrub *sc); @@ -42,6 +44,7 @@ static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc) xchk_ilock(sc, XFS_IOLOCK_EXCL); } # define xrep_is_tempfile(ip) (false) +# define xrep_tempfile_adjust_directory_tree(sc) (0) # define xrep_tempfile_rele(sc) #endif /* CONFIG_XFS_ONLINE_REPAIR */ From b3c03efa5972f084e40104307dbe432359279cf2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:19:02 -0800 Subject: [PATCH 26/28] xfs: check metadata directory file path connectivity Create a new scrubber type that checks that well known metadata directory paths are connected to the metadata inode that the incore structures think is in use. For example, check that "/quota/user" in the metadata directory tree actually points to mp->m_quotainfo->qi_uquotaip->i_ino. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_fs.h | 13 ++- fs/xfs/libxfs/xfs_health.h | 4 +- fs/xfs/scrub/common.h | 1 + fs/xfs/scrub/health.c | 1 + fs/xfs/scrub/metapath.c | 174 +++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/scrub.c | 9 ++ fs/xfs/scrub/scrub.h | 2 + fs/xfs/scrub/stats.c | 1 + fs/xfs/scrub/trace.c | 1 + fs/xfs/scrub/trace.h | 36 +++++++- fs/xfs/xfs_health.c | 1 + 12 files changed, 241 insertions(+), 3 deletions(-) create mode 100644 fs/xfs/scrub/metapath.c diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index ba418a40aeb5..d80c2817eb48 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -174,6 +174,7 @@ xfs-y += $(addprefix scrub/, \ inode.o \ iscan.o \ listxattr.o \ + metapath.o \ nlinks.o \ parent.o \ readdir.o \ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index b05e6fb14703..faa38a7d1eb0 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -199,6 +199,7 @@ struct xfs_fsop_geom { #define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */ #define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */ #define XFS_FSOP_GEOM_SICK_METADIR (1 << 8) /* metadata directory */ +#define XFS_FSOP_GEOM_SICK_METAPATH (1 << 9) /* metadir tree path */ /* Output for XFS_FS_COUNTS */ typedef struct xfs_fsop_counts { @@ -732,9 +733,10 @@ struct xfs_scrub_metadata { #define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */ #define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */ #define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */ +#define XFS_SCRUB_TYPE_METAPATH 29 /* metadata directory tree paths */ /* Number of scrub subcommands. */ -#define XFS_SCRUB_TYPE_NR 29 +#define XFS_SCRUB_TYPE_NR 30 /* * This special type code only applies to the vectored scrub implementation. @@ -812,6 +814,15 @@ struct xfs_scrub_vec_head { #define XFS_SCRUB_VEC_FLAGS_ALL (0) +/* + * i: sm_ino values for XFS_SCRUB_TYPE_METAPATH to select a metadata file for + * path checking. 
+ */ +#define XFS_SCRUB_METAPATH_PROBE (0) /* do we have a metapath scrubber? */ + +/* Number of metapath sm_ino values */ +#define XFS_SCRUB_METAPATH_NR (1) + /* * ioctl limits */ diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index f90e8dfc0500..a23df94319e5 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -63,6 +63,7 @@ struct xfs_da_args; #define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */ #define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */ #define XFS_SICK_FS_METADIR (1 << 6) /* metadata directory tree */ +#define XFS_SICK_FS_METAPATH (1 << 7) /* metadata directory tree path */ /* Observable health issues for realtime volume metadata. */ #define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */ @@ -107,7 +108,8 @@ struct xfs_da_args; XFS_SICK_FS_PQUOTA | \ XFS_SICK_FS_QUOTACHECK | \ XFS_SICK_FS_NLINKS | \ - XFS_SICK_FS_METADIR) + XFS_SICK_FS_METADIR | \ + XFS_SICK_FS_METAPATH) #define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \ XFS_SICK_RT_SUMMARY) diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index b419adc6e7cf..b2a81e85ded9 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -73,6 +73,7 @@ int xchk_setup_xattr(struct xfs_scrub *sc); int xchk_setup_symlink(struct xfs_scrub *sc); int xchk_setup_parent(struct xfs_scrub *sc); int xchk_setup_dirtree(struct xfs_scrub *sc); +int xchk_setup_metapath(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xchk_setup_rtbitmap(struct xfs_scrub *sc); int xchk_setup_rtsummary(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 6ceef3749e3b..b8b92ff3a573 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -109,6 +109,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { [XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK }, [XFS_SCRUB_TYPE_NLINKS] = { XHG_FS, XFS_SICK_FS_NLINKS }, [XFS_SCRUB_TYPE_DIRTREE] = { XHG_INO, XFS_SICK_INO_DIRTREE }, + 
[XFS_SCRUB_TYPE_METAPATH] = { XHG_FS, XFS_SICK_FS_METAPATH }, }; /* Return the health status mask for this scrub type. */ diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c new file mode 100644 index 000000000000..b7bd86df9877 --- /dev/null +++ b/fs/xfs/scrub/metapath.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2023-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_metafile.h" +#include "xfs_quota.h" +#include "xfs_qm.h" +#include "xfs_dir2.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/readdir.h" + +/* + * Metadata Directory Tree Paths + * ============================= + * + * A filesystem with metadir enabled expects to find metadata structures + * attached to files that are accessible by walking a path down the metadata + * directory tree. Given the metadir path and the incore inode storing the + * metadata, this scrubber ensures that the ondisk metadir path points to the + * ondisk inode represented by the incore inode. + */ + +struct xchk_metapath { + struct xfs_scrub *sc; + + /* Name for lookup */ + struct xfs_name xname; + + /* Path for this metadata file and the parent directory */ + const char *path; + const char *parent_path; + + /* Directory parent of the metadata file. */ + struct xfs_inode *dp; + + /* Locks held on dp */ + unsigned int dp_ilock_flags; +}; + +/* Release resources tracked in the buffer. 
*/ +static inline void +xchk_metapath_cleanup( + void *buf) +{ + struct xchk_metapath *mpath = buf; + + if (mpath->dp_ilock_flags) + xfs_iunlock(mpath->dp, mpath->dp_ilock_flags); + kfree(mpath->path); +} + +int +xchk_setup_metapath( + struct xfs_scrub *sc) +{ + if (!xfs_has_metadir(sc->mp)) + return -ENOENT; + if (sc->sm->sm_gen) + return -EINVAL; + + switch (sc->sm->sm_ino) { + case XFS_SCRUB_METAPATH_PROBE: + /* Just probing, nothing else to do. */ + if (sc->sm->sm_agno) + return -EINVAL; + return 0; + default: + return -ENOENT; + } +} + +/* + * Take the ILOCK on the metadata directory parent and child. We do not know + * that the metadata directory is not corrupt, so we lock the parent and try + * to lock the child. Returns 0 if successful, or -EINTR to abort the scrub. + */ +STATIC int +xchk_metapath_ilock_both( + struct xchk_metapath *mpath) +{ + struct xfs_scrub *sc = mpath->sc; + int error = 0; + + while (true) { + xfs_ilock(mpath->dp, XFS_ILOCK_EXCL); + if (xchk_ilock_nowait(sc, XFS_ILOCK_EXCL)) { + mpath->dp_ilock_flags |= XFS_ILOCK_EXCL; + return 0; + } + xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL); + + if (xchk_should_terminate(sc, &error)) + return error; + + delay(1); + } + + ASSERT(0); + return -EINTR; +} + +/* Unlock parent and child inodes. */ +static inline void +xchk_metapath_iunlock( + struct xchk_metapath *mpath) +{ + struct xfs_scrub *sc = mpath->sc; + + xchk_iunlock(sc, XFS_ILOCK_EXCL); + + mpath->dp_ilock_flags &= ~XFS_ILOCK_EXCL; + xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL); +} + +int +xchk_metapath( + struct xfs_scrub *sc) +{ + struct xchk_metapath *mpath = sc->buf; + xfs_ino_t ino = NULLFSINO; + int error; + + /* Just probing, nothing else to do. */ + if (sc->sm->sm_ino == XFS_SCRUB_METAPATH_PROBE) + return 0; + + /* Parent required to do anything else. 
*/ + if (mpath->dp == NULL) { + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + return 0; + } + + error = xchk_trans_alloc_empty(sc); + if (error) + return error; + + error = xchk_metapath_ilock_both(mpath); + if (error) + goto out_cancel; + + /* Make sure the parent dir has a dirent pointing to this file. */ + error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino); + trace_xchk_metapath_lookup(sc, mpath->path, mpath->dp, ino); + if (error == -ENOENT) { + /* No directory entry at all */ + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + error = 0; + goto out_ilock; + } + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) + goto out_ilock; + if (ino != sc->ip->i_ino) { + /* Pointing to wrong inode */ + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + } + +out_ilock: + xchk_metapath_iunlock(mpath); +out_cancel: + xchk_trans_cancel(sc); + return error; +} diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 8a5c3af4cfda..a30ed3d0dcaf 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -442,6 +442,13 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .has = xfs_has_parent, .repair = xrep_dirtree, }, + [XFS_SCRUB_TYPE_METAPATH] = { /* metadata directory tree path */ + .type = ST_GENERIC, + .setup = xchk_setup_metapath, + .scrub = xchk_metapath, + .has = xfs_has_metadir, + .repair = xrep_notsupported, + }, }; static int @@ -489,6 +496,8 @@ xchk_validate_inputs( if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino)) goto out; break; + case ST_GENERIC: + break; default: goto out; } diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 5993fcaffb2c..c688ff4fc7fc 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -73,6 +73,7 @@ enum xchk_type { ST_PERAG, /* per-AG metadata */ ST_FS, /* per-FS metadata */ ST_INODE, /* per-inode metadata */ + ST_GENERIC, /* determined by the scrubber */ }; struct xchk_meta_ops { @@ -255,6 +256,7 @@ int xchk_xattr(struct xfs_scrub *sc); int xchk_symlink(struct xfs_scrub *sc); int 
xchk_parent(struct xfs_scrub *sc); int xchk_dirtree(struct xfs_scrub *sc); +int xchk_metapath(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xchk_rtbitmap(struct xfs_scrub *sc); int xchk_rtsummary(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c index 7996c2335476..edcd02dc2e62 100644 --- a/fs/xfs/scrub/stats.c +++ b/fs/xfs/scrub/stats.c @@ -80,6 +80,7 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = { [XFS_SCRUB_TYPE_QUOTACHECK] = "quotacheck", [XFS_SCRUB_TYPE_NLINKS] = "nlinks", [XFS_SCRUB_TYPE_DIRTREE] = "dirtree", + [XFS_SCRUB_TYPE_METAPATH] = "metapath", }; /* Format the scrub stats into a text buffer, similar to pcp style. */ diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 4470ad0533b8..98f923ae664d 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -20,6 +20,7 @@ #include "xfs_dir2.h" #include "xfs_rmap.h" #include "xfs_parent.h" +#include "xfs_metafile.h" #include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index bc246d86a5c8..bb52baaa2fa7 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -70,6 +70,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER); +TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_METAPATH); #define XFS_SCRUB_TYPE_STRINGS \ { XFS_SCRUB_TYPE_PROBE, "probe" }, \ @@ -101,7 +102,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER); { XFS_SCRUB_TYPE_NLINKS, "nlinks" }, \ { XFS_SCRUB_TYPE_HEALTHY, "healthy" }, \ { XFS_SCRUB_TYPE_DIRTREE, "dirtree" }, \ - { XFS_SCRUB_TYPE_BARRIER, "barrier" } + { XFS_SCRUB_TYPE_BARRIER, "barrier" }, \ + { XFS_SCRUB_TYPE_METAPATH, "metapath" } #define XFS_SCRUB_FLAG_STRINGS \ { XFS_SCRUB_IFLAG_REPAIR, "repair" }, \ @@ -1916,6 +1918,38 @@ TRACE_EVENT(xchk_dirtree_live_update, __get_str(name)) ); +DECLARE_EVENT_CLASS(xchk_metapath_class, + TP_PROTO(struct 
xfs_scrub *sc, const char *path, + struct xfs_inode *dp, xfs_ino_t ino), + TP_ARGS(sc, path, dp, ino), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, scrub_ino) + __field(xfs_ino_t, parent_ino) + __field(xfs_ino_t, ino) + __string(name, path) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->scrub_ino = sc->ip ? sc->ip->i_ino : NULLFSINO; + __entry->parent_ino = dp ? dp->i_ino : NULLFSINO; + __entry->ino = ino; + __assign_str(name); + ), + TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx name '%s' ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->scrub_ino, + __entry->parent_ino, + __get_str(name), + __entry->ino) +); +#define DEFINE_XCHK_METAPATH_EVENT(name) \ +DEFINE_EVENT(xchk_metapath_class, name, \ + TP_PROTO(struct xfs_scrub *sc, const char *path, \ + struct xfs_inode *dp, xfs_ino_t ino), \ + TP_ARGS(sc, path, dp, ino)) +DEFINE_XCHK_METAPATH_EVENT(xchk_metapath_lookup); + /* repair tracepoints */ #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 238258701450..e5663e2ac9b8 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -381,6 +381,7 @@ static const struct ioctl_sick_map fs_map[] = { { XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK }, { XFS_SICK_FS_NLINKS, XFS_FSOP_GEOM_SICK_NLINKS }, { XFS_SICK_FS_METADIR, XFS_FSOP_GEOM_SICK_METADIR }, + { XFS_SICK_FS_METAPATH, XFS_FSOP_GEOM_SICK_METAPATH }, { 0, 0 }, }; From 87b7c205da8a7d90958c7e64fe5014a1d2f06b63 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:19:02 -0800 Subject: [PATCH 27/28] xfs: confirm dotdot target before replacing it during a repair xfs_dir_replace trips an assertion if you tell it to change a dirent to point to an inumber that it already points at. Look up the dotdot entry directly to confirm that we need to make a change. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/dir_repair.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 2456cf1cb744..249313882108 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -1638,6 +1638,7 @@ xrep_dir_swap( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; + xfs_ino_t ino; bool ip_local, temp_local; int error = 0; @@ -1655,14 +1656,17 @@ xrep_dir_swap( /* * Reset the temporary directory's '..' entry to point to the parent - * that we found. The temporary directory was created with the root - * directory as the parent, so we can skip this if repairing a - * subdirectory of the root. + * that we found. The dirent replace code asserts if the dirent + * already points at the new inumber, so we look it up here. * * It's also possible that this replacement could also expand a sf * tempdir into block format. */ - if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) { + error = xchk_dir_lookup(sc, rd->sc->tempip, &xfs_name_dotdot, &ino); + if (error) + return error; + + if (rd->pscan.parent_ino != ino) { error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot, rd->pscan.parent_ino, rd->tx.req.resblks); if (error) From 0d2c636e489c115add86bd66952880f92b5edab7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Nov 2024 20:19:03 -0800 Subject: [PATCH 28/28] xfs: repair metadata directory file path connectivity Fix disconnected or incorrect metadata directory paths. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/metapath.c | 351 +++++++++++++++++++++++++++++++++++++++- fs/xfs/scrub/repair.h | 3 + fs/xfs/scrub/scrub.c | 2 +- fs/xfs/scrub/trace.h | 5 + 4 files changed, 358 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c index b7bd86df9877..edc1a395c401 100644 --- a/fs/xfs/scrub/metapath.c +++ b/fs/xfs/scrub/metapath.c @@ -16,10 +16,15 @@ #include "xfs_quota.h" #include "xfs_qm.h" #include "xfs_dir2.h" +#include "xfs_parent.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" +#include "xfs_attr.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" #include "scrub/readdir.h" +#include "scrub/repair.h" /* * Metadata Directory Tree Paths @@ -38,15 +43,28 @@ struct xchk_metapath { /* Name for lookup */ struct xfs_name xname; - /* Path for this metadata file and the parent directory */ + /* Directory update for repairs */ + struct xfs_dir_update du; + + /* Path down to this metadata file from the parent directory */ const char *path; - const char *parent_path; /* Directory parent of the metadata file. */ struct xfs_inode *dp; /* Locks held on dp */ unsigned int dp_ilock_flags; + + /* Transaction block reservations */ + unsigned int link_resblks; + unsigned int unlink_resblks; + + /* Parent pointer updates */ + struct xfs_parent_args link_ppargs; + struct xfs_parent_args unlink_ppargs; + + /* Scratchpads for removing links */ + struct xfs_da_args pptr_args; }; /* Release resources tracked in the buffer. */ @@ -172,3 +190,332 @@ xchk_metapath( xchk_trans_cancel(sc); return error; } + +#ifdef CONFIG_XFS_ONLINE_REPAIR +/* Create the dirent represented by the final component of the path. 
*/ +STATIC int +xrep_metapath_link( + struct xchk_metapath *mpath) +{ + struct xfs_scrub *sc = mpath->sc; + + mpath->du.dp = mpath->dp; + mpath->du.name = &mpath->xname; + mpath->du.ip = sc->ip; + + if (xfs_has_parent(sc->mp)) + mpath->du.ppargs = &mpath->link_ppargs; + else + mpath->du.ppargs = NULL; + + trace_xrep_metapath_link(sc, mpath->path, mpath->dp, sc->ip->i_ino); + + return xfs_dir_add_child(sc->tp, mpath->link_resblks, &mpath->du); +} + +/* Remove the dirent at the final component of the path. */ +STATIC int +xrep_metapath_unlink( + struct xchk_metapath *mpath, + xfs_ino_t ino, + struct xfs_inode *ip) +{ + struct xfs_parent_rec rec; + struct xfs_scrub *sc = mpath->sc; + struct xfs_mount *mp = sc->mp; + int error; + + trace_xrep_metapath_unlink(sc, mpath->path, mpath->dp, ino); + + if (!ip) { + /* The child inode isn't allocated. Junk the dirent. */ + xfs_trans_log_inode(sc->tp, mpath->dp, XFS_ILOG_CORE); + return xfs_dir_removename(sc->tp, mpath->dp, &mpath->xname, + ino, mpath->unlink_resblks); + } + + mpath->du.dp = mpath->dp; + mpath->du.name = &mpath->xname; + mpath->du.ip = ip; + mpath->du.ppargs = NULL; + + /* Figure out if we're removing a parent pointer too. */ + if (xfs_has_parent(mp)) { + xfs_inode_to_parent_rec(&rec, ip); + error = xfs_parent_lookup(sc->tp, ip, &mpath->xname, &rec, + &mpath->pptr_args); + switch (error) { + case -ENOATTR: + break; + case 0: + mpath->du.ppargs = &mpath->unlink_ppargs; + break; + default: + return error; + } + } + + return xfs_dir_remove_child(sc->tp, mpath->unlink_resblks, &mpath->du); +} + +/* + * Try to create a dirent in @mpath->dp with the name @mpath->xname that points + * to @sc->ip. Returns: + * + * -EEXIST and an @alleged_child if the dirent points to the wrong inode; + * 0 if there is now a dirent pointing to @sc->ip; or + * A negative errno on error.
+ */ +STATIC int +xrep_metapath_try_link( + struct xchk_metapath *mpath, + xfs_ino_t *alleged_child) +{ + struct xfs_scrub *sc = mpath->sc; + xfs_ino_t ino; + int error; + + /* Allocate transaction, lock inodes, join to transaction. */ + error = xchk_trans_alloc(sc, mpath->link_resblks); + if (error) + return error; + + error = xchk_metapath_ilock_both(mpath); + if (error) { + xchk_trans_cancel(sc); + return error; + } + xfs_trans_ijoin(sc->tp, mpath->dp, 0); + xfs_trans_ijoin(sc->tp, sc->ip, 0); + + error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino); + trace_xrep_metapath_lookup(sc, mpath->path, mpath->dp, ino); + if (error == -ENOENT) { + /* + * There is no dirent in the directory. Create an entry + * pointing to @sc->ip. + */ + error = xrep_metapath_link(mpath); + if (error) + goto out_cancel; + + error = xrep_trans_commit(sc); + xchk_metapath_iunlock(mpath); + return error; + } + if (error) + goto out_cancel; + + if (ino == sc->ip->i_ino) { + /* The dirent already points to @sc->ip; we're done. */ + error = 0; + goto out_cancel; + } + + /* + * The dirent points elsewhere; pass that back so that the caller + * can try to remove the dirent. + */ + *alleged_child = ino; + error = -EEXIST; + +out_cancel: + xchk_trans_cancel(sc); + xchk_metapath_iunlock(mpath); + return error; +} + +/* + * Take the ILOCK on the metadata directory parent and a bad child, if one is + * supplied. We do not know that the metadata directory is not corrupt, so we + * lock the parent and try to lock the child. Returns 0 if successful, or + * -EINTR to abort the repair. The lock state of @dp is not recorded in @mpath. 
+ */ +STATIC int +xchk_metapath_ilock_parent_and_child( + struct xchk_metapath *mpath, + struct xfs_inode *ip) +{ + struct xfs_scrub *sc = mpath->sc; + int error = 0; + + while (true) { + xfs_ilock(mpath->dp, XFS_ILOCK_EXCL); + if (!ip || xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) + return 0; + xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL); + + if (xchk_should_terminate(sc, &error)) + return error; + + delay(1); + } + + ASSERT(0); + return -EINTR; +} + +/* + * Try to remove a dirent in @mpath->dp with the name @mpath->xname that points + * to @alleged_child. Returns: + * + * 0 if there is no longer a dirent; + * -EEXIST if the dirent points to @sc->ip; + * -EAGAIN and an updated @alleged_child if the dirent points elsewhere; or + * A negative errno for any other error. + */ +STATIC int +xrep_metapath_try_unlink( + struct xchk_metapath *mpath, + xfs_ino_t *alleged_child) +{ + struct xfs_scrub *sc = mpath->sc; + struct xfs_inode *ip = NULL; + xfs_ino_t ino; + int error; + + ASSERT(*alleged_child != sc->ip->i_ino); + + trace_xrep_metapath_try_unlink(sc, mpath->path, mpath->dp, + *alleged_child); + + /* + * Allocate transaction, grab the alleged child inode, lock inodes, + * join to transaction. + */ + error = xchk_trans_alloc(sc, mpath->unlink_resblks); + if (error) + return error; + + error = xchk_iget(sc, *alleged_child, &ip); + if (error == -EINVAL || error == -ENOENT) { + /* inode number is bogus, junk the dirent */ + error = 0; + } + if (error) { + xchk_trans_cancel(sc); + return error; + } + + error = xchk_metapath_ilock_parent_and_child(mpath, ip); + if (error) { + xchk_trans_cancel(sc); + return error; + } + xfs_trans_ijoin(sc->tp, mpath->dp, 0); + if (ip) + xfs_trans_ijoin(sc->tp, ip, 0); + + error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino); + trace_xrep_metapath_lookup(sc, mpath->path, mpath->dp, ino); + if (error == -ENOENT) { + /* + * There is no dirent in the directory anymore. We're ready to + * try the link operation again. 
+ */ + error = 0; + goto out_cancel; + } + if (error) + goto out_cancel; + + if (ino == sc->ip->i_ino) { + /* The dirent already points to @sc->ip; we're done. */ + error = -EEXIST; + goto out_cancel; + } + + /* + * The dirent does not point to the alleged child. Update the caller + * and signal that we want to be called again. + */ + if (ino != *alleged_child) { + *alleged_child = ino; + error = -EAGAIN; + goto out_cancel; + } + + /* Remove the link to the child. */ + error = xrep_metapath_unlink(mpath, ino, ip); + if (error) + goto out_cancel; + + error = xrep_trans_commit(sc); + goto out_unlock; + +out_cancel: + xchk_trans_cancel(sc); +out_unlock: + xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL); + if (ip) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xchk_irele(sc, ip); + } + return error; +} + +/* + * Make sure the metadata directory path points to the child being examined. + * + * Repair needs to be able to create a directory structure, create its own + * transactions, and take ILOCKs. This function /must/ be called after all + * other repairs have completed. + */ +int +xrep_metapath( + struct xfs_scrub *sc) +{ + struct xchk_metapath *mpath = sc->buf; + struct xfs_mount *mp = sc->mp; + int error = 0; + + /* Just probing, nothing to repair. */ + if (sc->sm->sm_ino == XFS_SCRUB_METAPATH_PROBE) + return 0; + + /* Parent required to do anything else. */ + if (mpath->dp == NULL) + return -EFSCORRUPTED; + + /* + * Make sure the child file actually has an attr fork to receive a new + * parent pointer if the fs has parent pointers. + */ + if (xfs_has_parent(mp)) { + error = xfs_attr_add_fork(sc->ip, + sizeof(struct xfs_attr_sf_hdr), 1); + if (error) + return error; + } + + /* Compute block reservation required to unlink and link a file. */ + mpath->unlink_resblks = xfs_remove_space_res(mp, MAXNAMELEN); + mpath->link_resblks = xfs_link_space_res(mp, MAXNAMELEN); + + do { + xfs_ino_t alleged_child; + + /* Re-establish the link, or tell us which inode to remove. 
*/ + error = xrep_metapath_try_link(mpath, &alleged_child); + if (!error) + return 0; + if (error != -EEXIST) + return error; + + /* + * Remove an incorrect link to an alleged child, or tell us + * which inode to remove. + */ + do { + error = xrep_metapath_try_unlink(mpath, &alleged_child); + } while (error == -EAGAIN); + if (error == -EEXIST) { + /* Link established; we're done. */ + error = 0; + break; + } + } while (!error); + + return error; +} +#endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 0e0dc2bf985c..90f9cb3b5ad8 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -134,6 +134,7 @@ int xrep_directory(struct xfs_scrub *sc); int xrep_parent(struct xfs_scrub *sc); int xrep_symlink(struct xfs_scrub *sc); int xrep_dirtree(struct xfs_scrub *sc); +int xrep_metapath(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xrep_rtbitmap(struct xfs_scrub *sc); @@ -208,6 +209,7 @@ xrep_setup_nothing( #define xrep_setup_parent xrep_setup_nothing #define xrep_setup_nlinks xrep_setup_nothing #define xrep_setup_dirtree xrep_setup_nothing +#define xrep_setup_metapath xrep_setup_nothing #define xrep_setup_inode(sc, imap) ((void)0) @@ -243,6 +245,7 @@ static inline int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *x) #define xrep_parent xrep_notsupported #define xrep_symlink xrep_notsupported #define xrep_dirtree xrep_notsupported +#define xrep_metapath xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index a30ed3d0dcaf..1ac33bea6f0a 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -447,7 +447,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .setup = xchk_setup_metapath, .scrub = xchk_metapath, .has = xfs_has_metadir, - .repair = xrep_notsupported, + .repair = xrep_metapath, }, }; diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index bb52baaa2fa7..b6c8d0944fa4 100644 --- a/fs/xfs/scrub/trace.h +++ 
b/fs/xfs/scrub/trace.h @@ -3598,6 +3598,11 @@ DEFINE_XCHK_DIRTREE_EVENT(xrep_dirtree_delete_path); DEFINE_XCHK_DIRTREE_EVENT(xrep_dirtree_create_adoption); DEFINE_XCHK_DIRTREE_EVALUATE_EVENT(xrep_dirtree_decided_fate); +DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_lookup); +DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_try_unlink); +DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_unlink); +DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_link); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ #endif /* _TRACE_XFS_SCRUB_TRACE_H */