From 63211876ced33fbb730f515e8d830de53533fc82 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:35 -0700 Subject: [PATCH 01/30] xfs: rearrange xfs_attr_match parameters Rearrange the parameters to this function so that they match the order of attr listent: attr_flags -> name -> namelen -> value -> valuelen. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr_leaf.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 3b024ab892e6..bb00183d1349 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -510,9 +510,9 @@ xfs_attr3_leaf_read( static bool xfs_attr_match( struct xfs_da_args *args, - uint8_t namelen, - unsigned char *name, - int flags) + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen) { if (args->namelen != namelen) @@ -522,12 +522,12 @@ xfs_attr_match( /* Recovery ignores the INCOMPLETE flag. */ if ((args->op_flags & XFS_DA_OP_RECOVERY) && - args->attr_filter == (flags & XFS_ATTR_NSP_ONDISK_MASK)) + args->attr_filter == (attr_flags & XFS_ATTR_NSP_ONDISK_MASK)) return true; /* All remaining matches need to be filtered by INCOMPLETE state. 
*/ if (args->attr_filter != - (flags & (XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE))) + (attr_flags & (XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE))) return false; return true; } @@ -746,8 +746,8 @@ xfs_attr_sf_findname( for (sfe = xfs_attr_sf_firstentry(sf); sfe < xfs_attr_sf_endptr(sf); sfe = xfs_attr_sf_nextentry(sfe)) { - if (xfs_attr_match(args, sfe->namelen, sfe->nameval, - sfe->flags)) + if (xfs_attr_match(args, sfe->flags, sfe->nameval, + sfe->namelen)) return sfe; } @@ -2443,15 +2443,16 @@ xfs_attr3_leaf_lookup_int( */ if (entry->flags & XFS_ATTR_LOCAL) { name_loc = xfs_attr3_leaf_name_local(leaf, probe); - if (!xfs_attr_match(args, name_loc->namelen, - name_loc->nameval, entry->flags)) + if (!xfs_attr_match(args, entry->flags, + name_loc->nameval, + name_loc->namelen)) continue; args->index = probe; return -EEXIST; } else { name_rmt = xfs_attr3_leaf_name_remote(leaf, probe); - if (!xfs_attr_match(args, name_rmt->namelen, - name_rmt->name, entry->flags)) + if (!xfs_attr_match(args, entry->flags, name_rmt->name, + name_rmt->namelen)) continue; args->index = probe; args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); From f49af061f49c004fb6df7f791f39f9ed370f767b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Apr 2024 09:47:36 -0700 Subject: [PATCH 02/30] xfs: check the flags earlier in xfs_attr_match Checking the flags match is much cheaper than a memcmp, so do it early on in xfs_attr_match, and also add a little helper to calculate the match mask right under the comment explaining the logic for it. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_attr_leaf.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index bb00183d1349..c47fad39744e 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -507,6 +507,13 @@ xfs_attr3_leaf_read( * INCOMPLETE flag will not be set in attr->attr_filter, but rather * XFS_DA_OP_RECOVERY will be set in args->op_flags. */ +static inline unsigned int xfs_attr_match_mask(const struct xfs_da_args *args) +{ + if (args->op_flags & XFS_DA_OP_RECOVERY) + return XFS_ATTR_NSP_ONDISK_MASK; + return XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE; +} + static bool xfs_attr_match( struct xfs_da_args *args, @@ -514,21 +521,15 @@ xfs_attr_match( const unsigned char *name, unsigned int namelen) { + unsigned int mask = xfs_attr_match_mask(args); if (args->namelen != namelen) return false; + if ((args->attr_filter & mask) != (attr_flags & mask)) + return false; if (memcmp(args->name, name, namelen) != 0) return false; - /* Recovery ignores the INCOMPLETE flag. */ - if ((args->op_flags & XFS_DA_OP_RECOVERY) && - args->attr_filter == (attr_flags & XFS_ATTR_NSP_ONDISK_MASK)) - return true; - - /* All remaining matches need to be filtered by INCOMPLETE state. */ - if (args->attr_filter != - (attr_flags & (XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE))) - return false; return true; } From 9713dc88773d066413ae23aa474b13241507a89e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:37 -0700 Subject: [PATCH 03/30] xfs: move xfs_attr_defer_add to xfs_attr_item.c Move the code that adds the incore xfs_attr_item deferred work data to a transaction to live with the ATTRI log item code. This means that the upper level extended attribute code no longer has to know about the inner workings of the ATTRI log items. Signed-off-by: Darrick J.
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 37 +++---------------------------------- fs/xfs/xfs_attr_item.c | 30 ++++++++++++++++++++++++++++++ fs/xfs/xfs_attr_item.h | 8 ++++++++ 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 629fb25d149c..50eab63ff3be 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -901,37 +901,6 @@ xfs_attr_lookup( return error; } -static void -xfs_attr_defer_add( - struct xfs_da_args *args, - unsigned int op_flags) -{ - - struct xfs_attr_intent *new; - - new = kmem_cache_zalloc(xfs_attr_intent_cache, - GFP_KERNEL | __GFP_NOFAIL); - new->xattri_op_flags = op_flags; - new->xattri_da_args = args; - - switch (op_flags) { - case XFS_ATTRI_OP_FLAGS_SET: - new->xattri_dela_state = xfs_attr_init_add_state(args); - break; - case XFS_ATTRI_OP_FLAGS_REPLACE: - new->xattri_dela_state = xfs_attr_init_replace_state(args); - break; - case XFS_ATTRI_OP_FLAGS_REMOVE: - new->xattri_dela_state = xfs_attr_init_remove_state(args); - break; - default: - ASSERT(0); - } - - xfs_defer_add(args->trans, &new->xattri_list, &xfs_attr_defer_type); - trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp); -} - int xfs_attr_set( struct xfs_da_args *args, @@ -1021,14 +990,14 @@ xfs_attr_set( case -EEXIST: if (op == XFS_ATTRUPDATE_REMOVE) { /* if no value, we are performing a remove operation */ - xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REMOVE); + xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE); break; } /* Pure create fails if the attr already exists */ if (op == XFS_ATTRUPDATE_CREATE) goto out_trans_cancel; - xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REPLACE); + xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE); break; case -ENOATTR: /* Can't remove what isn't there. */ @@ -1038,7 +1007,7 @@ xfs_attr_set( /* Pure replace fails if no existing attr to replace. 
*/ if (op == XFS_ATTRUPDATE_REPLACE) goto out_trans_cancel; - xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_SET); + xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET); break; default: goto out_trans_cancel; diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index a65ac7479768..a7d6c9af47e8 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -727,6 +727,36 @@ xfs_attr_create_done( return &attrdp->attrd_item; } +void +xfs_attr_defer_add( + struct xfs_da_args *args, + enum xfs_attr_defer_op op) +{ + struct xfs_attr_intent *new; + + new = kmem_cache_zalloc(xfs_attr_intent_cache, + GFP_NOFS | __GFP_NOFAIL); + new->xattri_da_args = args; + + switch (op) { + case XFS_ATTR_DEFER_SET: + new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_SET; + new->xattri_dela_state = xfs_attr_init_add_state(args); + break; + case XFS_ATTR_DEFER_REPLACE: + new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_REPLACE; + new->xattri_dela_state = xfs_attr_init_replace_state(args); + break; + case XFS_ATTR_DEFER_REMOVE: + new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_REMOVE; + new->xattri_dela_state = xfs_attr_init_remove_state(args); + break; + } + + xfs_defer_add(args->trans, &new->xattri_list, &xfs_attr_defer_type); + trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp); +} + const struct xfs_defer_op_type xfs_attr_defer_type = { .name = "attr", .max_items = 1, diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h index 3280a7930287..c32b669b0e16 100644 --- a/fs/xfs/xfs_attr_item.h +++ b/fs/xfs/xfs_attr_item.h @@ -51,4 +51,12 @@ struct xfs_attrd_log_item { extern struct kmem_cache *xfs_attri_cache; extern struct kmem_cache *xfs_attrd_cache; +enum xfs_attr_defer_op { + XFS_ATTR_DEFER_SET, + XFS_ATTR_DEFER_REMOVE, + XFS_ATTR_DEFER_REPLACE, +}; + +void xfs_attr_defer_add(struct xfs_da_args *args, enum xfs_attr_defer_op op); + #endif /* __XFS_ATTR_ITEM_H__ */ From a64e0134754bf88021e937aa34f1fbb5b524e585 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Mon, 22 Apr 2024 09:47:38 -0700 Subject: [PATCH 04/30] xfs: create a separate hashname function for extended attributes Create a separate function to compute name hashvalues for extended attributes. When we get to parent pointers we'll be altering the rules so that metadump obfuscation doesn't turn heinous. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 28 ++++++++++++++++++++++++++-- fs/xfs/libxfs/xfs_attr.h | 14 ++++++++++++++ fs/xfs/libxfs/xfs_attr_leaf.c | 3 +-- fs/xfs/scrub/attr.c | 11 ++++++++--- fs/xfs/xfs_attr_item.c | 2 +- fs/xfs/xfs_attr_list.c | 5 ++++- 6 files changed, 54 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 50eab63ff3be..8262c263be9d 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -280,7 +280,7 @@ xfs_attr_get( args->owner = args->dp->i_ino; args->geo = args->dp->i_mount->m_attr_geo; args->whichfork = XFS_ATTR_FORK; - args->hashval = xfs_da_hashname(args->name, args->namelen); + xfs_attr_sethash(args); /* Entirely possible to look up a name which doesn't exist */ args->op_flags = XFS_DA_OP_OKNOENT; @@ -415,6 +415,30 @@ xfs_attr_sf_addname( return error; } +/* Compute the hash value for a user/root/secure extended attribute */ +xfs_dahash_t +xfs_attr_hashname( + const uint8_t *name, + int namelen) +{ + return xfs_da_hashname(name, namelen); +} + +/* Compute the hash value for any extended attribute from any namespace. */ +xfs_dahash_t +xfs_attr_hashval( + struct xfs_mount *mp, + unsigned int attr_flags, + const uint8_t *name, + int namelen, + const void *value, + int valuelen) +{ + ASSERT(xfs_attr_check_namespace(attr_flags)); + + return xfs_attr_hashname(name, namelen); +} + /* * Handle the state change on completion of a multi-state attr operation. 
* @@ -925,7 +949,7 @@ xfs_attr_set( args->owner = args->dp->i_ino; args->geo = mp->m_attr_geo; args->whichfork = XFS_ATTR_FORK; - args->hashval = xfs_da_hashname(args->name, args->namelen); + xfs_attr_sethash(args); /* * We have no control over the attribute names that userspace passes us diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index cd106b0a424f..c63b1d610e53 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -628,6 +628,20 @@ xfs_attr_init_replace_state(struct xfs_da_args *args) return xfs_attr_init_add_state(args); } +xfs_dahash_t xfs_attr_hashname(const uint8_t *name, int namelen); + +xfs_dahash_t xfs_attr_hashval(struct xfs_mount *mp, unsigned int attr_flags, + const uint8_t *name, int namelen, const void *value, + int valuelen); + +/* Set the hash value for any extended attribute from any namespace. */ +static inline void xfs_attr_sethash(struct xfs_da_args *args) +{ + args->hashval = xfs_attr_hashval(args->dp->i_mount, args->attr_filter, + args->name, args->namelen, + args->value, args->valuelen); +} + extern struct kmem_cache *xfs_attr_intent_cache; int __init xfs_attr_intent_init_cache(void); void xfs_attr_intent_destroy_cache(void); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index c47fad39744e..e54a8372a30a 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -948,14 +948,13 @@ xfs_attr_shortform_to_leaf( nargs.namelen = sfe->namelen; nargs.value = &sfe->nameval[nargs.namelen]; nargs.valuelen = sfe->valuelen; - nargs.hashval = xfs_da_hashname(sfe->nameval, - sfe->namelen); nargs.attr_filter = sfe->flags & XFS_ATTR_NSP_ONDISK_MASK; if (!xfs_attr_check_namespace(sfe->flags)) { xfs_da_mark_sick(args); error = -EFSCORRUPTED; goto out; } + xfs_attr_sethash(&nargs); error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ ASSERT(error == -ENOATTR); error = xfs_attr3_leaf_add(bp, &nargs); diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 
7789bd2f0950..22d7ef4df169 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -179,7 +179,6 @@ xchk_xattr_actor( .dp = ip, .name = name, .namelen = namelen, - .hashval = xfs_da_hashname(name, namelen), .trans = sc->tp, .valuelen = valuelen, .owner = ip->i_ino, @@ -230,6 +229,7 @@ xchk_xattr_actor( args.value = ab->value; + xfs_attr_sethash(&args); error = xfs_attr_get_ilocked(&args); /* ENODATA means the hash lookup failed and the attr is bad */ if (error == -ENODATA) @@ -525,7 +525,10 @@ xchk_xattr_rec( xchk_da_set_corrupt(ds, level); goto out; } - calc_hash = xfs_da_hashname(lentry->nameval, lentry->namelen); + calc_hash = xfs_attr_hashval(mp, ent->flags, lentry->nameval, + lentry->namelen, + lentry->nameval + lentry->namelen, + be16_to_cpu(lentry->valuelen)); } else { rentry = (struct xfs_attr_leaf_name_remote *) (((char *)bp->b_addr) + nameidx); @@ -533,7 +536,9 @@ xchk_xattr_rec( xchk_da_set_corrupt(ds, level); goto out; } - calc_hash = xfs_da_hashname(rentry->name, rentry->namelen); + calc_hash = xfs_attr_hashval(mp, ent->flags, rentry->name, + rentry->namelen, NULL, + be32_to_cpu(rentry->valuelen)); } if (calc_hash != hash) xchk_da_set_corrupt(ds, level); diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index a7d6c9af47e8..4a57bcff49eb 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -582,13 +582,13 @@ xfs_attri_recover_work( args->whichfork = XFS_ATTR_FORK; args->name = nv->name.i_addr; args->namelen = nv->name.i_len; - args->hashval = xfs_da_hashname(args->name, args->namelen); args->value = nv->value.i_addr; args->valuelen = nv->value.i_len; args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK; args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT | XFS_DA_OP_LOGGED; args->owner = args->dp->i_ino; + xfs_attr_sethash(args); switch (xfs_attr_intent_op(attr)) { case XFS_ATTRI_OP_FLAGS_SET: diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 903ed46c6887..9bc4b5322539 100644 --- 
a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -135,12 +135,15 @@ xfs_attr_shortform_list( } sbp->entno = i; - sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen); sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; + sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags, + sfe->nameval, sfe->namelen, + sfe->nameval + sfe->namelen, + sfe->valuelen); sfe = xfs_attr_sf_nextentry(sfe); sbp++; nsbuf++; From 98493ff878859eb0adefbc57a49ad47a92dfd252 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:38 -0700 Subject: [PATCH 05/30] xfs: add parent pointer support to attribute code Add the new parent attribute type. XFS_ATTR_PARENT is used only for parent pointer entries; it uses reserved blocks like XFS_ATTR_ROOT. Signed-off-by: Mark Tinguely Signed-off-by: Dave Chinner Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_da_format.h | 9 +++++++-- fs/xfs/libxfs/xfs_log_format.h | 1 + fs/xfs/xfs_trace.h | 3 ++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index ecd0616f5776..0c80f7ab9475 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -714,13 +714,17 @@ struct xfs_attr3_leafblock { #define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */ #define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */ #define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */ +#define XFS_ATTR_PARENT_BIT 3 /* parent pointer attrs */ #define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */ #define XFS_ATTR_LOCAL (1u << XFS_ATTR_LOCAL_BIT) #define XFS_ATTR_ROOT (1u << XFS_ATTR_ROOT_BIT) #define XFS_ATTR_SECURE (1u << XFS_ATTR_SECURE_BIT) +#define XFS_ATTR_PARENT (1u << XFS_ATTR_PARENT_BIT) #define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT) -#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) +#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | \ + XFS_ATTR_SECURE | \ + XFS_ATTR_PARENT) #define XFS_ATTR_ONDISK_MASK (XFS_ATTR_NSP_ONDISK_MASK | \ XFS_ATTR_LOCAL | \ @@ -729,7 +733,8 @@ struct xfs_attr3_leafblock { #define XFS_ATTR_NAMESPACE_STR \ { XFS_ATTR_LOCAL, "local" }, \ { XFS_ATTR_ROOT, "root" }, \ - { XFS_ATTR_SECURE, "secure" } + { XFS_ATTR_SECURE, "secure" }, \ + { XFS_ATTR_PARENT, "parent" } /* * Alignment for namelist and valuelist entries (since they are mixed diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index accba2acd623..020aebd10143 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -1034,6 +1034,7 @@ struct xfs_icreate_log { */ #define XFS_ATTRI_FILTER_MASK (XFS_ATTR_ROOT | \ XFS_ATTR_SECURE | \ + XFS_ATTR_PARENT | \ XFS_ATTR_INCOMPLETE) /* diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 
57f225ba7e8a..5621db48e763 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -91,7 +91,8 @@ struct xfs_exchrange; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ { XFS_ATTR_SECURE, "SECURE" }, \ - { XFS_ATTR_INCOMPLETE, "INCOMPLETE" } + { XFS_ATTR_INCOMPLETE, "INCOMPLETE" }, \ + { XFS_ATTR_PARENT, "PARENT" } DECLARE_EVENT_CLASS(xfs_attr_list_class, TP_PROTO(struct xfs_attr_list_context *ctx), From 8337d58ab2868f231a29824cd86d2e309bd36fa9 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:39 -0700 Subject: [PATCH 06/30] xfs: define parent pointer ondisk extended attribute format We need to define the parent pointer attribute format before we start adding support for it into all the code that needs to use it. The EA format we will use encodes the following information: name={dirent name} value={parent inumber, parent inode generation} hash=xfs_dir2_hashname(dirent name) ^ (parent_inumber) The inode/gen gives all the information we need to reliably identify the parent without requiring child->parent lock ordering, and allows userspace to do pathname component level reconstruction without the kernel ever needing to verify the parent itself as part of ioctl calls. By using the name-value lookup mode in the extended attribute code to match parent pointers using both the xattr name and value, we can identify the exact parent pointer EA we need to modify/remove in rename/unlink operations without searching the entire EA space. By storing the dirent name, we have enough information to be able to validate and reconstruct damaged directory trees. Earlier iterations of this patchset encoded the directory offset in the parent pointer key, but this format required repair to keep that in sync across directory rebuilds, which is unnecessary complexity. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_da_format.h | 13 +++++++++++++ fs/xfs/libxfs/xfs_ondisk.h | 1 + 2 files changed, 14 insertions(+) diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 0c80f7ab9475..1395ad1937c5 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -890,4 +890,17 @@ static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp) xfs_failaddr_t xfs_da3_blkinfo_verify(struct xfs_buf *bp, struct xfs_da3_blkinfo *hdr3); +/* + * Parent pointer attribute format definition + * + * The xattr name contains the dirent name. + * The xattr value encodes the parent inode number and generation to ease + * opening parents by handle. + * The xattr hashval is xfs_dir2_namehash() ^ p_ino + */ +struct xfs_parent_rec { + __be64 p_ino; + __be32 p_gen; +} __packed; + #endif /* __XFS_DA_FORMAT_H__ */ diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index 81885a6a028e..25952ef584ee 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -119,6 +119,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, offset, 1); XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, name, 3); XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t, 10); + XFS_CHECK_STRUCT_SIZE(struct xfs_parent_rec, 12); /* log structures */ XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); From f041455eb5773eda3291903ad6d1f33d4798e9a2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:40 -0700 Subject: [PATCH 07/30] xfs: allow xattr matching on name and value for parent pointers If a file is hardlinked with the same name but from multiple parents, the parent pointers will all have the same dirent name (== attr name) but with different parent_ino/parent_gen values. To disambiguate, we need to be able to match on both the attr name and the attr value. This is in contrast to regular xattrs, which are matched only on name.
Therefore, plumb in the ability to match shortform and local attrs on name and value in the XFS_ATTR_PARENT namespace. Parent pointer attr values are never large enough to be stored in a remote attr, so we can reject these cases as corruption. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr_leaf.c | 52 +++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index e54a8372a30a..1a374c6885d7 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -514,12 +514,37 @@ static inline unsigned int xfs_attr_match_mask(const struct xfs_da_args *args) return XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE; } +static inline bool +xfs_attr_parent_match( + const struct xfs_da_args *args, + const void *value, + unsigned int valuelen) +{ + ASSERT(args->value != NULL); + + /* Parent pointers do not use remote values */ + if (!value) + return false; + + /* + * The only value we support is a parent rec. However, we'll accept + * any valuelen so that offline repair can delete ATTR_PARENT values + * that are not parent pointers. + */ + if (valuelen != args->valuelen) + return false; + + return memcmp(args->value, value, valuelen) == 0; +} + static bool xfs_attr_match( struct xfs_da_args *args, unsigned int attr_flags, const unsigned char *name, - unsigned int namelen) + unsigned int namelen, + const void *value, + unsigned int valuelen) { unsigned int mask = xfs_attr_match_mask(args); @@ -530,6 +555,9 @@ xfs_attr_match( if (memcmp(args->name, name, namelen) != 0) return false; + if (attr_flags & XFS_ATTR_PARENT) + return xfs_attr_parent_match(args, value, valuelen); + return true; } @@ -539,6 +567,13 @@ xfs_attr_copy_value( unsigned char *value, int valuelen) { + /* + * Parent pointer lookups require the caller to specify the name and + * value, so don't copy anything.
+ */ + if (args->attr_filter & XFS_ATTR_PARENT) + return 0; + /* * No copy if all we have to do is get the length */ @@ -748,7 +783,8 @@ xfs_attr_sf_findname( sfe < xfs_attr_sf_endptr(sf); sfe = xfs_attr_sf_nextentry(sfe)) { if (xfs_attr_match(args, sfe->flags, sfe->nameval, - sfe->namelen)) + sfe->namelen, &sfe->nameval[sfe->namelen], + sfe->valuelen)) return sfe; } @@ -2444,18 +2480,22 @@ xfs_attr3_leaf_lookup_int( if (entry->flags & XFS_ATTR_LOCAL) { name_loc = xfs_attr3_leaf_name_local(leaf, probe); if (!xfs_attr_match(args, entry->flags, - name_loc->nameval, - name_loc->namelen)) + name_loc->nameval, name_loc->namelen, + &name_loc->nameval[name_loc->namelen], + be16_to_cpu(name_loc->valuelen))) continue; args->index = probe; return -EEXIST; } else { + unsigned int valuelen; + name_rmt = xfs_attr3_leaf_name_remote(leaf, probe); + valuelen = be32_to_cpu(name_rmt->valuelen); if (!xfs_attr_match(args, entry->flags, name_rmt->name, - name_rmt->namelen)) + name_rmt->namelen, NULL, valuelen)) continue; args->index = probe; - args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); + args->rmtvaluelen = valuelen; args->rmtblkno = be32_to_cpu(name_rmt->valueblk); args->rmtblkcnt = xfs_attr3_rmt_blocks( args->dp->i_mount, From a918f5f2cd2c9d2bf94f485c5cebbf47fb0627df Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:41 -0700 Subject: [PATCH 08/30] xfs: refactor xfs_is_using_logged_xattrs checks in attr item recovery Move this feature check down to the per-op checks so that we can ensure that we never see parent pointer attr items on non-pptr filesystems, and that logged xattrs are turned on for non-pptr attr items. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_attr_item.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 4a57bcff49eb..413e3d3959a5 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -480,9 +480,6 @@ xfs_attri_validate( { unsigned int op = xfs_attr_log_item_op(attrp); - if (!xfs_is_using_logged_xattrs(mp)) - return false; - if (attrp->__pad != 0) return false; @@ -499,12 +496,16 @@ xfs_attri_validate( switch (op) { case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: + if (!xfs_is_using_logged_xattrs(mp)) + return false; if (attrp->alfi_value_len > XATTR_SIZE_MAX) return false; if (!xfs_attri_validate_namelen(attrp->alfi_name_len)) return false; break; case XFS_ATTRI_OP_FLAGS_REMOVE: + if (!xfs_is_using_logged_xattrs(mp)) + return false; if (attrp->alfi_value_len != 0) return false; if (!xfs_attri_validate_namelen(attrp->alfi_name_len)) From 5773f7f82be5aa98e4883566072d33342814cebe Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:42 -0700 Subject: [PATCH 09/30] xfs: create attr log item opcodes and formats for parent pointers Make the necessary alterations to the extended attribute log intent item ondisk format so that we can log parent pointer operations. This requires the creation of new opcodes specific to parent pointers, and a new four-argument replace operation to handle renames. At this point this part of the patchset has changed so much from what Allison originally wrote that I no longer think her SoB applies. Signed-off-by: Darrick J.
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 19 +++ fs/xfs/libxfs/xfs_attr.h | 4 +- fs/xfs/libxfs/xfs_da_btree.h | 4 + fs/xfs/libxfs/xfs_log_format.h | 22 ++- fs/xfs/xfs_attr_item.c | 259 ++++++++++++++++++++++++++++++--- fs/xfs/xfs_attr_item.h | 2 + 6 files changed, 284 insertions(+), 26 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 8262c263be9d..78c87c405e33 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -439,6 +439,23 @@ xfs_attr_hashval( return xfs_attr_hashname(name, namelen); } +/* + * PPTR_REPLACE operations require the caller to set the old and new names and + * values explicitly. Update the canonical fields to the new name and value + * here now that the removal phase has finished. + */ +static void +xfs_attr_update_pptr_replace_args( + struct xfs_da_args *args) +{ + ASSERT(args->new_namelen > 0); + args->name = args->new_name; + args->namelen = args->new_namelen; + args->value = args->new_value; + args->valuelen = args->new_valuelen; + xfs_attr_sethash(args); +} + /* * Handle the state change on completion of a multi-state attr operation. * @@ -459,6 +476,8 @@ xfs_attr_complete_op( if (!(args->op_flags & XFS_DA_OP_REPLACE)) replace_state = XFS_DAS_DONE; + else if (xfs_attr_intent_op(attr) == XFS_ATTRI_OP_FLAGS_PPTR_REPLACE) + xfs_attr_update_pptr_replace_args(args); args->op_flags &= ~XFS_DA_OP_REPLACE; args->attr_filter &= ~XFS_ATTR_INCOMPLETE; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index c63b1d610e53..d0ed7ea58ab0 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -510,8 +510,8 @@ struct xfs_attr_intent { struct xfs_da_args *xattri_da_args; /* - * Shared buffer containing the attr name and value so that the logging - * code can share large memory buffers between log items. + * Shared buffer containing the attr name, new name, and value so that + * the logging code can share large memory buffers between log items. 
*/ struct xfs_attri_log_nameval *xattri_nameval; diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 17cef594b5bb..354d5d65043e 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -55,7 +55,9 @@ enum xfs_dacmp { typedef struct xfs_da_args { struct xfs_da_geometry *geo; /* da block geometry */ const uint8_t *name; /* string (maybe not NULL terminated) */ + const uint8_t *new_name; /* new attr name */ void *value; /* set of bytes (maybe contain NULLs) */ + void *new_value; /* new xattr value (may contain NULLs) */ struct xfs_inode *dp; /* directory inode to manipulate */ struct xfs_trans *trans; /* current trans (changes over time) */ @@ -63,10 +65,12 @@ typedef struct xfs_da_args { xfs_ino_t owner; /* inode that owns the dir/attr data */ int valuelen; /* length of value */ + int new_valuelen; /* length of new_value */ uint8_t filetype; /* filetype of inode for directories */ uint8_t op_flags; /* operation flags */ uint8_t attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */ short namelen; /* length of string (maybe no NULL) */ + short new_namelen; /* length of new attr name */ xfs_dahash_t hashval; /* hash value of name */ xfs_extlen_t total; /* total blocks needed, for 1st bmap */ int whichfork; /* data or attribute fork */ diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 020aebd10143..632dd9732455 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -115,11 +115,13 @@ struct xfs_unmount_log_format { #define XLOG_REG_TYPE_BUD_FORMAT 26 #define XLOG_REG_TYPE_ATTRI_FORMAT 27 #define XLOG_REG_TYPE_ATTRD_FORMAT 28 -#define XLOG_REG_TYPE_ATTR_NAME 29 +#define XLOG_REG_TYPE_ATTR_NAME 29 #define XLOG_REG_TYPE_ATTR_VALUE 30 #define XLOG_REG_TYPE_XMI_FORMAT 31 #define XLOG_REG_TYPE_XMD_FORMAT 32 -#define XLOG_REG_TYPE_MAX 32 +#define XLOG_REG_TYPE_ATTR_NEWNAME 33 +#define XLOG_REG_TYPE_ATTR_NEWVALUE 34 +#define XLOG_REG_TYPE_MAX 34 /* * Flags to log 
operation header @@ -1026,6 +1028,9 @@ struct xfs_icreate_log { #define XFS_ATTRI_OP_FLAGS_SET 1 /* Set the attribute */ #define XFS_ATTRI_OP_FLAGS_REMOVE 2 /* Remove the attribute */ #define XFS_ATTRI_OP_FLAGS_REPLACE 3 /* Replace the attribute */ +#define XFS_ATTRI_OP_FLAGS_PPTR_SET 4 /* Set parent pointer */ +#define XFS_ATTRI_OP_FLAGS_PPTR_REMOVE 5 /* Remove parent pointer */ +#define XFS_ATTRI_OP_FLAGS_PPTR_REPLACE 6 /* Replace parent pointer */ #define XFS_ATTRI_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */ /* @@ -1048,7 +1053,18 @@ struct xfs_attri_log_format { uint64_t alfi_id; /* attri identifier */ uint64_t alfi_ino; /* the inode for this attr operation */ uint32_t alfi_op_flags; /* marks the op as a set or remove */ - uint32_t alfi_name_len; /* attr name length */ + union { + uint32_t alfi_name_len; /* attr name length */ + struct { + /* + * For PPTR_REPLACE, these are the lengths of the old + * and new attr names. The new and old values must + * have the same length. + */ + uint16_t alfi_old_name_len; + uint16_t alfi_new_name_len; + }; + }; uint32_t alfi_value_len; /* attr value length */ uint32_t alfi_attr_filter;/* attr filter flags */ }; diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 413e3d3959a5..be5064f5a531 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -73,8 +73,12 @@ static inline struct xfs_attri_log_nameval * xfs_attri_log_nameval_alloc( const void *name, unsigned int name_len, + const void *new_name, + unsigned int new_name_len, const void *value, - unsigned int value_len) + unsigned int value_len, + const void *new_value, + unsigned int new_value_len) { struct xfs_attri_log_nameval *nv; @@ -83,15 +87,26 @@ xfs_attri_log_nameval_alloc( * this. But kvmalloc() utterly sucks, so we use our own version. 
*/ nv = xlog_kvmalloc(sizeof(struct xfs_attri_log_nameval) + - name_len + value_len); + name_len + new_name_len + value_len + + new_value_len); nv->name.i_addr = nv + 1; nv->name.i_len = name_len; nv->name.i_type = XLOG_REG_TYPE_ATTR_NAME; memcpy(nv->name.i_addr, name, name_len); + if (new_name_len) { + nv->new_name.i_addr = nv->name.i_addr + name_len; + nv->new_name.i_len = new_name_len; + memcpy(nv->new_name.i_addr, new_name, new_name_len); + } else { + nv->new_name.i_addr = NULL; + nv->new_name.i_len = 0; + } + nv->new_name.i_type = XLOG_REG_TYPE_ATTR_NEWNAME; + if (value_len) { - nv->value.i_addr = nv->name.i_addr + name_len; + nv->value.i_addr = nv->name.i_addr + name_len + new_name_len; nv->value.i_len = value_len; memcpy(nv->value.i_addr, value, value_len); } else { @@ -100,6 +115,17 @@ xfs_attri_log_nameval_alloc( } nv->value.i_type = XLOG_REG_TYPE_ATTR_VALUE; + if (new_value_len) { + nv->new_value.i_addr = nv->name.i_addr + name_len + + new_name_len + value_len; + nv->new_value.i_len = new_value_len; + memcpy(nv->new_value.i_addr, new_value, new_value_len); + } else { + nv->new_value.i_addr = NULL; + nv->new_value.i_len = 0; + } + nv->new_value.i_type = XLOG_REG_TYPE_ATTR_NEWVALUE; + refcount_set(&nv->refcount, 1); return nv; } @@ -145,11 +171,20 @@ xfs_attri_item_size( *nbytes += sizeof(struct xfs_attri_log_format) + xlog_calc_iovec_len(nv->name.i_len); - if (!nv->value.i_len) - return; + if (nv->new_name.i_len) { + *nvecs += 1; + *nbytes += xlog_calc_iovec_len(nv->new_name.i_len); + } - *nvecs += 1; - *nbytes += xlog_calc_iovec_len(nv->value.i_len); + if (nv->value.i_len) { + *nvecs += 1; + *nbytes += xlog_calc_iovec_len(nv->value.i_len); + } + + if (nv->new_value.i_len) { + *nvecs += 1; + *nbytes += xlog_calc_iovec_len(nv->new_value.i_len); + } } /* @@ -179,15 +214,28 @@ xfs_attri_item_format( ASSERT(nv->name.i_len > 0); attrip->attri_format.alfi_size++; + if (nv->new_name.i_len > 0) + attrip->attri_format.alfi_size++; + if (nv->value.i_len > 0) 
attrip->attri_format.alfi_size++; + if (nv->new_value.i_len > 0) + attrip->attri_format.alfi_size++; + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT, &attrip->attri_format, sizeof(struct xfs_attri_log_format)); xlog_copy_from_iovec(lv, &vecp, &nv->name); + + if (nv->new_name.i_len > 0) + xlog_copy_from_iovec(lv, &vecp, &nv->new_name); + if (nv->value.i_len > 0) xlog_copy_from_iovec(lv, &vecp, &nv->value); + + if (nv->new_value.i_len > 0) + xlog_copy_from_iovec(lv, &vecp, &nv->new_value); } /* @@ -322,6 +370,8 @@ xfs_attr_log_item( const struct xfs_attr_intent *attr) { struct xfs_attri_log_format *attrp; + struct xfs_attri_log_nameval *nv = attr->xattri_nameval; + struct xfs_da_args *args = attr->xattri_da_args; /* * At this point the xfs_attr_intent has been constructed, and we've @@ -329,13 +379,25 @@ xfs_attr_log_item( * structure with fields from this xfs_attr_intent */ attrp = &attrip->attri_format; - attrp->alfi_ino = attr->xattri_da_args->dp->i_ino; + attrp->alfi_ino = args->dp->i_ino; ASSERT(!(attr->xattri_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK)); attrp->alfi_op_flags = attr->xattri_op_flags; - attrp->alfi_value_len = attr->xattri_nameval->value.i_len; - attrp->alfi_name_len = attr->xattri_nameval->name.i_len; - ASSERT(!(attr->xattri_da_args->attr_filter & ~XFS_ATTRI_FILTER_MASK)); - attrp->alfi_attr_filter = attr->xattri_da_args->attr_filter; + attrp->alfi_value_len = nv->value.i_len; + + switch (xfs_attr_log_item_op(attrp)) { + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + ASSERT(nv->value.i_len == nv->new_value.i_len); + + attrp->alfi_old_name_len = nv->name.i_len; + attrp->alfi_new_name_len = nv->new_name.i_len; + break; + default: + attrp->alfi_name_len = nv->name.i_len; + break; + } + + ASSERT(!(args->attr_filter & ~XFS_ATTRI_FILTER_MASK)); + attrp->alfi_attr_filter = args->attr_filter; } /* Get an ATTRI. */ @@ -374,8 +436,11 @@ xfs_attr_create_intent( * Transfer our reference to the name/value buffer to the * deferred work state structure. 
*/ - attr->xattri_nameval = xfs_attri_log_nameval_alloc(args->name, - args->namelen, args->value, args->valuelen); + attr->xattri_nameval = xfs_attri_log_nameval_alloc( + args->name, args->namelen, + args->new_name, args->new_namelen, + args->value, args->valuelen, + args->new_value, args->new_valuelen); } attrip = xfs_attri_init(mp, attr->xattri_nameval); @@ -494,6 +559,17 @@ xfs_attri_validate( return false; switch (op) { + case XFS_ATTRI_OP_FLAGS_PPTR_SET: + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: + if (!xfs_has_parent(mp)) + return false; + if (attrp->alfi_value_len != sizeof(struct xfs_parent_rec)) + return false; + if (!xfs_attri_validate_namelen(attrp->alfi_name_len)) + return false; + if (!(attrp->alfi_attr_filter & XFS_ATTR_PARENT)) + return false; + break; case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: if (!xfs_is_using_logged_xattrs(mp)) @@ -511,6 +587,18 @@ xfs_attri_validate( if (!xfs_attri_validate_namelen(attrp->alfi_name_len)) return false; break; + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + if (!xfs_has_parent(mp)) + return false; + if (!xfs_attri_validate_namelen(attrp->alfi_old_name_len)) + return false; + if (!xfs_attri_validate_namelen(attrp->alfi_new_name_len)) + return false; + if (attrp->alfi_value_len != sizeof(struct xfs_parent_rec)) + return false; + if (!(attrp->alfi_attr_filter & XFS_ATTR_PARENT)) + return false; + break; default: return false; } @@ -583,8 +671,12 @@ xfs_attri_recover_work( args->whichfork = XFS_ATTR_FORK; args->name = nv->name.i_addr; args->namelen = nv->name.i_len; + args->new_name = nv->new_name.i_addr; + args->new_namelen = nv->new_name.i_len; args->value = nv->value.i_addr; args->valuelen = nv->value.i_len; + args->new_value = nv->new_value.i_addr; + args->new_valuelen = nv->new_value.i_len; args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK; args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT | XFS_DA_OP_LOGGED; @@ -592,6 +684,8 @@ xfs_attri_recover_work( xfs_attr_sethash(args); 
switch (xfs_attr_intent_op(attr)) { + case XFS_ATTRI_OP_FLAGS_PPTR_SET: + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: args->total = xfs_attr_calc_size(args, &local); @@ -600,6 +694,7 @@ xfs_attri_recover_work( else attr->xattri_dela_state = xfs_attr_init_add_state(args); break; + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: case XFS_ATTRI_OP_FLAGS_REMOVE: attr->xattri_dela_state = xfs_attr_init_remove_state(args); break; @@ -700,7 +795,17 @@ xfs_attr_relog_intent( new_attrp->alfi_ino = old_attrp->alfi_ino; new_attrp->alfi_op_flags = old_attrp->alfi_op_flags; new_attrp->alfi_value_len = old_attrp->alfi_value_len; - new_attrp->alfi_name_len = old_attrp->alfi_name_len; + + switch (xfs_attr_log_item_op(old_attrp)) { + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + new_attrp->alfi_new_name_len = old_attrp->alfi_new_name_len; + new_attrp->alfi_old_name_len = old_attrp->alfi_old_name_len; + break; + default: + new_attrp->alfi_name_len = old_attrp->alfi_name_len; + break; + } + new_attrp->alfi_attr_filter = old_attrp->alfi_attr_filter; return &new_attrip->attri_item; @@ -734,22 +839,61 @@ xfs_attr_defer_add( enum xfs_attr_defer_op op) { struct xfs_attr_intent *new; + unsigned int log_op = 0; + bool is_pptr = args->attr_filter & XFS_ATTR_PARENT; + + if (is_pptr) { + ASSERT(xfs_has_parent(args->dp->i_mount)); + ASSERT((args->attr_filter & ~XFS_ATTR_PARENT) == 0); + ASSERT(args->op_flags & XFS_DA_OP_LOGGED); + ASSERT(args->valuelen == sizeof(struct xfs_parent_rec)); + } new = kmem_cache_zalloc(xfs_attr_intent_cache, GFP_NOFS | __GFP_NOFAIL); new->xattri_da_args = args; + /* Compute log operation from the higher level op and namespace. 
*/ switch (op) { case XFS_ATTR_DEFER_SET: - new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_SET; - new->xattri_dela_state = xfs_attr_init_add_state(args); + if (is_pptr) + log_op = XFS_ATTRI_OP_FLAGS_PPTR_SET; + else + log_op = XFS_ATTRI_OP_FLAGS_SET; break; case XFS_ATTR_DEFER_REPLACE: - new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_REPLACE; - new->xattri_dela_state = xfs_attr_init_replace_state(args); + if (is_pptr) + log_op = XFS_ATTRI_OP_FLAGS_PPTR_REPLACE; + else + log_op = XFS_ATTRI_OP_FLAGS_REPLACE; break; case XFS_ATTR_DEFER_REMOVE: - new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_REMOVE; + if (is_pptr) + log_op = XFS_ATTRI_OP_FLAGS_PPTR_REMOVE; + else + log_op = XFS_ATTRI_OP_FLAGS_REMOVE; + break; + default: + ASSERT(0); + break; + } + new->xattri_op_flags = log_op; + + /* Set up initial attr operation state. */ + switch (log_op) { + case XFS_ATTRI_OP_FLAGS_PPTR_SET: + case XFS_ATTRI_OP_FLAGS_SET: + new->xattri_dela_state = xfs_attr_init_add_state(args); + break; + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + ASSERT(args->new_valuelen == args->valuelen); + new->xattri_dela_state = xfs_attr_init_replace_state(args); + break; + case XFS_ATTRI_OP_FLAGS_REPLACE: + new->xattri_dela_state = xfs_attr_init_replace_state(args); + break; + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: + case XFS_ATTRI_OP_FLAGS_REMOVE: new->xattri_dela_state = xfs_attr_init_remove_state(args); break; } @@ -824,9 +968,13 @@ xlog_recover_attri_commit_pass2( struct xfs_attri_log_nameval *nv; const void *attr_name; const void *attr_value = NULL; + const void *attr_new_name = NULL; + const void *attr_new_value = NULL; size_t len; unsigned int name_len = 0; unsigned int value_len = 0; + unsigned int new_name_len = 0; + unsigned int new_value_len = 0; unsigned int op, i = 0; /* Validate xfs_attri_log_format before the large memory allocation */ @@ -847,6 +995,17 @@ xlog_recover_attri_commit_pass2( /* Check the number of log iovecs makes sense for the op code. 
*/ op = xfs_attr_log_item_op(attri_formatp); switch (op) { + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: + case XFS_ATTRI_OP_FLAGS_PPTR_SET: + /* Log item, attr name, attr value */ + if (item->ri_total != 3) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + name_len = attri_formatp->alfi_name_len; + value_len = attri_formatp->alfi_value_len; + break; case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: /* Log item, attr name, attr value */ @@ -867,6 +1026,20 @@ xlog_recover_attri_commit_pass2( } name_len = attri_formatp->alfi_name_len; break; + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + /* + * Log item, attr name, new attr name, attr value, new attr + * value + */ + if (item->ri_total != 5) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + name_len = attri_formatp->alfi_old_name_len; + new_name_len = attri_formatp->alfi_new_name_len; + new_value_len = value_len = attri_formatp->alfi_value_len; + break; default: XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); @@ -881,6 +1054,16 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; i++; + /* Validate the new attr name */ + if (new_name_len > 0) { + attr_new_name = xfs_attri_validate_name_iovec(mp, + attri_formatp, &item->ri_buf[i], + new_name_len); + if (!attr_new_name) + return -EFSCORRUPTED; + i++; + } + /* Validate the attr value, if present */ if (value_len != 0) { attr_value = xfs_attri_validate_value_iovec(mp, attri_formatp, @@ -890,6 +1073,16 @@ xlog_recover_attri_commit_pass2( i++; } + /* Validate the new attr value, if present */ + if (new_value_len != 0) { + attr_new_value = xfs_attri_validate_value_iovec(mp, + attri_formatp, &item->ri_buf[i], + new_value_len); + if (!attr_new_value) + return -EFSCORRUPTED; + i++; + } + /* * Make sure we got the correct number of buffers for the operation * that we just loaded. 
@@ -909,12 +1102,17 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; } fallthrough; + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: + case XFS_ATTRI_OP_FLAGS_PPTR_SET: case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: /* * Regular xattr set/remove/replace operations require a name * and do not take a newname. Values are optional for set and * replace. + * + * Name-value set/remove operations must have a name, do not + * take a newname, and can take a value. */ if (attr_name == NULL || name_len == 0) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, @@ -922,6 +1120,23 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; } break; + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + /* + * Name-value replace operations require the caller to + * specify the old and new names and values explicitly. + * Values are optional. + */ + if (attr_name == NULL || name_len == 0) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + if (attr_new_name == NULL || new_name_len == 0) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + break; } /* @@ -930,7 +1145,9 @@ xlog_recover_attri_commit_pass2( * reference. 
*/ nv = xfs_attri_log_nameval_alloc(attr_name, name_len, - attr_value, value_len); + attr_new_name, new_name_len, + attr_value, value_len, + attr_new_value, new_value_len); attrip = xfs_attri_init(mp, nv); memcpy(&attrip->attri_format, attri_formatp, len); diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h index c32b669b0e16..e74128cbb722 100644 --- a/fs/xfs/xfs_attr_item.h +++ b/fs/xfs/xfs_attr_item.h @@ -13,7 +13,9 @@ struct kmem_zone; struct xfs_attri_log_nameval { struct xfs_log_iovec name; + struct xfs_log_iovec new_name; /* PPTR_REPLACE only */ struct xfs_log_iovec value; + struct xfs_log_iovec new_value; /* PPTR_REPLACE only */ refcount_t refcount; /* name and value follow the end of this struct */ From ae673f534a30976ce5e709c4535a59c12b786ef3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:43 -0700 Subject: [PATCH 10/30] xfs: record inode generation in xattr update log intent items For parent pointer updates, record the i_generation of the file that is being updated so that we don't accidentally jump generations. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_log_format.h | 2 +- fs/xfs/xfs_attr_item.c | 33 +++++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 632dd9732455..3e6682ed656b 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -1049,7 +1049,7 @@ struct xfs_icreate_log { struct xfs_attri_log_format { uint16_t alfi_type; /* attri log item type */ uint16_t alfi_size; /* size of this item */ - uint32_t __pad; /* pad to 64 bit aligned */ + uint32_t alfi_igen; /* generation of alfi_ino for pptr ops */ uint64_t alfi_id; /* attri identifier */ uint64_t alfi_ino; /* the inode for this attr operation */ uint32_t alfi_op_flags; /* marks the op as a set or remove */ diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index be5064f5a531..2898eeb16366 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -388,9 +388,14 @@ xfs_attr_log_item( case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: ASSERT(nv->value.i_len == nv->new_value.i_len); + attrp->alfi_igen = VFS_I(args->dp)->i_generation; attrp->alfi_old_name_len = nv->name.i_len; attrp->alfi_new_name_len = nv->new_name.i_len; break; + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: + case XFS_ATTRI_OP_FLAGS_PPTR_SET: + attrp->alfi_igen = VFS_I(args->dp)->i_generation; + fallthrough; default: attrp->alfi_name_len = nv->name.i_len; break; @@ -545,9 +550,6 @@ xfs_attri_validate( { unsigned int op = xfs_attr_log_item_op(attrp); - if (attrp->__pad != 0) - return false; - if (attrp->alfi_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK) return false; @@ -639,9 +641,27 @@ xfs_attri_recover_work( int local; int error; - error = xlog_recover_iget(mp, attrp->alfi_ino, &ip); - if (error) - return ERR_PTR(error); + /* + * Parent pointer attr items record the generation but regular logged + * xattrs do not; select the right iget function. 
+ */ + switch (xfs_attr_log_item_op(attrp)) { + case XFS_ATTRI_OP_FLAGS_PPTR_SET: + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: + error = xlog_recover_iget_handle(mp, attrp->alfi_ino, + attrp->alfi_igen, &ip); + break; + default: + error = xlog_recover_iget(mp, attrp->alfi_ino, &ip); + break; + } + if (error) { + /* iget failed, so there is no inode to release */ + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attrp, + sizeof(*attrp)); + return ERR_PTR(-EFSCORRUPTED); + } if (xfs_inode_has_attr_fork(ip)) { error = xfs_attri_iread_extents(ip); @@ -793,6 +813,7 @@ xfs_attr_relog_intent( new_attrp = &new_attrip->attri_format; new_attrp->alfi_ino = old_attrp->alfi_ino; + new_attrp->alfi_igen = old_attrp->alfi_igen; new_attrp->alfi_op_flags = old_attrp->alfi_op_flags; new_attrp->alfi_value_len = old_attrp->alfi_value_len; From 297da63379c6cba504a33aa7c526f36b148d4610 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:43 -0700 Subject: [PATCH 11/30] xfs: Expose init_xattrs in xfs_create_tmpfile Tmp files are used as part of rename operations and will need attr forks initialized for parent pointers. Expose the init_xattrs parameter to the calling function to initialize the fork. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 5 +++-- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_iops.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3c843223b4ed..060e4e767b51 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1185,6 +1185,7 @@ xfs_create_tmpfile( struct mnt_idmap *idmap, struct xfs_inode *dp, umode_t mode, + bool init_xattrs, struct xfs_inode **ipp) { struct xfs_mount *mp = dp->i_mount; @@ -1225,7 +1226,7 @@ xfs_create_tmpfile( error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); if (!error) error = xfs_init_new_inode(idmap, tp, dp, ino, mode, - 0, 0, prid, false, &ip); + 0, 0, prid, init_xattrs, &ip); if (error) goto out_trans_cancel; @@ -3037,7 +3038,7 @@ xfs_rename_alloc_whiteout( int error; error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE, - &tmpfile); + false, &tmpfile); if (error) return error; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index a6da1ab8ab13..04a91e312993 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -522,7 +522,7 @@ int xfs_create(struct mnt_idmap *idmap, umode_t mode, dev_t rdev, bool need_xattr, struct xfs_inode **ipp); int xfs_create_tmpfile(struct mnt_idmap *idmap, - struct xfs_inode *dp, umode_t mode, + struct xfs_inode *dp, umode_t mode, bool init_xattrs, struct xfs_inode **ipp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 659fd10c0cda..d32322f9ecde 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -201,7 +201,7 @@ xfs_generic_create( xfs_create_need_xattr(dir, default_acl, acl), &ip); } else { - error = xfs_create_tmpfile(idmap, XFS_I(dir), mode, &ip); + error = xfs_create_tmpfile(idmap, XFS_I(dir), mode, false, &ip); } if (unlikely(error)) goto out_free_acl; From a08d6729637428b6ef8c6a5a94d8c6db7b805a44 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:44 -0700 Subject: 
[PATCH 12/30] xfs: add parent pointer validator functions The attr name of a parent pointer is a string, and the attr value of a parent pointer is (more or less) a file handle. So we need to modify attr_namecheck to verify the parent pointer name, and add a xfs_parent_valuecheck function to sanitize the handle. At the same time, we need to validate attr values during log recovery if the xattr is really a parent pointer. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong [djwong: move functions to xfs_parent.c, adjust for new disk format] Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_attr.c | 5 +++ fs/xfs/libxfs/xfs_parent.c | 92 ++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_parent.h | 15 +++++++ fs/xfs/xfs_attr_item.c | 10 +++++ 5 files changed, 123 insertions(+) create mode 100644 fs/xfs/libxfs/xfs_parent.c create mode 100644 fs/xfs/libxfs/xfs_parent.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 4e1eb3b6dbc4..4956ea9a307b 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -42,6 +42,7 @@ xfs-y += $(addprefix libxfs/, \ xfs_inode_buf.o \ xfs_log_rlimit.o \ xfs_ag_resv.o \ + xfs_parent.o \ xfs_rmap.o \ xfs_rmap_btree.o \ xfs_refcount.o \ diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 78c87c405e33..93524efa6e56 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -26,6 +26,7 @@ #include "xfs_trace.h" #include "xfs_attr_item.h" #include "xfs_xattr.h" +#include "xfs_parent.h" struct kmem_cache *xfs_attr_intent_cache; @@ -1568,6 +1569,10 @@ xfs_attr_namecheck( if (length >= MAXNAMELEN) return false; + /* Parent pointers have their own validation. 
*/ + if (attr_flags & XFS_ATTR_PARENT) + return xfs_parent_namecheck(attr_flags, name, length); + /* There shouldn't be any nulls here */ return !memchr(name, 0, length); } diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c new file mode 100644 index 000000000000..5961fa8c8561 --- /dev/null +++ b/fs/xfs/libxfs/xfs_parent.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022-2024 Oracle. + * All rights reserved. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_da_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_trans.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_attr_sf.h" +#include "xfs_bmap.h" +#include "xfs_defer.h" +#include "xfs_log.h" +#include "xfs_xattr.h" +#include "xfs_parent.h" +#include "xfs_trans_space.h" + +/* + * Parent pointer attribute handling. + * + * Because the attribute name is a filename component, it will never be longer + * than 255 bytes and must not contain nulls or slashes. These are roughly the + * same constraints that apply to attribute names. + * + * The attribute value must always be a struct xfs_parent_rec. This means the + * attribute will never be in remote format because 12 bytes is nowhere near + * xfs_attr_leaf_entsize_local_max() (~75% of block size). + * + * Creating a new parent attribute will always create a new attribute - there + * should never, ever be an existing attribute in the tree for a new inode. + * ENOSPC behavior is problematic - creating the inode without the parent + * pointer is effectively a corruption, so we allow parent attribute creation + * to dip into the reserve block pool to avoid unexpected ENOSPC errors from + * occurring. 
+ */ + +/* Return true if parent pointer attr name is valid. */ +bool +xfs_parent_namecheck( + unsigned int attr_flags, + const void *name, + size_t length) +{ + /* + * Parent pointers always use logged operations, so there should never + * be incomplete xattrs. + */ + if (attr_flags & XFS_ATTR_INCOMPLETE) + return false; + + return xfs_dir2_namecheck(name, length); +} + +/* Return true if parent pointer attr value is valid. */ +bool +xfs_parent_valuecheck( + struct xfs_mount *mp, + const void *value, + size_t valuelen) +{ + const struct xfs_parent_rec *rec = value; + + if (!xfs_has_parent(mp)) + return false; + + /* The xattr value must be a parent record. */ + if (valuelen != sizeof(struct xfs_parent_rec)) + return false; + + /* The parent record must be local. */ + if (value == NULL) + return false; + + /* The parent inumber must be valid. */ + if (!xfs_verify_dir_ino(mp, be64_to_cpu(rec->p_ino))) + return false; + + return true; +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h new file mode 100644 index 000000000000..ef8aff860780 --- /dev/null +++ b/fs/xfs/libxfs/xfs_parent.h @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022-2024 Oracle. + * All Rights Reserved. 
+ */ +#ifndef __XFS_PARENT_H__ +#define __XFS_PARENT_H__ + +/* Metadata validators */ +bool xfs_parent_namecheck(unsigned int attr_flags, const void *name, + size_t length); +bool xfs_parent_valuecheck(struct xfs_mount *mp, const void *value, + size_t valuelen); + +#endif /* __XFS_PARENT_H__ */ diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 2898eeb16366..2b10ac4c5fce 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -27,6 +27,7 @@ #include "xfs_error.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" +#include "xfs_parent.h" struct kmem_cache *xfs_attri_cache; struct kmem_cache *xfs_attrd_cache; @@ -973,6 +974,15 @@ xfs_attri_validate_value_iovec( return NULL; } + if ((attri_formatp->alfi_attr_filter & XFS_ATTR_PARENT) && + !xfs_parent_valuecheck(mp, iovec->i_addr, value_len)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, sizeof(*attri_formatp)); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + iovec->i_addr, iovec->i_len); + return NULL; + } + return iovec->i_addr; } From 7dba4a5fe1c5cdf0859830380c52f29295cbf345 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:45 -0700 Subject: [PATCH 13/30] xfs: extend transaction reservations for parent attributes We need to add, remove or modify parent pointer attributes during create/link/unlink/rename operations atomically with the dirents in the parent directories being modified. This means they need to be modified in the same transaction as the parent directories, and so we need to add the required space for the attribute modifications to the transaction reservations. Signed-off-by: Dave Chinner Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong [djwong: fix indenting errors, adjust for new log format] Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_trans_resv.c | 328 +++++++++++++++++++++++++++------ 1 file changed, 275 insertions(+), 53 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 6cd45e8c118d..6dbe6e7251e7 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -20,6 +20,9 @@ #include "xfs_qm.h" #include "xfs_trans_space.h" #include "xfs_rtbitmap.h" +#include "xfs_attr_item.h" +#include "xfs_log.h" +#include "xfs_da_format.h" #define _ALLOC true #define _FREE false @@ -422,29 +425,110 @@ xfs_calc_itruncate_reservation_minlogsize( return xfs_calc_itruncate_reservation(mp, true); } +static inline unsigned int xfs_calc_pptr_link_overhead(void) +{ + return sizeof(struct xfs_attri_log_format) + + xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) + + xlog_calc_iovec_len(MAXNAMELEN - 1); +} +static inline unsigned int xfs_calc_pptr_unlink_overhead(void) +{ + return sizeof(struct xfs_attri_log_format) + + xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) + + xlog_calc_iovec_len(MAXNAMELEN - 1); +} +static inline unsigned int xfs_calc_pptr_replace_overhead(void) +{ + return sizeof(struct xfs_attri_log_format) + + xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) + + xlog_calc_iovec_len(MAXNAMELEN - 1) + + xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) + + xlog_calc_iovec_len(MAXNAMELEN - 1); +} + /* * In renaming a files we can modify: * the five inodes involved: 5 * inode size * the two directory btrees: 2 * (max depth + v2) * dir block size * the two directory bmap btrees: 2 * max depth * block size * And the bmap_finish transaction can free dir and bmap blocks (two sets - * of bmap blocks) giving: + * of bmap blocks) giving (t2): * the agf for the ags in which the blocks live: 3 * sector size * the agfl for the ags in which the blocks live: 3 * sector size * the superblock for the free block count: sector size * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 
1) * block size + * If parent pointers are enabled (t3), then each transaction in the chain + * must be capable of setting or removing the extended attribute + * containing the parent information. It must also be able to handle + * the three xattr intent items that track the progress of the parent + * pointer update. */ STATIC uint xfs_calc_rename_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + - max((xfs_calc_inode_res(mp, 5) + - xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3), - XFS_FSB_TO_B(mp, 1)))); + unsigned int overhead = XFS_DQUOT_LOGRES(mp); + struct xfs_trans_resv *resp = M_RES(mp); + unsigned int t1, t2, t3 = 0; + + t1 = xfs_calc_inode_res(mp, 5) + + xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), + XFS_FSB_TO_B(mp, 1)); + + t2 = xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3), + XFS_FSB_TO_B(mp, 1)); + + if (xfs_has_parent(mp)) { + unsigned int rename_overhead, exchange_overhead; + + t3 = max(resp->tr_attrsetm.tr_logres, + resp->tr_attrrm.tr_logres); + + /* + * For a standard rename, the three xattr intent log items + * are (1) replacing the pptr for the source file; (2) + * removing the pptr on the dest file; and (3) adding a + * pptr for the whiteout file in the src dir. + * + * For an RENAME_EXCHANGE, there are two xattr intent + * items to replace the pptr for both src and dest + * files. Link counts don't change and there is no + * whiteout. + * + * In the worst case we can end up relogging all log + * intent items to allow the log tail to move ahead, so + * they become overhead added to each transaction in a + * processing chain. 
+ */ + rename_overhead = xfs_calc_pptr_replace_overhead() + + xfs_calc_pptr_unlink_overhead() + + xfs_calc_pptr_link_overhead(); + exchange_overhead = 2 * xfs_calc_pptr_replace_overhead(); + + overhead += max(rename_overhead, exchange_overhead); + } + + return overhead + max3(t1, t2, t3); +} + +static inline unsigned int +xfs_rename_log_count( + struct xfs_mount *mp, + struct xfs_trans_resv *resp) +{ + /* One for the rename, one more for freeing blocks */ + unsigned int ret = XFS_RENAME_LOG_COUNT; + + /* + * Pre-reserve enough log reservation to handle the transaction + * rolling needed to remove or add one parent pointer. + */ + if (xfs_has_parent(mp)) + ret += max(resp->tr_attrsetm.tr_logcount, + resp->tr_attrrm.tr_logcount); + + return ret; } /* @@ -461,6 +545,23 @@ xfs_calc_iunlink_remove_reservation( 2 * M_IGEO(mp)->inode_cluster_size; } +static inline unsigned int +xfs_link_log_count( + struct xfs_mount *mp, + struct xfs_trans_resv *resp) +{ + unsigned int ret = XFS_LINK_LOG_COUNT; + + /* + * Pre-reserve enough log reservation to handle the transaction + * rolling needed to add one parent pointer. 
+ */ + if (xfs_has_parent(mp)) + ret += resp->tr_attrsetm.tr_logcount; + + return ret; +} + /* * For creating a link to an inode: * the parent directory inode: inode size @@ -477,14 +578,23 @@ STATIC uint xfs_calc_link_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + - xfs_calc_iunlink_remove_reservation(mp) + - max((xfs_calc_inode_res(mp, 2) + - xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), - XFS_FSB_TO_B(mp, 1)))); + unsigned int overhead = XFS_DQUOT_LOGRES(mp); + struct xfs_trans_resv *resp = M_RES(mp); + unsigned int t1, t2, t3 = 0; + + overhead += xfs_calc_iunlink_remove_reservation(mp); + t1 = xfs_calc_inode_res(mp, 2) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); + t2 = xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); + + if (xfs_has_parent(mp)) { + t3 = resp->tr_attrsetm.tr_logres; + overhead += xfs_calc_pptr_link_overhead(); + } + + return overhead + max3(t1, t2, t3); } /* @@ -499,6 +609,23 @@ xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) M_IGEO(mp)->inode_cluster_size; } +static inline unsigned int +xfs_remove_log_count( + struct xfs_mount *mp, + struct xfs_trans_resv *resp) +{ + unsigned int ret = XFS_REMOVE_LOG_COUNT; + + /* + * Pre-reserve enough log reservation to handle the transaction + * rolling needed to remove one parent pointer.
+ */ + if (xfs_has_parent(mp)) + ret += resp->tr_attrrm.tr_logcount; + + return ret; +} + /* * For removing a directory entry we can modify: * the parent directory inode: inode size @@ -515,14 +642,24 @@ STATIC uint xfs_calc_remove_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + - xfs_calc_iunlink_add_reservation(mp) + - max((xfs_calc_inode_res(mp, 2) + - xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), - XFS_FSB_TO_B(mp, 1)))); + unsigned int overhead = XFS_DQUOT_LOGRES(mp); + struct xfs_trans_resv *resp = M_RES(mp); + unsigned int t1, t2, t3 = 0; + + overhead += xfs_calc_iunlink_add_reservation(mp); + + t1 = xfs_calc_inode_res(mp, 2) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); + t2 = xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), + XFS_FSB_TO_B(mp, 1)); + + if (xfs_has_parent(mp)) { + t3 = resp->tr_attrrm.tr_logres; + overhead += xfs_calc_pptr_unlink_overhead(); + } + + return overhead + max3(t1, t2, t3); } /* @@ -571,12 +708,40 @@ xfs_calc_icreate_resv_alloc( xfs_calc_finobt_res(mp); } -STATIC uint -xfs_calc_icreate_reservation(xfs_mount_t *mp) +static inline unsigned int +xfs_icreate_log_count( + struct xfs_mount *mp, + struct xfs_trans_resv *resp) { - return XFS_DQUOT_LOGRES(mp) + - max(xfs_calc_icreate_resv_alloc(mp), - xfs_calc_create_resv_modify(mp)); + unsigned int ret = XFS_CREATE_LOG_COUNT; + + /* + * Pre-reserve enough log reservation to handle the transaction + * rolling needed to add one parent pointer. 
+ */ + if (xfs_has_parent(mp)) + ret += resp->tr_attrsetm.tr_logcount; + + return ret; +} + +STATIC uint +xfs_calc_icreate_reservation( + struct xfs_mount *mp) +{ + struct xfs_trans_resv *resp = M_RES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int t1, t2, t3 = 0; + + t1 = xfs_calc_icreate_resv_alloc(mp); + t2 = xfs_calc_create_resv_modify(mp); + + if (xfs_has_parent(mp)) { + t3 = resp->tr_attrsetm.tr_logres; + overhead += xfs_calc_pptr_link_overhead(); + } + + return overhead + max3(t1, t2, t3); } STATIC uint @@ -589,6 +754,23 @@ xfs_calc_create_tmpfile_reservation( return res + xfs_calc_iunlink_add_reservation(mp); } +static inline unsigned int +xfs_mkdir_log_count( + struct xfs_mount *mp, + struct xfs_trans_resv *resp) +{ + unsigned int ret = XFS_MKDIR_LOG_COUNT; + + /* + * Pre-reserve enough log reservation to handle the transaction + * rolling needed to add one parent pointer. + */ + if (xfs_has_parent(mp)) + ret += resp->tr_attrsetm.tr_logcount; + + return ret; +} + /* * Making a new directory is the same as creating a new file. */ @@ -599,6 +781,22 @@ xfs_calc_mkdir_reservation( return xfs_calc_icreate_reservation(mp); } +static inline unsigned int +xfs_symlink_log_count( + struct xfs_mount *mp, + struct xfs_trans_resv *resp) +{ + unsigned int ret = XFS_SYMLINK_LOG_COUNT; + + /* + * Pre-reserve enough log reservation to handle the transaction + * rolling needed to add one parent pointer. + */ + if (xfs_has_parent(mp)) + ret += resp->tr_attrsetm.tr_logcount; + + return ret; +} /* * Making a new symplink is the same as creating a new file, but @@ -911,6 +1109,52 @@ xfs_calc_sb_reservation( return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); } +/* + * Namespace reservations. + * + * These get tricky when parent pointers are enabled as we have attribute + * modifications occurring from within these transactions. 
Rather than confuse + * each of these reservation calculations with the conditional attribute + * reservations, add them here in a clear and concise manner. This requires that + * the attribute reservations have already been calculated. + * + * Note that we only include the static attribute reservation here; the runtime + * reservation will have to be modified by the size of the attributes being + * added/removed/modified. See the comments on the attribute reservation + * calculations for more details. + */ +STATIC void +xfs_calc_namespace_reservations( + struct xfs_mount *mp, + struct xfs_trans_resv *resp) +{ + ASSERT(resp->tr_attrsetm.tr_logres > 0); + + resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp); + resp->tr_rename.tr_logcount = xfs_rename_log_count(mp, resp); + resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_link.tr_logres = xfs_calc_link_reservation(mp); + resp->tr_link.tr_logcount = xfs_link_log_count(mp, resp); + resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp); + resp->tr_remove.tr_logcount = xfs_remove_log_count(mp, resp); + resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp); + resp->tr_symlink.tr_logcount = xfs_symlink_log_count(mp, resp); + resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp); + resp->tr_create.tr_logcount = xfs_icreate_log_count(mp, resp); + resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + + resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); + resp->tr_mkdir.tr_logcount = xfs_mkdir_log_count(mp, resp); + resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; +} + void xfs_trans_resv_calc( struct xfs_mount *mp, @@ -930,35 +1174,11 @@ xfs_trans_resv_calc( resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - 
resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp); - resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT; - resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_link.tr_logres = xfs_calc_link_reservation(mp); - resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT; - resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp); - resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT; - resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp); - resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; - resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - - resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp); - resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; - resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - resp->tr_create_tmpfile.tr_logres = xfs_calc_create_tmpfile_reservation(mp); resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT; resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); - resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT; - resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp); resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT; resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES; @@ -988,6 +1208,8 @@ xfs_trans_resv_calc( resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + xfs_calc_namespace_reservations(mp, resp); + /* * The following transactions are logged in logical format with * a default log count. From fb102fe7fe02e70f8a49cc7f74bc0769cdab2912 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Mon, 22 Apr 2024 09:47:46 -0700 Subject: [PATCH 14/30] xfs: create a hashname function for parent pointers Although directory entry and parent pointer recordsets look very similar (name -> ino), there's one major difference between them: a file can be hardlinked from multiple parent directories with the same filename. This is common in shared container environments where a base directory tree might be hardlink-copied multiple times. IOWs the same 'ls' program might be hardlinked to multiple /srv/*/bin/ls paths. We don't want parent pointer operations to bog down on hash collisions between the same dirent name, so create a special hash function that mixes in the parent directory inode number. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 3 +++ fs/xfs/libxfs/xfs_parent.c | 47 ++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_parent.h | 5 ++++ fs/xfs/scrub/attr.c | 4 ++++ 4 files changed, 59 insertions(+) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 93524efa6e56..8c283e5c2470 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -437,6 +437,9 @@ xfs_attr_hashval( { ASSERT(xfs_attr_check_namespace(attr_flags)); + if (attr_flags & XFS_ATTR_PARENT) + return xfs_parent_hashattr(mp, name, namelen, value, valuelen); + return xfs_attr_hashname(name, namelen); } diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index 5961fa8c8561..d564baf2549c 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -90,3 +90,50 @@ xfs_parent_valuecheck( return true; } + +/* Compute the attribute name hash for a parent pointer. 
*/ +xfs_dahash_t +xfs_parent_hashval( + struct xfs_mount *mp, + const uint8_t *name, + int namelen, + xfs_ino_t parent_ino) +{ + struct xfs_name xname = { + .name = name, + .len = namelen, + }; + + /* + * Use the same dirent name hash as would be used on the directory, but + * mix in the parent inode number to avoid collisions on hardlinked + * files with identical names but different parents. + */ + return xfs_dir2_hashname(mp, &xname) ^ + upper_32_bits(parent_ino) ^ lower_32_bits(parent_ino); +} + +/* Compute the attribute name hash from the xattr components. */ +xfs_dahash_t +xfs_parent_hashattr( + struct xfs_mount *mp, + const uint8_t *name, + int namelen, + const void *value, + int valuelen) +{ + const struct xfs_parent_rec *rec = value; + + /* Requires a local attr value in xfs_parent_rec format */ + if (valuelen != sizeof(struct xfs_parent_rec)) { + ASSERT(valuelen == sizeof(struct xfs_parent_rec)); + return 0; + } + + if (!value) { + ASSERT(value != NULL); + return 0; + } + + return xfs_parent_hashval(mp, name, namelen, be64_to_cpu(rec->p_ino)); +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h index ef8aff860780..6a4028871b72 100644 --- a/fs/xfs/libxfs/xfs_parent.h +++ b/fs/xfs/libxfs/xfs_parent.h @@ -12,4 +12,9 @@ bool xfs_parent_namecheck(unsigned int attr_flags, const void *name, bool xfs_parent_valuecheck(struct xfs_mount *mp, const void *value, size_t valuelen); +xfs_dahash_t xfs_parent_hashval(struct xfs_mount *mp, const uint8_t *name, + int namelen, xfs_ino_t parent_ino); +xfs_dahash_t xfs_parent_hashattr(struct xfs_mount *mp, const uint8_t *name, + int namelen, const void *value, int valuelen); + #endif /* __XFS_PARENT_H__ */ diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 22d7ef4df169..c07d050b39b2 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -536,6 +536,10 @@ xchk_xattr_rec( xchk_da_set_corrupt(ds, level); goto out; } + if (ent->flags & XFS_ATTR_PARENT) { + xchk_da_set_corrupt(ds, level); + goto 
out; + } calc_hash = xfs_attr_hashval(mp, ent->flags, rentry->name, rentry->namelen, NULL, be32_to_cpu(rentry->valuelen)); From b7c62d90c12c6cc86f10b8a62cefe0029374b6ff Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:47 -0700 Subject: [PATCH 15/30] xfs: parent pointer attribute creation Add parent pointer attribute during xfs_create, and subroutines to initialize attributes. Note that the xfs_attr_intent object contains a pointer to the caller's xfs_da_args object, so the latter must persist until transaction commit. Signed-off-by: Dave Chinner Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong [djwong: shorten names, adjust to new format, set init_xattrs for parent pointers] Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_parent.c | 68 +++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_parent.h | 65 +++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_trans_space.c | 52 +++++++++++++++++++++++++ fs/xfs/libxfs/xfs_trans_space.h | 9 +++-- fs/xfs/scrub/tempfile.c | 2 +- fs/xfs/xfs_inode.c | 32 +++++++++++++--- fs/xfs/xfs_iops.c | 15 +++++++- fs/xfs/xfs_super.c | 10 +++++ 9 files changed, 242 insertions(+), 12 deletions(-) create mode 100644 fs/xfs/libxfs/xfs_trans_space.c diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 4956ea9a307b..0c1a0b67af93 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -51,6 +51,7 @@ xfs-y += $(addprefix libxfs/, \ xfs_symlink_remote.o \ xfs_trans_inode.o \ xfs_trans_resv.o \ + xfs_trans_space.o \ xfs_types.o \ ) # xfs_rtbitmap is shared with libxfs diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index d564baf2549c..65616cfc1a2b 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -27,6 +27,10 @@ #include "xfs_xattr.h" #include "xfs_parent.h" #include "xfs_trans_space.h" +#include "xfs_attr_item.h" +#include "xfs_health.h" + +struct kmem_cache *xfs_parent_args_cache; /* * Parent pointer 
attribute handling. @@ -137,3 +141,67 @@ xfs_parent_hashattr( return xfs_parent_hashval(mp, name, namelen, be64_to_cpu(rec->p_ino)); } + +/* + * Initialize the parent pointer arguments structure. Caller must have zeroed + * the contents of @args. @tp is only required for updates. + */ +static void +xfs_parent_da_args_init( + struct xfs_da_args *args, + struct xfs_trans *tp, + struct xfs_parent_rec *rec, + struct xfs_inode *child, + xfs_ino_t owner, + const struct xfs_name *parent_name) +{ + args->geo = child->i_mount->m_attr_geo; + args->whichfork = XFS_ATTR_FORK; + args->attr_filter = XFS_ATTR_PARENT; + args->op_flags = XFS_DA_OP_LOGGED | XFS_DA_OP_OKNOENT; + args->trans = tp; + args->dp = child; + args->owner = owner; + args->name = parent_name->name; + args->namelen = parent_name->len; + args->value = rec; + args->valuelen = sizeof(struct xfs_parent_rec); + xfs_attr_sethash(args); +} + +/* Make sure the incore state is ready for a parent pointer query/update. */ +static inline int +xfs_parent_iread_extents( + struct xfs_trans *tp, + struct xfs_inode *child) +{ + /* Parent pointers require that the attr fork must exist. */ + if (XFS_IS_CORRUPT(child->i_mount, !xfs_inode_has_attr_fork(child))) { + xfs_inode_mark_sick(child, XFS_SICK_INO_PARENT); + return -EFSCORRUPTED; + } + + return xfs_iread_extents(tp, child, XFS_ATTR_FORK); +} + +/* Add a parent pointer to reflect a dirent addition. 
+int +xfs_parent_addname( + struct xfs_trans *tp, + struct xfs_parent_args *ppargs, + struct xfs_inode *dp, + const struct xfs_name *parent_name, + struct xfs_inode *child) +{ + int error; + + error = xfs_parent_iread_extents(tp, child); + if (error) + return error; + + xfs_inode_to_parent_rec(&ppargs->rec, dp); + xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child, + child->i_ino, parent_name); + xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_SET); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h index 6a4028871b72..6de24e3ef318 100644 --- a/fs/xfs/libxfs/xfs_parent.h +++ b/fs/xfs/libxfs/xfs_parent.h @@ -17,4 +17,69 @@ xfs_dahash_t xfs_parent_hashval(struct xfs_mount *mp, const uint8_t *name, xfs_dahash_t xfs_parent_hashattr(struct xfs_mount *mp, const uint8_t *name, int namelen, const void *value, int valuelen); +/* Initializes a xfs_parent_rec to be stored as an attribute value. */ +static inline void +xfs_parent_rec_init( + struct xfs_parent_rec *rec, + xfs_ino_t ino, + uint32_t gen) +{ + rec->p_ino = cpu_to_be64(ino); + rec->p_gen = cpu_to_be32(gen); +} + +/* Initializes a xfs_parent_rec to be stored as an attribute value. */ +static inline void +xfs_inode_to_parent_rec( + struct xfs_parent_rec *rec, + const struct xfs_inode *dp) +{ + xfs_parent_rec_init(rec, dp->i_ino, VFS_IC(dp)->i_generation); +} + +extern struct kmem_cache *xfs_parent_args_cache; + +/* + * Parent pointer information needed to pass around the deferred xattr update + * machinery. + */ +struct xfs_parent_args { + struct xfs_parent_rec rec; + struct xfs_da_args args; +}; + +/* + * Start a parent pointer update by allocating the context object we need to + * perform a parent pointer update. 
+ */ +static inline int +xfs_parent_start( + struct xfs_mount *mp, + struct xfs_parent_args **ppargsp) +{ + if (!xfs_has_parent(mp)) { + *ppargsp = NULL; + return 0; + } + + *ppargsp = kmem_cache_zalloc(xfs_parent_args_cache, GFP_KERNEL); + if (!*ppargsp) + return -ENOMEM; + return 0; +} + +/* Finish a parent pointer update by freeing the context object. */ +static inline void +xfs_parent_finish( + struct xfs_mount *mp, + struct xfs_parent_args *ppargs) +{ + if (ppargs) + kmem_cache_free(xfs_parent_args_cache, ppargs); +} + +int xfs_parent_addname(struct xfs_trans *tp, struct xfs_parent_args *ppargs, + struct xfs_inode *dp, const struct xfs_name *parent_name, + struct xfs_inode *child); + #endif /* __XFS_PARENT_H__ */ diff --git a/fs/xfs/libxfs/xfs_trans_space.c b/fs/xfs/libxfs/xfs_trans_space.c new file mode 100644 index 000000000000..90532c3fa205 --- /dev/null +++ b/fs/xfs/libxfs/xfs_trans_space.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000,2005 Silicon Graphics, Inc. + * All Rights Reserved. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_da_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" + +/* Calculate the disk space required to add a parent pointer. 
*/ +unsigned int +xfs_parent_calc_space_res( + struct xfs_mount *mp, + unsigned int namelen) +{ + /* + * Parent pointers are always the first attr in an attr tree, and never + * larger than a block + */ + return XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) + + XFS_NEXTENTADD_SPACE_RES(mp, namelen, XFS_ATTR_FORK); +} + +unsigned int +xfs_create_space_res( + struct xfs_mount *mp, + unsigned int namelen) +{ + unsigned int ret; + + ret = XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp, namelen); + if (xfs_has_parent(mp)) + ret += xfs_parent_calc_space_res(mp, namelen); + + return ret; +} + +unsigned int +xfs_mkdir_space_res( + struct xfs_mount *mp, + unsigned int namelen) +{ + return xfs_create_space_res(mp, namelen); +} diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 9640fc232c14..6cda87153b38 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -80,8 +80,6 @@ /* This macro is not used - see inline code in xfs_attr_set */ #define XFS_ATTRSET_SPACE_RES(mp, v) \ (XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) + XFS_B_TO_FSB(mp, v)) -#define XFS_CREATE_SPACE_RES(mp,nl) \ - (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) #define XFS_DIOSTRAT_SPACE_RES(mp, v) \ (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v)) #define XFS_GROWFS_SPACE_RES(mp) \ @@ -90,8 +88,6 @@ ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK)) #define XFS_LINK_SPACE_RES(mp,nl) \ XFS_DIRENTER_SPACE_RES(mp,nl) -#define XFS_MKDIR_SPACE_RES(mp,nl) \ - (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) #define XFS_QM_DQALLOC_SPACE_RES(mp) \ (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + \ XFS_DQUOT_CLUSTER_SIZE_FSB) @@ -106,5 +102,10 @@ #define XFS_IFREE_SPACE_RES(mp) \ (xfs_has_finobt(mp) ? 
M_IGEO(mp)->inobt_maxlevels : 0) +unsigned int xfs_parent_calc_space_res(struct xfs_mount *mp, + unsigned int namelen); + +unsigned int xfs_create_space_res(struct xfs_mount *mp, unsigned int namelen); +unsigned int xfs_mkdir_space_res(struct xfs_mount *mp, unsigned int namelen); #endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index 6f39504a216e..ddbcccb3dba1 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -71,7 +71,7 @@ xrep_tempfile_create( return error; if (is_dir) { - resblks = XFS_MKDIR_SPACE_RES(mp, 0); + resblks = xfs_mkdir_space_res(mp, 0); tres = &M_RES(mp)->tr_mkdir; } else { resblks = XFS_IALLOC_SPACE_RES(mp); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 060e4e767b51..0dd4111a6773 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -40,6 +40,8 @@ #include "xfs_log_priv.h" #include "xfs_health.h" #include "xfs_pnfs.h" +#include "xfs_parent.h" +#include "xfs_xattr.h" struct kmem_cache *xfs_inode_cache; @@ -1017,7 +1019,7 @@ xfs_dir_hook_setup( int xfs_create( struct mnt_idmap *idmap, - xfs_inode_t *dp, + struct xfs_inode *dp, struct xfs_name *name, umode_t mode, dev_t rdev, @@ -1029,7 +1031,7 @@ xfs_create( struct xfs_inode *ip = NULL; struct xfs_trans *tp = NULL; int error; - bool unlock_dp_on_error = false; + bool unlock_dp_on_error = false; prid_t prid; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; @@ -1037,6 +1039,7 @@ xfs_create( struct xfs_trans_res *tres; uint resblks; xfs_ino_t ino; + struct xfs_parent_args *ppargs; trace_xfs_create(dp, name); @@ -1058,13 +1061,17 @@ xfs_create( return error; if (is_dir) { - resblks = XFS_MKDIR_SPACE_RES(mp, name->len); + resblks = xfs_mkdir_space_res(mp, name->len); tres = &M_RES(mp)->tr_mkdir; } else { - resblks = XFS_CREATE_SPACE_RES(mp, name->len); + resblks = xfs_create_space_res(mp, name->len); tres = &M_RES(mp)->tr_create; } + error = xfs_parent_start(mp, &ppargs); + if (error) + goto 
out_release_dquots; + /* * Initially assume that the file does not exist and * reserve the resources for that case. If that is not @@ -1080,7 +1087,7 @@ xfs_create( resblks, &tp); } if (error) - goto out_release_dquots; + goto out_parent; xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); unlock_dp_on_error = true; @@ -1123,6 +1130,16 @@ xfs_create( xfs_bumplink(tp, dp); } + /* + * If we have parent pointers, we need to add the attribute containing + * the parent information now. + */ + if (ppargs) { + error = xfs_parent_addname(tp, ppargs, dp, name, ip); + if (error) + goto out_trans_cancel; + } + /* * Create ip with a reference from dp, and add '.' and '..' references * if it's a directory. @@ -1155,6 +1172,7 @@ xfs_create( *ipp = ip; xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL); + xfs_parent_finish(mp, ppargs); return 0; out_trans_cancel: @@ -1170,6 +1188,8 @@ xfs_create( xfs_finish_inode_setup(ip); xfs_irele(ip); } + out_parent: + xfs_parent_finish(mp, ppargs); out_release_dquots: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); @@ -3038,7 +3058,7 @@ xfs_rename_alloc_whiteout( int error; error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE, - false, &tmpfile); + xfs_has_parent(dp->i_mount), &tmpfile); if (error) return error; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index d32322f9ecde..ff222827e550 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -157,6 +157,8 @@ xfs_create_need_xattr( if (dir->i_sb->s_security) return true; #endif + if (xfs_has_parent(XFS_I(dir)->i_mount)) + return true; return false; } @@ -201,7 +203,18 @@ xfs_generic_create( xfs_create_need_xattr(dir, default_acl, acl), &ip); } else { - error = xfs_create_tmpfile(idmap, XFS_I(dir), mode, false, &ip); + bool init_xattrs = false; + + /* + * If this temporary file will be linkable, set up the file + * with an attr fork to receive a parent pointer. 
+ */ + if (!(tmpfile->f_flags & O_EXCL) && + xfs_has_parent(XFS_I(dir)->i_mount)) + init_xattrs = true; + + error = xfs_create_tmpfile(idmap, XFS_I(dir), mode, + init_xattrs, &ip); } if (unlikely(error)) goto out_free_acl; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e525a6c477ff..c303d7ff9597 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -44,6 +44,7 @@ #include "xfs_dahash_test.h" #include "xfs_rtbitmap.h" #include "xfs_exchmaps_item.h" +#include "xfs_parent.h" #include "scrub/stats.h" #include "scrub/rcbag_btree.h" @@ -2211,8 +2212,16 @@ xfs_init_caches(void) if (!xfs_xmi_cache) goto out_destroy_xmd_cache; + xfs_parent_args_cache = kmem_cache_create("xfs_parent_args", + sizeof(struct xfs_parent_args), + 0, 0, NULL); + if (!xfs_parent_args_cache) + goto out_destroy_xmi_cache; + return 0; + out_destroy_xmi_cache: + kmem_cache_destroy(xfs_xmi_cache); out_destroy_xmd_cache: kmem_cache_destroy(xfs_xmd_cache); out_destroy_iul_cache: @@ -2273,6 +2282,7 @@ xfs_destroy_caches(void) * destroy caches. */ rcu_barrier(); + kmem_cache_destroy(xfs_parent_args_cache); kmem_cache_destroy(xfs_xmd_cache); kmem_cache_destroy(xfs_xmi_cache); kmem_cache_destroy(xfs_iunlink_cache); From f1097be220fa938de5114db57a1ddb5de2bf6046 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:48 -0700 Subject: [PATCH 16/30] xfs: add parent attributes to link This patch modifies xfs_link to add a parent pointer to the inode. Signed-off-by: Dave Chinner Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong [djwong: minor rebase fixes] Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_trans_space.c | 14 +++++++++++ fs/xfs/libxfs/xfs_trans_space.h | 3 +-- fs/xfs/scrub/dir_repair.c | 2 +- fs/xfs/scrub/orphanage.c | 2 +- fs/xfs/xfs_inode.c | 43 ++++++++++++++++++++++++++++----- 5 files changed, 54 insertions(+), 10 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_space.c b/fs/xfs/libxfs/xfs_trans_space.c index 90532c3fa205..cf775750120e 100644 --- a/fs/xfs/libxfs/xfs_trans_space.c +++ b/fs/xfs/libxfs/xfs_trans_space.c @@ -50,3 +50,17 @@ xfs_mkdir_space_res( { return xfs_create_space_res(mp, namelen); } + +unsigned int +xfs_link_space_res( + struct xfs_mount *mp, + unsigned int namelen) +{ + unsigned int ret; + + ret = XFS_DIRENTER_SPACE_RES(mp, namelen); + if (xfs_has_parent(mp)) + ret += xfs_parent_calc_space_res(mp, namelen); + + return ret; +} diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 6cda87153b38..5539634009fb 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -86,8 +86,6 @@ (2 * (mp)->m_alloc_maxlevels) #define XFS_GROWFSRT_SPACE_RES(mp,b) \ ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK)) -#define XFS_LINK_SPACE_RES(mp,nl) \ - XFS_DIRENTER_SPACE_RES(mp,nl) #define XFS_QM_DQALLOC_SPACE_RES(mp) \ (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + \ XFS_DQUOT_CLUSTER_SIZE_FSB) @@ -107,5 +105,6 @@ unsigned int xfs_parent_calc_space_res(struct xfs_mount *mp, unsigned int xfs_create_space_res(struct xfs_mount *mp, unsigned int namelen); unsigned int xfs_mkdir_space_res(struct xfs_mount *mp, unsigned int namelen); +unsigned int xfs_link_space_res(struct xfs_mount *mp, unsigned int namelen); #endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 38957da26b94..575397aef1f7 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -704,7 +704,7 @@ xrep_dir_replay_update( uint resblks; int error; - resblks = XFS_LINK_SPACE_RES(mp, xname->len); + resblks 
= xfs_link_space_res(mp, xname->len); error = xchk_trans_alloc(rd->sc, resblks); if (error) return error; diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 885b7d478a0a..5e2c3546f2e9 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -326,7 +326,7 @@ xrep_adoption_trans_alloc( /* Compute the worst case space reservation that we need. */ adopt->sc = sc; - adopt->orphanage_blkres = XFS_LINK_SPACE_RES(mp, MAXNAMELEN); + adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN); if (S_ISDIR(VFS_I(sc->ip)->i_mode)) child_blkres = XFS_RENAME_SPACE_RES(mp, xfs_name_dotdot.len); adopt->child_blkres = child_blkres; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0dd4111a6773..61a390f5e2ae 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1299,14 +1299,15 @@ xfs_create_tmpfile( int xfs_link( - xfs_inode_t *tdp, - xfs_inode_t *sip, + struct xfs_inode *tdp, + struct xfs_inode *sip, struct xfs_name *target_name) { - xfs_mount_t *mp = tdp->i_mount; - xfs_trans_t *tp; + struct xfs_mount *mp = tdp->i_mount; + struct xfs_trans *tp; int error, nospace_error = 0; int resblks; + struct xfs_parent_args *ppargs; trace_xfs_link(tdp, target_name); @@ -1325,11 +1326,25 @@ xfs_link( if (error) goto std_return; - resblks = XFS_LINK_SPACE_RES(mp, target_name->len); + error = xfs_parent_start(mp, &ppargs); + if (error) + goto std_return; + + resblks = xfs_link_space_res(mp, target_name->len); error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks, &tp, &nospace_error); if (error) - goto std_return; + goto out_parent; + + /* + * We don't allow reservationless or quotaless hardlinking when parent + * pointers are enabled because we can't back out if the xattrs must + * grow. 
+ */ + if (ppargs && nospace_error) { + error = nospace_error; + goto error_return; + } /* * If we are using project inheritance, we only allow hard link @@ -1380,6 +1395,19 @@ xfs_link( xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); xfs_bumplink(tp, sip); + + /* + * If we have parent pointers, we now need to add the parent record to + * the attribute fork of the inode. If this is the initial parent + * attribute, we need to create it correctly, otherwise we can just add + * the parent to the inode. + */ + if (ppargs) { + error = xfs_parent_addname(tp, ppargs, tdp, target_name, sip); + if (error) + goto error_return; + } + xfs_dir_update_hook(tdp, sip, 1, target_name); /* @@ -1393,12 +1421,15 @@ xfs_link( error = xfs_trans_commit(tp); xfs_iunlock(tdp, XFS_ILOCK_EXCL); xfs_iunlock(sip, XFS_ILOCK_EXCL); + xfs_parent_finish(mp, ppargs); return error; error_return: xfs_trans_cancel(tp); xfs_iunlock(tdp, XFS_ILOCK_EXCL); xfs_iunlock(sip, XFS_ILOCK_EXCL); + out_parent: + xfs_parent_finish(mp, ppargs); std_return: if (error == -ENOSPC && nospace_error) error = nospace_error; From 5d31a85dcc1fa4c5d4a925c6da67751653a700ba Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:49 -0700 Subject: [PATCH 17/30] xfs: add parent attributes to symlink This patch modifies xfs_symlink to add a parent pointer to the inode. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong [djwong: minor rebase fixups] Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_trans_space.c | 17 +++++++++++++++++ fs/xfs/libxfs/xfs_trans_space.h | 4 ++-- fs/xfs/scrub/symlink_repair.c | 2 +- fs/xfs/xfs_symlink.c | 30 +++++++++++++++++++++++++----- 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_space.c b/fs/xfs/libxfs/xfs_trans_space.c index cf775750120e..c8adda82debe 100644 --- a/fs/xfs/libxfs/xfs_trans_space.c +++ b/fs/xfs/libxfs/xfs_trans_space.c @@ -64,3 +64,20 @@ xfs_link_space_res( return ret; } + +unsigned int +xfs_symlink_space_res( + struct xfs_mount *mp, + unsigned int namelen, + unsigned int fsblocks) +{ + unsigned int ret; + + ret = XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp, namelen) + + fsblocks; + + if (xfs_has_parent(mp)) + ret += xfs_parent_calc_space_res(mp, namelen); + + return ret; +} diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 5539634009fb..354ad1d6e18d 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -95,8 +95,6 @@ XFS_DIRREMOVE_SPACE_RES(mp) #define XFS_RENAME_SPACE_RES(mp,nl) \ (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) -#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ - (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) #define XFS_IFREE_SPACE_RES(mp) \ (xfs_has_finobt(mp) ? 
M_IGEO(mp)->inobt_maxlevels : 0) @@ -106,5 +104,7 @@ unsigned int xfs_parent_calc_space_res(struct xfs_mount *mp, unsigned int xfs_create_space_res(struct xfs_mount *mp, unsigned int namelen); unsigned int xfs_mkdir_space_res(struct xfs_mount *mp, unsigned int namelen); unsigned int xfs_link_space_res(struct xfs_mount *mp, unsigned int namelen); +unsigned int xfs_symlink_space_res(struct xfs_mount *mp, unsigned int namelen, + unsigned int fsblocks); #endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/fs/xfs/scrub/symlink_repair.c b/fs/xfs/scrub/symlink_repair.c index 178304959535..c8b5a5b878ac 100644 --- a/fs/xfs/scrub/symlink_repair.c +++ b/fs/xfs/scrub/symlink_repair.c @@ -421,7 +421,7 @@ xrep_symlink_rebuild( * unlikely. */ fs_blocks = xfs_symlink_blocks(sc->mp, target_len); - resblks = XFS_SYMLINK_SPACE_RES(sc->mp, target_len, fs_blocks); + resblks = xfs_symlink_space_res(sc->mp, target_len, fs_blocks); error = xfs_trans_reserve_quota_nblks(sc->tp, sc->tempip, resblks, 0, true); if (error) diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 85ef56fdd7df..17aee806ec2e 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -25,6 +25,8 @@ #include "xfs_error.h" #include "xfs_health.h" #include "xfs_symlink_remote.h" +#include "xfs_parent.h" +#include "xfs_defer.h" int xfs_readlink( @@ -100,6 +102,7 @@ xfs_symlink( struct xfs_dquot *pdqp = NULL; uint resblks; xfs_ino_t ino; + struct xfs_parent_args *ppargs; *ipp = NULL; @@ -130,18 +133,24 @@ xfs_symlink( /* * The symlink will fit into the inode data fork? - * There can't be any attributes so we get the whole variable part. + * If there are no parent pointers, then there won't be any attributes. + * So we get the whole variable part, and do not need to reserve extra + * blocks. Otherwise, we need to reserve the blocks. 
*/ - if (pathlen <= XFS_LITINO(mp)) + if (pathlen <= XFS_LITINO(mp) && !xfs_has_parent(mp)) fs_blocks = 0; else fs_blocks = xfs_symlink_blocks(mp, pathlen); - resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); + resblks = xfs_symlink_space_res(mp, link_name->len, fs_blocks); + + error = xfs_parent_start(mp, &ppargs); + if (error) + goto out_release_dquots; error = xfs_trans_alloc_icreate(mp, &M_RES(mp)->tr_symlink, udqp, gdqp, pdqp, resblks, &tp); if (error) - goto out_release_dquots; + goto out_parent; xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); unlock_dp_on_error = true; @@ -161,7 +170,7 @@ xfs_symlink( if (!error) error = xfs_init_new_inode(idmap, tp, dp, ino, S_IFLNK | (mode & ~S_IFMT), 1, 0, prid, - false, &ip); + xfs_has_parent(mp), &ip); if (error) goto out_trans_cancel; @@ -195,6 +204,14 @@ xfs_symlink( goto out_trans_cancel; xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + + /* Add parent pointer for the new symlink. */ + if (ppargs) { + error = xfs_parent_addname(tp, ppargs, dp, link_name, ip); + if (error) + goto out_trans_cancel; + } + xfs_dir_update_hook(dp, ip, 1, link_name); /* @@ -216,6 +233,7 @@ xfs_symlink( *ipp = ip; xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL); + xfs_parent_finish(mp, ppargs); return 0; out_trans_cancel: @@ -231,6 +249,8 @@ xfs_symlink( xfs_finish_inode_setup(ip); xfs_irele(ip); } +out_parent: + xfs_parent_finish(mp, ppargs); out_release_dquots: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); From d2d18330f63cd70b50eddac76de7c59a36f2faa7 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:49 -0700 Subject: [PATCH 18/30] xfs: remove parent pointers in unlink This patch removes the parent pointer attribute during unlink Signed-off-by: Dave Chinner Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong [djwong: adjust to new ondisk format, minor rebase fixes] Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_parent.c | 22 ++++++++++++++++++++++ fs/xfs/libxfs/xfs_parent.h | 3 +++ fs/xfs/libxfs/xfs_trans_space.c | 13 +++++++++++++ fs/xfs/libxfs/xfs_trans_space.h | 3 +-- fs/xfs/xfs_inode.c | 27 +++++++++++++++++++++------ 5 files changed, 60 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index 65616cfc1a2b..6142e68f2338 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -205,3 +205,25 @@ xfs_parent_addname( xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_SET); return 0; } + +/* Remove a parent pointer to reflect a dirent removal. */ +int +xfs_parent_removename( + struct xfs_trans *tp, + struct xfs_parent_args *ppargs, + struct xfs_inode *dp, + const struct xfs_name *parent_name, + struct xfs_inode *child) +{ + int error; + + error = xfs_parent_iread_extents(tp, child); + if (error) + return error; + + xfs_inode_to_parent_rec(&ppargs->rec, dp); + xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child, + child->i_ino, parent_name); + xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REMOVE); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h index 6de24e3ef318..4a7fd48c226a 100644 --- a/fs/xfs/libxfs/xfs_parent.h +++ b/fs/xfs/libxfs/xfs_parent.h @@ -81,5 +81,8 @@ xfs_parent_finish( int xfs_parent_addname(struct xfs_trans *tp, struct xfs_parent_args *ppargs, struct xfs_inode *dp, const struct xfs_name *parent_name, struct xfs_inode *child); +int xfs_parent_removename(struct xfs_trans *tp, struct xfs_parent_args *ppargs, + struct xfs_inode *dp, const struct xfs_name *parent_name, + struct xfs_inode *child); #endif /* __XFS_PARENT_H__ */ diff --git a/fs/xfs/libxfs/xfs_trans_space.c b/fs/xfs/libxfs/xfs_trans_space.c index c8adda82debe..df729e4f1a4c 100644 --- a/fs/xfs/libxfs/xfs_trans_space.c +++ b/fs/xfs/libxfs/xfs_trans_space.c @@ -81,3 +81,16 @@ xfs_symlink_space_res( return ret; } + +unsigned int 
+xfs_remove_space_res( + struct xfs_mount *mp, + unsigned int namelen) +{ + unsigned int ret = XFS_DIRREMOVE_SPACE_RES(mp); + + if (xfs_has_parent(mp)) + ret += xfs_parent_calc_space_res(mp, namelen); + + return ret; +} diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 354ad1d6e18d..a4490813c56f 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -91,8 +91,6 @@ XFS_DQUOT_CLUSTER_SIZE_FSB) #define XFS_QM_QINOCREATE_SPACE_RES(mp) \ XFS_IALLOC_SPACE_RES(mp) -#define XFS_REMOVE_SPACE_RES(mp) \ - XFS_DIRREMOVE_SPACE_RES(mp) #define XFS_RENAME_SPACE_RES(mp,nl) \ (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) #define XFS_IFREE_SPACE_RES(mp) \ @@ -106,5 +104,6 @@ unsigned int xfs_mkdir_space_res(struct xfs_mount *mp, unsigned int namelen); unsigned int xfs_link_space_res(struct xfs_mount *mp, unsigned int namelen); unsigned int xfs_symlink_space_res(struct xfs_mount *mp, unsigned int namelen, unsigned int fsblocks); +unsigned int xfs_remove_space_res(struct xfs_mount *mp, unsigned int namelen); #endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 61a390f5e2ae..c4a1c2dd5261 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2721,16 +2721,17 @@ xfs_iunpin_wait( */ int xfs_remove( - xfs_inode_t *dp, + struct xfs_inode *dp, struct xfs_name *name, - xfs_inode_t *ip) + struct xfs_inode *ip) { - xfs_mount_t *mp = dp->i_mount; - xfs_trans_t *tp = NULL; + struct xfs_mount *mp = dp->i_mount; + struct xfs_trans *tp = NULL; int is_dir = S_ISDIR(VFS_I(ip)->i_mode); int dontcare; int error = 0; uint resblks; + struct xfs_parent_args *ppargs; trace_xfs_remove(dp, name); @@ -2747,6 +2748,10 @@ xfs_remove( if (error) goto std_return; + error = xfs_parent_start(mp, &ppargs); + if (error) + goto std_return; + /* * We try to get the real space reservation first, allowing for * directory btree deletion(s) implying possible bmap insert(s). 
If we @@ -2758,12 +2763,12 @@ xfs_remove( * the directory code can handle a reservationless update and we don't * want to prevent a user from trying to free space by deleting things. */ - resblks = XFS_REMOVE_SPACE_RES(mp); + resblks = xfs_remove_space_res(mp, name->len); error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks, &tp, &dontcare); if (error) { ASSERT(error != -ENOSPC); - goto std_return; + goto out_parent; } /* @@ -2823,6 +2828,13 @@ xfs_remove( goto out_trans_cancel; } + /* Remove parent pointer. */ + if (ppargs) { + error = xfs_parent_removename(tp, ppargs, dp, name, ip); + if (error) + goto out_trans_cancel; + } + /* * Drop the link from dp to ip, and if ip was a directory, remove the * '.' and '..' references since we freed the directory. @@ -2846,6 +2858,7 @@ xfs_remove( xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL); + xfs_parent_finish(mp, ppargs); return 0; out_trans_cancel: @@ -2853,6 +2866,8 @@ xfs_remove( out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL); + out_parent: + xfs_parent_finish(mp, ppargs); std_return: return error; } From 5a8338c88284df4e9e697225aa65f2709333a659 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:50 -0700 Subject: [PATCH 19/30] xfs: Add parent pointers to rename This patch removes the old parent pointer attribute during the rename operation, and re-adds the updated parent pointer. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong [djwong: adjust to new ondisk format] Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_parent.c | 30 +++++++++++++ fs/xfs/libxfs/xfs_parent.h | 6 +++ fs/xfs/libxfs/xfs_trans_space.c | 25 +++++++++++ fs/xfs/libxfs/xfs_trans_space.h | 6 ++- fs/xfs/scrub/orphanage.c | 3 +- fs/xfs/scrub/parent_repair.c | 3 +- fs/xfs/xfs_inode.c | 80 ++++++++++++++++++++++++++++++--- 7 files changed, 142 insertions(+), 11 deletions(-) diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index 6142e68f2338..fdf643bfde4d 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -227,3 +227,33 @@ xfs_parent_removename( xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REMOVE); return 0; } + +/* Replace one parent pointer with another to reflect a rename. */ +int +xfs_parent_replacename( + struct xfs_trans *tp, + struct xfs_parent_args *ppargs, + struct xfs_inode *old_dp, + const struct xfs_name *old_name, + struct xfs_inode *new_dp, + const struct xfs_name *new_name, + struct xfs_inode *child) +{ + int error; + + error = xfs_parent_iread_extents(tp, child); + if (error) + return error; + + xfs_inode_to_parent_rec(&ppargs->rec, old_dp); + xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child, + child->i_ino, old_name); + + xfs_inode_to_parent_rec(&ppargs->new_rec, new_dp); + ppargs->args.new_name = new_name->name; + ppargs->args.new_namelen = new_name->len; + ppargs->args.new_value = &ppargs->new_rec; + ppargs->args.new_valuelen = sizeof(struct xfs_parent_rec); + xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REPLACE); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h index 4a7fd48c226a..768633b31367 100644 --- a/fs/xfs/libxfs/xfs_parent.h +++ b/fs/xfs/libxfs/xfs_parent.h @@ -45,6 +45,7 @@ extern struct kmem_cache *xfs_parent_args_cache; */ struct xfs_parent_args { struct xfs_parent_rec rec; + struct xfs_parent_rec new_rec; struct xfs_da_args args; }; @@ -84,5 +85,10 @@ int xfs_parent_addname(struct xfs_trans *tp, struct xfs_parent_args 
*ppargs, int xfs_parent_removename(struct xfs_trans *tp, struct xfs_parent_args *ppargs, struct xfs_inode *dp, const struct xfs_name *parent_name, struct xfs_inode *child); +int xfs_parent_replacename(struct xfs_trans *tp, + struct xfs_parent_args *ppargs, + struct xfs_inode *old_dp, const struct xfs_name *old_name, + struct xfs_inode *new_dp, const struct xfs_name *new_name, + struct xfs_inode *child); #endif /* __XFS_PARENT_H__ */ diff --git a/fs/xfs/libxfs/xfs_trans_space.c b/fs/xfs/libxfs/xfs_trans_space.c index df729e4f1a4c..b9dc3752f702 100644 --- a/fs/xfs/libxfs/xfs_trans_space.c +++ b/fs/xfs/libxfs/xfs_trans_space.c @@ -94,3 +94,28 @@ xfs_remove_space_res( return ret; } + +unsigned int +xfs_rename_space_res( + struct xfs_mount *mp, + unsigned int src_namelen, + bool target_exists, + unsigned int target_namelen, + bool has_whiteout) +{ + unsigned int ret; + + ret = XFS_DIRREMOVE_SPACE_RES(mp) + + XFS_DIRENTER_SPACE_RES(mp, target_namelen); + + if (xfs_has_parent(mp)) { + if (has_whiteout) + ret += xfs_parent_calc_space_res(mp, src_namelen); + ret += 2 * xfs_parent_calc_space_res(mp, target_namelen); + } + + if (target_exists) + ret += xfs_parent_calc_space_res(mp, target_namelen); + + return ret; +} diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index a4490813c56f..1155ff2d37e2 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -91,8 +91,6 @@ XFS_DQUOT_CLUSTER_SIZE_FSB) #define XFS_QM_QINOCREATE_SPACE_RES(mp) \ XFS_IALLOC_SPACE_RES(mp) -#define XFS_RENAME_SPACE_RES(mp,nl) \ - (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) #define XFS_IFREE_SPACE_RES(mp) \ (xfs_has_finobt(mp) ? 
M_IGEO(mp)->inobt_maxlevels : 0) @@ -106,4 +104,8 @@ unsigned int xfs_symlink_space_res(struct xfs_mount *mp, unsigned int namelen, unsigned int fsblocks); unsigned int xfs_remove_space_res(struct xfs_mount *mp, unsigned int namelen); +unsigned int xfs_rename_space_res(struct xfs_mount *mp, + unsigned int src_namelen, bool target_exists, + unsigned int target_namelen, bool has_whiteout); + #endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 5e2c3546f2e9..94bcc2799188 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -328,7 +328,8 @@ xrep_adoption_trans_alloc( adopt->sc = sc; adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN); if (S_ISDIR(VFS_I(sc->ip)->i_mode)) - child_blkres = XFS_RENAME_SPACE_RES(mp, xfs_name_dotdot.len); + child_blkres = xfs_rename_space_res(mp, 0, false, + xfs_name_dotdot.len, false); adopt->child_blkres = child_blkres; /* diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index ebb5791bf839..63590e1b3506 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -171,7 +171,8 @@ xrep_parent_reset_dotdot( * Reserve more space just in case we have to expand the dir. We're * allowed to exceed quota to repair inconsistent metadata. 
*/ - spaceres = XFS_RENAME_SPACE_RES(sc->mp, xfs_name_dotdot.len); + spaceres = xfs_rename_space_res(sc->mp, 0, false, xfs_name_dotdot.len, + false); error = xfs_trans_reserve_more_inode(sc->tp, sc->ip, spaceres, 0, true); if (error) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c4a1c2dd5261..59488c17e1c1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3148,6 +3148,9 @@ xfs_rename( struct xfs_trans *tp; struct xfs_inode *wip = NULL; /* whiteout inode */ struct xfs_inode *inodes[__XFS_SORT_INODES]; + struct xfs_parent_args *src_ppargs = NULL; + struct xfs_parent_args *tgt_ppargs = NULL; + struct xfs_parent_args *wip_ppargs = NULL; int i; int num_inodes = __XFS_SORT_INODES; bool new_parent = (src_dp != target_dp); @@ -3179,9 +3182,26 @@ xfs_rename( xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, inodes, &num_inodes); + error = xfs_parent_start(mp, &src_ppargs); + if (error) + goto out_release_wip; + + if (wip) { + error = xfs_parent_start(mp, &wip_ppargs); + if (error) + goto out_src_ppargs; + } + + if (target_ip) { + error = xfs_parent_start(mp, &tgt_ppargs); + if (error) + goto out_wip_ppargs; + } + retry: nospace_error = 0; - spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); + spaceres = xfs_rename_space_res(mp, src_name->len, target_ip != NULL, + target_name->len, wip != NULL); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp); if (error == -ENOSPC) { nospace_error = error; @@ -3190,7 +3210,17 @@ xfs_rename( &tp); } if (error) - goto out_release_wip; + goto out_tgt_ppargs; + + /* + * We don't allow reservationless renaming when parent pointers are + * enabled because we can't back out if the xattrs must grow. 
+ */ + if (src_ppargs && nospace_error) { + error = nospace_error; + xfs_trans_cancel(tp); + goto out_tgt_ppargs; + } /* * Attach the dquots to the inodes @@ -3198,7 +3228,7 @@ xfs_rename( error = xfs_qm_vop_rename_dqattach(inodes); if (error) { xfs_trans_cancel(tp); - goto out_release_wip; + goto out_tgt_ppargs; } /* @@ -3267,6 +3297,15 @@ xfs_rename( goto out_trans_cancel; } + /* + * We don't allow quotaless renaming when parent pointers are enabled + * because we can't back out if the xattrs must grow. + */ + if (src_ppargs && nospace_error) { + error = nospace_error; + goto out_trans_cancel; + } + /* * Check for expected errors before we dirty the transaction * so we can return an error without a transaction abort. @@ -3459,6 +3498,28 @@ xfs_rename( if (error) goto out_trans_cancel; + /* Schedule parent pointer updates. */ + if (wip_ppargs) { + error = xfs_parent_addname(tp, wip_ppargs, src_dp, src_name, + wip); + if (error) + goto out_trans_cancel; + } + + if (src_ppargs) { + error = xfs_parent_replacename(tp, src_ppargs, src_dp, + src_name, target_dp, target_name, src_ip); + if (error) + goto out_trans_cancel; + } + + if (tgt_ppargs) { + error = xfs_parent_removename(tp, tgt_ppargs, target_dp, + target_name, target_ip); + if (error) + goto out_trans_cancel; + } + xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) @@ -3480,14 +3541,19 @@ xfs_rename( xfs_dir_update_hook(src_dp, wip, 1, src_name); error = xfs_finish_rename(tp); - xfs_iunlock_rename(inodes, num_inodes); - if (wip) - xfs_irele(wip); - return error; + nospace_error = 0; + goto out_unlock; out_trans_cancel: xfs_trans_cancel(tp); +out_unlock: xfs_iunlock_rename(inodes, num_inodes); +out_tgt_ppargs: + xfs_parent_finish(mp, tgt_ppargs); +out_wip_ppargs: + xfs_parent_finish(mp, wip_ppargs); +out_src_ppargs: + xfs_parent_finish(mp, src_ppargs); out_release_wip: if (wip) xfs_irele(wip); From 
1c12949e50e191933c08758ae53e31b852e730d6 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:51 -0700 Subject: [PATCH 20/30] xfs: Add parent pointers to xfs_cross_rename Cross renames are handled separately from standard renames, and need different handling to update the parent attributes correctly. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 59488c17e1c1..ebe2ce9bd9ee 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2972,15 +2972,17 @@ xfs_cross_rename( struct xfs_inode *dp1, struct xfs_name *name1, struct xfs_inode *ip1, + struct xfs_parent_args *ip1_ppargs, struct xfs_inode *dp2, struct xfs_name *name2, struct xfs_inode *ip2, + struct xfs_parent_args *ip2_ppargs, int spaceres) { - int error = 0; - int ip1_flags = 0; - int ip2_flags = 0; - int dp2_flags = 0; + int error = 0; + int ip1_flags = 0; + int ip2_flags = 0; + int dp2_flags = 0; /* Swap inode number for dirent in first parent */ error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres); @@ -3049,6 +3051,21 @@ xfs_cross_rename( } } + /* Schedule parent pointer replacements */ + if (ip1_ppargs) { + error = xfs_parent_replacename(tp, ip1_ppargs, dp1, name1, dp2, + name2, ip1); + if (error) + goto out_trans_abort; + } + + if (ip2_ppargs) { + error = xfs_parent_replacename(tp, ip2_ppargs, dp2, name2, dp1, + name1, ip2); + if (error) + goto out_trans_abort; + } + if (ip1_flags) { xfs_trans_ichgtime(tp, ip1, ip1_flags); xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE); @@ -3265,10 +3282,10 @@ xfs_rename( /* RENAME_EXCHANGE is unique from here on. 
*/ if (flags & RENAME_EXCHANGE) { error = xfs_cross_rename(tp, src_dp, src_name, src_ip, - target_dp, target_name, target_ip, - spaceres); - xfs_iunlock_rename(inodes, num_inodes); - return error; + src_ppargs, target_dp, target_name, target_ip, + tgt_ppargs, spaceres); + nospace_error = 0; + goto out_unlock; } /* From daf9f884906bcfcffe26967aee9ece893fba019b Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:52 -0700 Subject: [PATCH 21/30] xfs: don't return XFS_ATTR_PARENT attributes via listxattr Parent pointers are internal filesystem metadata. They're not intended to be directly visible to userspace, so filter them out of xfs_xattr_put_listent so that they don't appear in listxattr. Signed-off-by: Allison Henderson Inspired-by: Andrey Albershteyn Reviewed-by: Darrick J. Wong [djwong: change this to XFS_ATTR_PRIVATE_NSP_MASK per fsverity patchset] Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_da_format.h | 3 +++ fs/xfs/xfs_xattr.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 1395ad1937c5..ebde6eb1da65 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -726,6 +726,9 @@ struct xfs_attr3_leafblock { XFS_ATTR_SECURE | \ XFS_ATTR_PARENT) +/* Private attr namespaces not exposed to userspace */ +#define XFS_ATTR_PRIVATE_NSP_MASK (XFS_ATTR_PARENT) + #define XFS_ATTR_ONDISK_MASK (XFS_ATTR_NSP_ONDISK_MASK | \ XFS_ATTR_LOCAL | \ XFS_ATTR_INCOMPLETE) diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index ba56a9e73144..1e82d11d980f 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -229,6 +229,10 @@ xfs_xattr_put_listent( ASSERT(context->count >= 0); + /* Don't expose private xattr namespaces. 
*/ + if (flags & XFS_ATTR_PRIVATE_NSP_MASK) + return; + if (flags & XFS_ATTR_ROOT) { #ifdef CONFIG_XFS_POSIX_ACL if (namelen == SGI_ACL_FILE_SIZE && From 8f4b980ee67fe53a77b70b1fdd8e15f2fe37180c Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:53 -0700 Subject: [PATCH 22/30] xfs: pass the attr value to put_listent when possible Pass the attr value to put_listent when we have local xattrs or shortform xattrs. This will enable the GETPARENTS ioctl to use xfs_attr_list as its backend. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.h | 5 +++-- fs/xfs/libxfs/xfs_attr_sf.h | 1 + fs/xfs/xfs_attr_list.c | 8 +++++++- fs/xfs/xfs_ioctl.c | 1 + fs/xfs/xfs_xattr.c | 1 + 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index d0ed7ea58ab0..d12583dd7eec 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -47,8 +47,9 @@ struct xfs_attrlist_cursor_kern { /* void; state communicated via *context */ -typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int, - unsigned char *, int, int); +typedef void (*put_listent_func_t)(struct xfs_attr_list_context *context, + int flags, unsigned char *name, int namelen, void *value, + int valuelen); struct xfs_attr_list_context { struct xfs_trans *tp; diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index bc4422223024..73bdc0e55682 100644 --- a/fs/xfs/libxfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h @@ -16,6 +16,7 @@ typedef struct xfs_attr_sf_sort { uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ xfs_dahash_t hash; /* this entry's hash value */ unsigned char *name; /* name value, pointer into buffer */ + void *value; } xfs_attr_sf_sort_t; #define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \ diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 
9bc4b5322539..5c947e5ce8b8 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -92,6 +92,7 @@ xfs_attr_shortform_list( sfe->flags, sfe->nameval, (int)sfe->namelen, + &sfe->nameval[sfe->namelen], (int)sfe->valuelen); /* * Either search callback finished early or @@ -138,6 +139,7 @@ xfs_attr_shortform_list( sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ + sbp->value = &sfe->nameval[sfe->namelen], sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags, @@ -192,6 +194,7 @@ xfs_attr_shortform_list( sbp->flags, sbp->name, sbp->namelen, + sbp->value, sbp->valuelen); if (context->seen_enough) break; @@ -479,6 +482,7 @@ xfs_attr3_leaf_list_int( */ for (; i < ichdr.count; entry++, i++) { char *name; + void *value; int namelen, valuelen; if (be32_to_cpu(entry->hashval) != cursor->hashval) { @@ -496,6 +500,7 @@ xfs_attr3_leaf_list_int( name_loc = xfs_attr3_leaf_name_local(leaf, i); name = name_loc->nameval; namelen = name_loc->namelen; + value = &name_loc->nameval[name_loc->namelen]; valuelen = be16_to_cpu(name_loc->valuelen); } else { xfs_attr_leaf_name_remote_t *name_rmt; @@ -503,6 +508,7 @@ xfs_attr3_leaf_list_int( name_rmt = xfs_attr3_leaf_name_remote(leaf, i); name = name_rmt->name; namelen = name_rmt->namelen; + value = NULL; valuelen = be32_to_cpu(name_rmt->valuelen); } @@ -513,7 +519,7 @@ xfs_attr3_leaf_list_int( return -EFSCORRUPTED; } context->put_listent(context, entry->flags, - name, namelen, valuelen); + name, namelen, value, valuelen); if (context->seen_enough) break; cursor->offset++; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index e30f9f40f086..7a2a5cf06a5c 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -310,6 +310,7 @@ xfs_ioc_attr_put_listent( int flags, unsigned char *name, int namelen, + void *value, int valuelen) { struct xfs_attrlist *alist = context->buffer; diff --git a/fs/xfs/xfs_xattr.c 
b/fs/xfs/xfs_xattr.c index 1e82d11d980f..b43f7081b0f4 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -222,6 +222,7 @@ xfs_xattr_put_listent( int flags, unsigned char *name, int namelen, + void *value, int valuelen) { char *prefix; From af69d852dfe62b925d0df401eafad40698c889c6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:54 -0700 Subject: [PATCH 23/30] xfs: move handle ioctl code to xfs_handle.c Move the handle management code (and the attrmulti code that uses it) to xfs_handle.c. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/xfs_handle.c | 618 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_handle.h | 28 ++ fs/xfs/xfs_ioctl.c | 592 +---------------------------------------- fs/xfs/xfs_ioctl.h | 28 -- fs/xfs/xfs_ioctl32.c | 1 + 6 files changed, 649 insertions(+), 619 deletions(-) create mode 100644 fs/xfs/xfs_handle.c create mode 100644 fs/xfs/xfs_handle.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 0c1a0b67af93..c969b11ce0f4 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -78,6 +78,7 @@ xfs-y += xfs_aops.o \ xfs_fsmap.o \ xfs_fsops.o \ xfs_globals.o \ + xfs_handle.o \ xfs_health.o \ xfs_icache.o \ xfs_ioctl.o \ diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c new file mode 100644 index 000000000000..13c2479a3053 --- /dev/null +++ b/fs/xfs/xfs_handle.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All rights reserved. 
+ */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_trans.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" +#include "xfs_ioctl.h" +#include "xfs_parent.h" +#include "xfs_da_btree.h" +#include "xfs_handle.h" +#include "xfs_health.h" +#include "xfs_icache.h" +#include "xfs_export.h" +#include "xfs_xattr.h" +#include "xfs_acl.h" + +#include + +/* + * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to + * a file or fs handle. + * + * XFS_IOC_PATH_TO_FSHANDLE + * returns fs handle for a mount point or path within that mount point + * XFS_IOC_FD_TO_HANDLE + * returns full handle for a FD opened in user space + * XFS_IOC_PATH_TO_HANDLE + * returns full handle for a path + */ +int +xfs_find_handle( + unsigned int cmd, + xfs_fsop_handlereq_t *hreq) +{ + int hsize; + xfs_handle_t handle; + struct inode *inode; + struct fd f = {NULL}; + struct path path; + int error; + struct xfs_inode *ip; + + if (cmd == XFS_IOC_FD_TO_HANDLE) { + f = fdget(hreq->fd); + if (!f.file) + return -EBADF; + inode = file_inode(f.file); + } else { + error = user_path_at(AT_FDCWD, hreq->path, 0, &path); + if (error) + return error; + inode = d_inode(path.dentry); + } + ip = XFS_I(inode); + + /* + * We can only generate handles for inodes residing on a XFS filesystem, + * and only for regular files, directories or symbolic links. + */ + error = -EINVAL; + if (inode->i_sb->s_magic != XFS_SB_MAGIC) + goto out_put; + + error = -EBADF; + if (!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode) && + !S_ISLNK(inode->i_mode)) + goto out_put; + + + memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); + + if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { + /* + * This handle only contains an fsid, zero the rest. 
+ */ + memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); + hsize = sizeof(xfs_fsid_t); + } else { + handle.ha_fid.fid_len = sizeof(xfs_fid_t) - + sizeof(handle.ha_fid.fid_len); + handle.ha_fid.fid_pad = 0; + handle.ha_fid.fid_gen = inode->i_generation; + handle.ha_fid.fid_ino = ip->i_ino; + hsize = sizeof(xfs_handle_t); + } + + error = -EFAULT; + if (copy_to_user(hreq->ohandle, &handle, hsize) || + copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) + goto out_put; + + error = 0; + + out_put: + if (cmd == XFS_IOC_FD_TO_HANDLE) + fdput(f); + else + path_put(&path); + return error; +} + +/* + * No need to do permission checks on the various pathname components + * as the handle operations are privileged. + */ +STATIC int +xfs_handle_acceptable( + void *context, + struct dentry *dentry) +{ + return 1; +} + +/* + * Convert userspace handle data into a dentry. + */ +struct dentry * +xfs_handle_to_dentry( + struct file *parfilp, + void __user *uhandle, + u32 hlen) +{ + xfs_handle_t handle; + struct xfs_fid64 fid; + + /* + * Only allow handle opens under a directory. 
+ */ + if (!S_ISDIR(file_inode(parfilp)->i_mode)) + return ERR_PTR(-ENOTDIR); + + if (hlen != sizeof(xfs_handle_t)) + return ERR_PTR(-EINVAL); + if (copy_from_user(&handle, uhandle, hlen)) + return ERR_PTR(-EFAULT); + if (handle.ha_fid.fid_len != + sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len)) + return ERR_PTR(-EINVAL); + + memset(&fid, 0, sizeof(struct fid)); + fid.ino = handle.ha_fid.fid_ino; + fid.gen = handle.ha_fid.fid_gen; + + return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, + FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, + xfs_handle_acceptable, NULL); +} + +STATIC struct dentry * +xfs_handlereq_to_dentry( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen); +} + +int +xfs_open_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + const struct cred *cred = current_cred(); + int error; + int fd; + int permflag; + struct file *filp; + struct inode *inode; + struct dentry *dentry; + fmode_t fmode; + struct path path; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + dentry = xfs_handlereq_to_dentry(parfilp, hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + inode = d_inode(dentry); + + /* Restrict xfs_open_by_handle to directories & regular files. */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { + error = -EPERM; + goto out_dput; + } + +#if BITS_PER_LONG != 32 + hreq->oflags |= O_LARGEFILE; +#endif + + permflag = hreq->oflags; + fmode = OPEN_FMODE(permflag); + if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && + (fmode & FMODE_WRITE) && IS_APPEND(inode)) { + error = -EPERM; + goto out_dput; + } + + if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) { + error = -EPERM; + goto out_dput; + } + + /* Can't write directories. 
*/ + if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) { + error = -EISDIR; + goto out_dput; + } + + fd = get_unused_fd_flags(0); + if (fd < 0) { + error = fd; + goto out_dput; + } + + path.mnt = parfilp->f_path.mnt; + path.dentry = dentry; + filp = dentry_open(&path, hreq->oflags, cred); + dput(dentry); + if (IS_ERR(filp)) { + put_unused_fd(fd); + return PTR_ERR(filp); + } + + if (S_ISREG(inode->i_mode)) { + filp->f_flags |= O_NOATIME; + filp->f_mode |= FMODE_NOCMTIME; + } + + fd_install(fd, filp); + return fd; + + out_dput: + dput(dentry); + return error; +} + +int +xfs_readlink_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + struct dentry *dentry; + __u32 olen; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + dentry = xfs_handlereq_to_dentry(parfilp, hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + /* Restrict this handle operation to symlinks only. */ + if (!d_is_symlink(dentry)) { + error = -EINVAL; + goto out_dput; + } + + if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { + error = -EFAULT; + goto out_dput; + } + + error = vfs_readlink(dentry, hreq->ohandle, olen); + + out_dput: + dput(dentry); + return error; +} + +/* + * Format an attribute and copy it out to the user's buffer. + * Take care to check values and protect against them changing later, + * we may be reading them directly out of a user buffer. + */ +static void +xfs_ioc_attr_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + void *value, + int valuelen) +{ + struct xfs_attrlist *alist = context->buffer; + struct xfs_attrlist_ent *aep; + int arraytop; + + ASSERT(!context->seen_enough); + ASSERT(context->count >= 0); + ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); + ASSERT(context->firstu >= sizeof(*alist)); + ASSERT(context->firstu <= context->bufsize); + + /* + * Only list entries in the right namespace. 
+ */ + if (context->attr_filter != (flags & XFS_ATTR_NSP_ONDISK_MASK)) + return; + + arraytop = sizeof(*alist) + + context->count * sizeof(alist->al_offset[0]); + + /* decrement by the actual bytes used by the attr */ + context->firstu -= round_up(offsetof(struct xfs_attrlist_ent, a_name) + + namelen + 1, sizeof(uint32_t)); + if (context->firstu < arraytop) { + trace_xfs_attr_list_full(context); + alist->al_more = 1; + context->seen_enough = 1; + return; + } + + aep = context->buffer + context->firstu; + aep->a_valuelen = valuelen; + memcpy(aep->a_name, name, namelen); + aep->a_name[namelen] = 0; + alist->al_offset[context->count++] = context->firstu; + alist->al_count = context->count; + trace_xfs_attr_list_add(context); +} + +static unsigned int +xfs_attr_filter( + u32 ioc_flags) +{ + if (ioc_flags & XFS_IOC_ATTR_ROOT) + return XFS_ATTR_ROOT; + if (ioc_flags & XFS_IOC_ATTR_SECURE) + return XFS_ATTR_SECURE; + return 0; +} + +static inline enum xfs_attr_update +xfs_xattr_flags( + u32 ioc_flags, + void *value) +{ + if (!value) + return XFS_ATTRUPDATE_REMOVE; + if (ioc_flags & XFS_IOC_ATTR_CREATE) + return XFS_ATTRUPDATE_CREATE; + if (ioc_flags & XFS_IOC_ATTR_REPLACE) + return XFS_ATTRUPDATE_REPLACE; + return XFS_ATTRUPDATE_UPSERT; +} + +int +xfs_ioc_attr_list( + struct xfs_inode *dp, + void __user *ubuf, + size_t bufsize, + int flags, + struct xfs_attrlist_cursor __user *ucursor) +{ + struct xfs_attr_list_context context = { }; + struct xfs_attrlist *alist; + void *buffer; + int error; + + if (bufsize < sizeof(struct xfs_attrlist) || + bufsize > XFS_XATTR_LIST_MAX) + return -EINVAL; + + /* + * Reject flags, only allow namespaces. + */ + if (flags & ~(XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) + return -EINVAL; + if (flags == (XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) + return -EINVAL; + + /* + * Validate the cursor. 
+ */ + if (copy_from_user(&context.cursor, ucursor, sizeof(context.cursor))) + return -EFAULT; + if (context.cursor.pad1 || context.cursor.pad2) + return -EINVAL; + if (!context.cursor.initted && + (context.cursor.hashval || context.cursor.blkno || + context.cursor.offset)) + return -EINVAL; + + buffer = kvzalloc(bufsize, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + /* + * Initialize the output buffer. + */ + context.dp = dp; + context.resynch = 1; + context.attr_filter = xfs_attr_filter(flags); + context.buffer = buffer; + context.bufsize = round_down(bufsize, sizeof(uint32_t)); + context.firstu = context.bufsize; + context.put_listent = xfs_ioc_attr_put_listent; + + alist = context.buffer; + alist->al_count = 0; + alist->al_more = 0; + alist->al_offset[0] = context.bufsize; + + error = xfs_attr_list(&context); + if (error) + goto out_free; + + if (copy_to_user(ubuf, buffer, bufsize) || + copy_to_user(ucursor, &context.cursor, sizeof(context.cursor))) + error = -EFAULT; +out_free: + kvfree(buffer); + return error; +} + +int +xfs_attrlist_by_handle( + struct file *parfilp, + struct xfs_fsop_attrlist_handlereq __user *p) +{ + struct xfs_fsop_attrlist_handlereq al_hreq; + struct dentry *dentry; + int error = -ENOMEM; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&al_hreq, p, sizeof(al_hreq))) + return -EFAULT; + + dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = xfs_ioc_attr_list(XFS_I(d_inode(dentry)), al_hreq.buffer, + al_hreq.buflen, al_hreq.flags, &p->pos); + dput(dentry); + return error; +} + +static int +xfs_attrmulti_attr_get( + struct inode *inode, + unsigned char *name, + unsigned char __user *ubuf, + uint32_t *len, + uint32_t flags) +{ + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = xfs_attr_filter(flags), + .name = name, + .namelen = strlen(name), + .valuelen = *len, + }; + int error; + + if (*len > XFS_XATTR_SIZE_MAX) + return -EINVAL; + 
+ error = xfs_attr_get(&args); + if (error) + goto out_kfree; + + *len = args.valuelen; + if (copy_to_user(ubuf, args.value, args.valuelen)) + error = -EFAULT; + +out_kfree: + kvfree(args.value); + return error; +} + +static int +xfs_attrmulti_attr_set( + struct inode *inode, + unsigned char *name, + const unsigned char __user *ubuf, + uint32_t len, + uint32_t flags) +{ + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = xfs_attr_filter(flags), + .name = name, + .namelen = strlen(name), + }; + int error; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + + if (ubuf) { + if (len > XFS_XATTR_SIZE_MAX) + return -EINVAL; + args.value = memdup_user(ubuf, len); + if (IS_ERR(args.value)) + return PTR_ERR(args.value); + args.valuelen = len; + } + + error = xfs_attr_change(&args, xfs_xattr_flags(flags, args.value)); + if (!error && (flags & XFS_IOC_ATTR_ROOT)) + xfs_forget_acl(inode, name); + kfree(args.value); + return error; +} + +int +xfs_ioc_attrmulti_one( + struct file *parfilp, + struct inode *inode, + uint32_t opcode, + void __user *uname, + void __user *value, + uint32_t *len, + uint32_t flags) +{ + unsigned char *name; + int error; + + if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE)) + return -EINVAL; + + name = strndup_user(uname, MAXNAMELEN); + if (IS_ERR(name)) + return PTR_ERR(name); + + switch (opcode) { + case ATTR_OP_GET: + error = xfs_attrmulti_attr_get(inode, name, value, len, flags); + break; + case ATTR_OP_REMOVE: + value = NULL; + *len = 0; + fallthrough; + case ATTR_OP_SET: + error = mnt_want_write_file(parfilp); + if (error) + break; + error = xfs_attrmulti_attr_set(inode, name, value, *len, flags); + mnt_drop_write_file(parfilp); + break; + default: + error = -EINVAL; + break; + } + + kfree(name); + return error; +} + +int +xfs_attrmulti_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + xfs_attr_multiop_t *ops; + xfs_fsop_attrmulti_handlereq_t am_hreq; + struct dentry *dentry; 
+ unsigned int i, size; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) + return -EFAULT; + + /* overflow check */ + if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) + return -E2BIG; + + dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = -E2BIG; + size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) + goto out_dput; + + ops = memdup_user(am_hreq.ops, size); + if (IS_ERR(ops)) { + error = PTR_ERR(ops); + goto out_dput; + } + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_error = xfs_ioc_attrmulti_one(parfilp, + d_inode(dentry), ops[i].am_opcode, + ops[i].am_attrname, ops[i].am_attrvalue, + &ops[i].am_length, ops[i].am_flags); + } + + if (copy_to_user(am_hreq.ops, ops, size)) + error = -EFAULT; + + kfree(ops); + out_dput: + dput(dentry); + return error; +} diff --git a/fs/xfs/xfs_handle.h b/fs/xfs/xfs_handle.h new file mode 100644 index 000000000000..e39eaf4689da --- /dev/null +++ b/fs/xfs/xfs_handle.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All rights reserved. 
+ */ +#ifndef __XFS_HANDLE_H__ +#define __XFS_HANDLE_H__ + +int xfs_attrlist_by_handle(struct file *parfilp, + struct xfs_fsop_attrlist_handlereq __user *p); +int xfs_attrmulti_by_handle(struct file *parfilp, void __user *arg); + +int xfs_find_handle(unsigned int cmd, struct xfs_fsop_handlereq *hreq); +int xfs_open_by_handle(struct file *parfilp, struct xfs_fsop_handlereq *hreq); +int xfs_readlink_by_handle(struct file *parfilp, + struct xfs_fsop_handlereq *hreq); + +int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode, + uint32_t opcode, void __user *uname, void __user *value, + uint32_t *len, uint32_t flags); +int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, + size_t bufsize, int flags, + struct xfs_attrlist_cursor __user *ucursor); + +struct dentry *xfs_handle_to_dentry(struct file *parfilp, void __user *uhandle, + u32 hlen); + +#endif /* __XFS_HANDLE_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 7a2a5cf06a5c..ed05fcd6261d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -23,11 +23,9 @@ #include "xfs_fsops.h" #include "xfs_discard.h" #include "xfs_quota.h" -#include "xfs_export.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_trans.h" -#include "xfs_acl.h" #include "xfs_btree.h" #include #include "xfs_fsmap.h" @@ -37,602 +35,14 @@ #include "xfs_health.h" #include "xfs_reflink.h" #include "xfs_ioctl.h" -#include "xfs_xattr.h" #include "xfs_rtbitmap.h" #include "xfs_file.h" #include "xfs_exchrange.h" +#include "xfs_handle.h" #include -#include #include -/* - * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to - * a file or fs handle. 
- * - * XFS_IOC_PATH_TO_FSHANDLE - * returns fs handle for a mount point or path within that mount point - * XFS_IOC_FD_TO_HANDLE - * returns full handle for a FD opened in user space - * XFS_IOC_PATH_TO_HANDLE - * returns full handle for a path - */ -int -xfs_find_handle( - unsigned int cmd, - xfs_fsop_handlereq_t *hreq) -{ - int hsize; - xfs_handle_t handle; - struct inode *inode; - struct fd f = {NULL}; - struct path path; - int error; - struct xfs_inode *ip; - - if (cmd == XFS_IOC_FD_TO_HANDLE) { - f = fdget(hreq->fd); - if (!f.file) - return -EBADF; - inode = file_inode(f.file); - } else { - error = user_path_at(AT_FDCWD, hreq->path, 0, &path); - if (error) - return error; - inode = d_inode(path.dentry); - } - ip = XFS_I(inode); - - /* - * We can only generate handles for inodes residing on a XFS filesystem, - * and only for regular files, directories or symbolic links. - */ - error = -EINVAL; - if (inode->i_sb->s_magic != XFS_SB_MAGIC) - goto out_put; - - error = -EBADF; - if (!S_ISREG(inode->i_mode) && - !S_ISDIR(inode->i_mode) && - !S_ISLNK(inode->i_mode)) - goto out_put; - - - memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); - - if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { - /* - * This handle only contains an fsid, zero the rest. - */ - memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); - hsize = sizeof(xfs_fsid_t); - } else { - handle.ha_fid.fid_len = sizeof(xfs_fid_t) - - sizeof(handle.ha_fid.fid_len); - handle.ha_fid.fid_pad = 0; - handle.ha_fid.fid_gen = inode->i_generation; - handle.ha_fid.fid_ino = ip->i_ino; - hsize = sizeof(xfs_handle_t); - } - - error = -EFAULT; - if (copy_to_user(hreq->ohandle, &handle, hsize) || - copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) - goto out_put; - - error = 0; - - out_put: - if (cmd == XFS_IOC_FD_TO_HANDLE) - fdput(f); - else - path_put(&path); - return error; -} - -/* - * No need to do permission checks on the various pathname components - * as the handle operations are privileged. 
- */ -STATIC int -xfs_handle_acceptable( - void *context, - struct dentry *dentry) -{ - return 1; -} - -/* - * Convert userspace handle data into a dentry. - */ -struct dentry * -xfs_handle_to_dentry( - struct file *parfilp, - void __user *uhandle, - u32 hlen) -{ - xfs_handle_t handle; - struct xfs_fid64 fid; - - /* - * Only allow handle opens under a directory. - */ - if (!S_ISDIR(file_inode(parfilp)->i_mode)) - return ERR_PTR(-ENOTDIR); - - if (hlen != sizeof(xfs_handle_t)) - return ERR_PTR(-EINVAL); - if (copy_from_user(&handle, uhandle, hlen)) - return ERR_PTR(-EFAULT); - if (handle.ha_fid.fid_len != - sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len)) - return ERR_PTR(-EINVAL); - - memset(&fid, 0, sizeof(struct fid)); - fid.ino = handle.ha_fid.fid_ino; - fid.gen = handle.ha_fid.fid_gen; - - return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, - FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, - xfs_handle_acceptable, NULL); -} - -STATIC struct dentry * -xfs_handlereq_to_dentry( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen); -} - -int -xfs_open_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - const struct cred *cred = current_cred(); - int error; - int fd; - int permflag; - struct file *filp; - struct inode *inode; - struct dentry *dentry; - fmode_t fmode; - struct path path; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - dentry = xfs_handlereq_to_dentry(parfilp, hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - inode = d_inode(dentry); - - /* Restrict xfs_open_by_handle to directories & regular files. 
*/ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { - error = -EPERM; - goto out_dput; - } - -#if BITS_PER_LONG != 32 - hreq->oflags |= O_LARGEFILE; -#endif - - permflag = hreq->oflags; - fmode = OPEN_FMODE(permflag); - if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && - (fmode & FMODE_WRITE) && IS_APPEND(inode)) { - error = -EPERM; - goto out_dput; - } - - if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) { - error = -EPERM; - goto out_dput; - } - - /* Can't write directories. */ - if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) { - error = -EISDIR; - goto out_dput; - } - - fd = get_unused_fd_flags(0); - if (fd < 0) { - error = fd; - goto out_dput; - } - - path.mnt = parfilp->f_path.mnt; - path.dentry = dentry; - filp = dentry_open(&path, hreq->oflags, cred); - dput(dentry); - if (IS_ERR(filp)) { - put_unused_fd(fd); - return PTR_ERR(filp); - } - - if (S_ISREG(inode->i_mode)) { - filp->f_flags |= O_NOATIME; - filp->f_mode |= FMODE_NOCMTIME; - } - - fd_install(fd, filp); - return fd; - - out_dput: - dput(dentry); - return error; -} - -int -xfs_readlink_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - struct dentry *dentry; - __u32 olen; - int error; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - dentry = xfs_handlereq_to_dentry(parfilp, hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - /* Restrict this handle operation to symlinks only. */ - if (!d_is_symlink(dentry)) { - error = -EINVAL; - goto out_dput; - } - - if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { - error = -EFAULT; - goto out_dput; - } - - error = vfs_readlink(dentry, hreq->ohandle, olen); - - out_dput: - dput(dentry); - return error; -} - -/* - * Format an attribute and copy it out to the user's buffer. - * Take care to check values and protect against them changing later, - * we may be reading them directly out of a user buffer. 
- */ -static void -xfs_ioc_attr_put_listent( - struct xfs_attr_list_context *context, - int flags, - unsigned char *name, - int namelen, - void *value, - int valuelen) -{ - struct xfs_attrlist *alist = context->buffer; - struct xfs_attrlist_ent *aep; - int arraytop; - - ASSERT(!context->seen_enough); - ASSERT(context->count >= 0); - ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); - ASSERT(context->firstu >= sizeof(*alist)); - ASSERT(context->firstu <= context->bufsize); - - /* - * Only list entries in the right namespace. - */ - if (context->attr_filter != (flags & XFS_ATTR_NSP_ONDISK_MASK)) - return; - - arraytop = sizeof(*alist) + - context->count * sizeof(alist->al_offset[0]); - - /* decrement by the actual bytes used by the attr */ - context->firstu -= round_up(offsetof(struct xfs_attrlist_ent, a_name) + - namelen + 1, sizeof(uint32_t)); - if (context->firstu < arraytop) { - trace_xfs_attr_list_full(context); - alist->al_more = 1; - context->seen_enough = 1; - return; - } - - aep = context->buffer + context->firstu; - aep->a_valuelen = valuelen; - memcpy(aep->a_name, name, namelen); - aep->a_name[namelen] = 0; - alist->al_offset[context->count++] = context->firstu; - alist->al_count = context->count; - trace_xfs_attr_list_add(context); -} - -static unsigned int -xfs_attr_filter( - u32 ioc_flags) -{ - if (ioc_flags & XFS_IOC_ATTR_ROOT) - return XFS_ATTR_ROOT; - if (ioc_flags & XFS_IOC_ATTR_SECURE) - return XFS_ATTR_SECURE; - return 0; -} - -static inline enum xfs_attr_update -xfs_xattr_flags( - u32 ioc_flags, - void *value) -{ - if (!value) - return XFS_ATTRUPDATE_REMOVE; - if (ioc_flags & XFS_IOC_ATTR_CREATE) - return XFS_ATTRUPDATE_CREATE; - if (ioc_flags & XFS_IOC_ATTR_REPLACE) - return XFS_ATTRUPDATE_REPLACE; - return XFS_ATTRUPDATE_UPSERT; -} - -int -xfs_ioc_attr_list( - struct xfs_inode *dp, - void __user *ubuf, - size_t bufsize, - int flags, - struct xfs_attrlist_cursor __user *ucursor) -{ - struct xfs_attr_list_context context = { }; - struct 
xfs_attrlist *alist; - void *buffer; - int error; - - if (bufsize < sizeof(struct xfs_attrlist) || - bufsize > XFS_XATTR_LIST_MAX) - return -EINVAL; - - /* - * Reject flags, only allow namespaces. - */ - if (flags & ~(XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) - return -EINVAL; - if (flags == (XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) - return -EINVAL; - - /* - * Validate the cursor. - */ - if (copy_from_user(&context.cursor, ucursor, sizeof(context.cursor))) - return -EFAULT; - if (context.cursor.pad1 || context.cursor.pad2) - return -EINVAL; - if (!context.cursor.initted && - (context.cursor.hashval || context.cursor.blkno || - context.cursor.offset)) - return -EINVAL; - - buffer = kvzalloc(bufsize, GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - /* - * Initialize the output buffer. - */ - context.dp = dp; - context.resynch = 1; - context.attr_filter = xfs_attr_filter(flags); - context.buffer = buffer; - context.bufsize = round_down(bufsize, sizeof(uint32_t)); - context.firstu = context.bufsize; - context.put_listent = xfs_ioc_attr_put_listent; - - alist = context.buffer; - alist->al_count = 0; - alist->al_more = 0; - alist->al_offset[0] = context.bufsize; - - error = xfs_attr_list(&context); - if (error) - goto out_free; - - if (copy_to_user(ubuf, buffer, bufsize) || - copy_to_user(ucursor, &context.cursor, sizeof(context.cursor))) - error = -EFAULT; -out_free: - kvfree(buffer); - return error; -} - -STATIC int -xfs_attrlist_by_handle( - struct file *parfilp, - struct xfs_fsop_attrlist_handlereq __user *p) -{ - struct xfs_fsop_attrlist_handlereq al_hreq; - struct dentry *dentry; - int error = -ENOMEM; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&al_hreq, p, sizeof(al_hreq))) - return -EFAULT; - - dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - error = xfs_ioc_attr_list(XFS_I(d_inode(dentry)), al_hreq.buffer, - al_hreq.buflen, al_hreq.flags, &p->pos); - dput(dentry); - 
return error; -} - -static int -xfs_attrmulti_attr_get( - struct inode *inode, - unsigned char *name, - unsigned char __user *ubuf, - uint32_t *len, - uint32_t flags) -{ - struct xfs_da_args args = { - .dp = XFS_I(inode), - .attr_filter = xfs_attr_filter(flags), - .name = name, - .namelen = strlen(name), - .valuelen = *len, - }; - int error; - - if (*len > XFS_XATTR_SIZE_MAX) - return -EINVAL; - - error = xfs_attr_get(&args); - if (error) - goto out_kfree; - - *len = args.valuelen; - if (copy_to_user(ubuf, args.value, args.valuelen)) - error = -EFAULT; - -out_kfree: - kvfree(args.value); - return error; -} - -static int -xfs_attrmulti_attr_set( - struct inode *inode, - unsigned char *name, - const unsigned char __user *ubuf, - uint32_t len, - uint32_t flags) -{ - struct xfs_da_args args = { - .dp = XFS_I(inode), - .attr_filter = xfs_attr_filter(flags), - .name = name, - .namelen = strlen(name), - }; - int error; - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - - if (ubuf) { - if (len > XFS_XATTR_SIZE_MAX) - return -EINVAL; - args.value = memdup_user(ubuf, len); - if (IS_ERR(args.value)) - return PTR_ERR(args.value); - args.valuelen = len; - } - - error = xfs_attr_change(&args, xfs_xattr_flags(flags, args.value)); - if (!error && (flags & XFS_IOC_ATTR_ROOT)) - xfs_forget_acl(inode, name); - kfree(args.value); - return error; -} - -int -xfs_ioc_attrmulti_one( - struct file *parfilp, - struct inode *inode, - uint32_t opcode, - void __user *uname, - void __user *value, - uint32_t *len, - uint32_t flags) -{ - unsigned char *name; - int error; - - if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE)) - return -EINVAL; - - name = strndup_user(uname, MAXNAMELEN); - if (IS_ERR(name)) - return PTR_ERR(name); - - switch (opcode) { - case ATTR_OP_GET: - error = xfs_attrmulti_attr_get(inode, name, value, len, flags); - break; - case ATTR_OP_REMOVE: - value = NULL; - *len = 0; - fallthrough; - case ATTR_OP_SET: - error = 
mnt_want_write_file(parfilp); - if (error) - break; - error = xfs_attrmulti_attr_set(inode, name, value, *len, flags); - mnt_drop_write_file(parfilp); - break; - default: - error = -EINVAL; - break; - } - - kfree(name); - return error; -} - -STATIC int -xfs_attrmulti_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - xfs_attr_multiop_t *ops; - xfs_fsop_attrmulti_handlereq_t am_hreq; - struct dentry *dentry; - unsigned int i, size; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) - return -EFAULT; - - /* overflow check */ - if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) - return -E2BIG; - - dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - error = -E2BIG; - size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); - if (!size || size > 16 * PAGE_SIZE) - goto out_dput; - - ops = memdup_user(am_hreq.ops, size); - if (IS_ERR(ops)) { - error = PTR_ERR(ops); - goto out_dput; - } - - error = 0; - for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = xfs_ioc_attrmulti_one(parfilp, - d_inode(dentry), ops[i].am_opcode, - ops[i].am_attrname, ops[i].am_attrvalue, - &ops[i].am_length, ops[i].am_flags); - } - - if (copy_to_user(am_hreq.ops, ops, size)) - error = -EFAULT; - - kfree(ops); - out_dput: - dput(dentry); - return error; -} - /* Return 0 on success or positive error */ int xfs_fsbulkstat_one_fmt( diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 38be600b5e1e..12124946f347 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -14,34 +14,6 @@ int xfs_ioc_swapext( xfs_swapext_t *sxp); -extern int -xfs_find_handle( - unsigned int cmd, - xfs_fsop_handlereq_t *hreq); - -extern int -xfs_open_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq); - -extern int -xfs_readlink_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq); - -int xfs_ioc_attrmulti_one(struct file 
*parfilp, struct inode *inode, - uint32_t opcode, void __user *uname, void __user *value, - uint32_t *len, uint32_t flags); -int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, - size_t bufsize, int flags, - struct xfs_attrlist_cursor __user *ucursor); - -extern struct dentry * -xfs_handle_to_dentry( - struct file *parfilp, - void __user *uhandle, - u32 hlen); - extern int xfs_fileattr_get( struct dentry *dentry, diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index ee35eea1ecce..b64785dc4354 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -24,6 +24,7 @@ #include "xfs_ioctl32.h" #include "xfs_trace.h" #include "xfs_sb.h" +#include "xfs_handle.h" #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) From b8c9d4253da43c02b287831f7e576568f24fbe58 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:55 -0700 Subject: [PATCH 24/30] xfs: split out handle management helpers a bit Split out the functions that generate file/fs handles and map them back into dentries in preparation for the GETPARENTS ioctl next. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 4 +- fs/xfs/xfs_handle.c | 98 +++++++++++++++++++++++++++++------------- 2 files changed, 70 insertions(+), 32 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 53526fca7386..97384ab95de4 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -633,7 +633,9 @@ typedef struct xfs_fsop_attrmulti_handlereq { /* * per machine unique filesystem identifier types. 
*/ -typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */ +typedef struct xfs_fsid { + __u32 val[2]; /* file system id type */ +} xfs_fsid_t; typedef struct xfs_fid { __u16 fid_len; /* length of remainder */ diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c index 13c2479a3053..b9f4d9860682 100644 --- a/fs/xfs/xfs_handle.c +++ b/fs/xfs/xfs_handle.c @@ -30,6 +30,42 @@ #include +static inline size_t +xfs_filehandle_fid_len(void) +{ + struct xfs_handle *handle = NULL; + + return sizeof(struct xfs_fid) - sizeof(handle->ha_fid.fid_len); +} + +static inline size_t +xfs_filehandle_init( + struct xfs_mount *mp, + xfs_ino_t ino, + uint32_t gen, + struct xfs_handle *handle) +{ + memcpy(&handle->ha_fsid, mp->m_fixedfsid, sizeof(struct xfs_fsid)); + + handle->ha_fid.fid_len = xfs_filehandle_fid_len(); + handle->ha_fid.fid_pad = 0; + handle->ha_fid.fid_gen = gen; + handle->ha_fid.fid_ino = ino; + + return sizeof(struct xfs_handle); +} + +static inline size_t +xfs_fshandle_init( + struct xfs_mount *mp, + struct xfs_handle *handle) +{ + memcpy(&handle->ha_fsid, mp->m_fixedfsid, sizeof(struct xfs_fsid)); + memset(&handle->ha_fid, 0, sizeof(handle->ha_fid)); + + return sizeof(struct xfs_fsid); +} + /* * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to * a file or fs handle. @@ -84,20 +120,11 @@ xfs_find_handle( memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); - if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { - /* - * This handle only contains an fsid, zero the rest. 
- */ - memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); - hsize = sizeof(xfs_fsid_t); - } else { - handle.ha_fid.fid_len = sizeof(xfs_fid_t) - - sizeof(handle.ha_fid.fid_len); - handle.ha_fid.fid_pad = 0; - handle.ha_fid.fid_gen = inode->i_generation; - handle.ha_fid.fid_ino = ip->i_ino; - hsize = sizeof(xfs_handle_t); - } + if (cmd == XFS_IOC_PATH_TO_FSHANDLE) + hsize = xfs_fshandle_init(ip->i_mount, &handle); + else + hsize = xfs_filehandle_init(ip->i_mount, ip->i_ino, + inode->i_generation, &handle); error = -EFAULT; if (copy_to_user(hreq->ohandle, &handle, hsize) || @@ -126,6 +153,31 @@ xfs_handle_acceptable( return 1; } +/* Convert handle already copied to kernel space into a dentry. */ +static struct dentry * +xfs_khandle_to_dentry( + struct file *file, + struct xfs_handle *handle) +{ + struct xfs_fid64 fid = { + .ino = handle->ha_fid.fid_ino, + .gen = handle->ha_fid.fid_gen, + }; + + /* + * Only allow handle opens under a directory. + */ + if (!S_ISDIR(file_inode(file)->i_mode)) + return ERR_PTR(-ENOTDIR); + + if (handle->ha_fid.fid_len != xfs_filehandle_fid_len()) + return ERR_PTR(-EINVAL); + + return exportfs_decode_fh(file->f_path.mnt, (struct fid *)&fid, 3, + FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, + xfs_handle_acceptable, NULL); +} + /* * Convert userspace handle data into a dentry. */ @@ -136,29 +188,13 @@ xfs_handle_to_dentry( u32 hlen) { xfs_handle_t handle; - struct xfs_fid64 fid; - - /* - * Only allow handle opens under a directory. 
- */ - if (!S_ISDIR(file_inode(parfilp)->i_mode)) - return ERR_PTR(-ENOTDIR); if (hlen != sizeof(xfs_handle_t)) return ERR_PTR(-EINVAL); if (copy_from_user(&handle, uhandle, hlen)) return ERR_PTR(-EFAULT); - if (handle.ha_fid.fid_len != - sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len)) - return ERR_PTR(-EINVAL); - memset(&fid, 0, sizeof(struct fid)); - fid.ino = handle.ha_fid.fid_ino; - fid.gen = handle.ha_fid.fid_gen; - - return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, - FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, - xfs_handle_acceptable, NULL); + return xfs_khandle_to_dentry(parfilp, &handle); } STATIC struct dentry * From 233f4e12bbb2c5fb1588b857336a26e8bb6942af Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:55 -0700 Subject: [PATCH 25/30] xfs: add parent pointer ioctls This patch adds a pair of new file ioctls to retrieve the parent pointer of a given inode. They both return the same results, but one operates on the file descriptor passed to ioctl() whereas the other allows the caller to specify a file handle for which the caller wants results. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 74 +++++++++ fs/xfs/libxfs/xfs_ondisk.h | 5 + fs/xfs/libxfs/xfs_parent.c | 34 +++++ fs/xfs/libxfs/xfs_parent.h | 5 + fs/xfs/xfs_export.c | 2 +- fs/xfs/xfs_export.h | 2 + fs/xfs/xfs_handle.c | 298 +++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_handle.h | 5 + fs/xfs/xfs_ioctl.c | 6 +- fs/xfs/xfs_trace.c | 1 + fs/xfs/xfs_trace.h | 92 ++++++++++++ 11 files changed, 522 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 97384ab95de4..ea654df0505f 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -816,6 +816,78 @@ struct xfs_exchange_range { XFS_EXCHANGE_RANGE_DRY_RUN | \ XFS_EXCHANGE_RANGE_FILE1_WRITTEN) +/* Iterating parent pointers of files. 
*/ + +/* target was the root directory */ +#define XFS_GETPARENTS_OFLAG_ROOT (1U << 0) + +/* Cursor is done iterating pptrs */ +#define XFS_GETPARENTS_OFLAG_DONE (1U << 1) + +#define XFS_GETPARENTS_OFLAGS_ALL (XFS_GETPARENTS_OFLAG_ROOT | \ + XFS_GETPARENTS_OFLAG_DONE) + +#define XFS_GETPARENTS_IFLAGS_ALL (0) + +struct xfs_getparents_rec { + struct xfs_handle gpr_parent; /* Handle to parent */ + __u32 gpr_reclen; /* Length of entire record */ + __u32 gpr_reserved; /* zero */ + char gpr_name[]; /* Null-terminated filename */ +}; + +/* Iterate through this file's directory parent pointers */ +struct xfs_getparents { + /* + * Structure to track progress in iterating the parent pointers. + * Must be initialized to zeroes before the first ioctl call, and + * not touched by callers after that. + */ + struct xfs_attrlist_cursor gp_cursor; + + /* Input flags: XFS_GETPARENTS_IFLAG* */ + __u16 gp_iflags; + + /* Output flags: XFS_GETPARENTS_OFLAG* */ + __u16 gp_oflags; + + /* Size of the gp_buffer in bytes */ + __u32 gp_bufsize; + + /* Must be set to zero */ + __u64 gp_reserved; + + /* Pointer to a buffer in which to place xfs_getparents_rec */ + __u64 gp_buffer; +}; + +static inline struct xfs_getparents_rec * +xfs_getparents_first_rec(struct xfs_getparents *gp) +{ + return (struct xfs_getparents_rec *)(uintptr_t)gp->gp_buffer; +} + +static inline struct xfs_getparents_rec * +xfs_getparents_next_rec(struct xfs_getparents *gp, + struct xfs_getparents_rec *gpr) +{ + void *next = ((void *)gpr + gpr->gpr_reclen); + void *end = (void *)(uintptr_t)(gp->gp_buffer + gp->gp_bufsize); + + if (next >= end) + return NULL; + + return next; +} + +/* Iterate through this file handle's directory parent pointers. */ +struct xfs_getparents_by_handle { + /* Handle to file whose parents we want. 
*/ + struct xfs_handle gph_handle; + + struct xfs_getparents gph_request; +}; + /* * ioctl commands that are used by Linux filesystems */ @@ -851,6 +923,8 @@ struct xfs_exchange_range { /* XFS_IOC_GETFSMAP ------ hoisted 59 */ #define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata) #define XFS_IOC_AG_GEOMETRY _IOWR('X', 61, struct xfs_ag_geometry) +#define XFS_IOC_GETPARENTS _IOWR('X', 62, struct xfs_getparents) +#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index 25952ef584ee..e8cdd77d03fa 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -156,6 +156,11 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16); XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16); + /* parent pointer ioctls */ + XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_rec, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_getparents, 40); + XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_by_handle, 64); + /* * The v5 superblock format extended several v4 header structures with * additional data. While new fields are only accessible on v5 diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index fdf643bfde4d..504de1ef3387 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -257,3 +257,37 @@ xfs_parent_replacename( xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REPLACE); return 0; } + +/* + * Extract parent pointer information from any parent pointer xattr into + * @parent_ino/gen. The last two parameters can be NULL pointers. + * + * Returns 0 if this is not a parent pointer xattr at all; or -EFSCORRUPTED for + * garbage. 
+ */ +int +xfs_parent_from_attr( + struct xfs_mount *mp, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + xfs_ino_t *parent_ino, + uint32_t *parent_gen) +{ + const struct xfs_parent_rec *rec = value; + + ASSERT(attr_flags & XFS_ATTR_PARENT); + + if (!xfs_parent_namecheck(attr_flags, name, namelen)) + return -EFSCORRUPTED; + if (!xfs_parent_valuecheck(mp, value, valuelen)) + return -EFSCORRUPTED; + + if (parent_ino) + *parent_ino = be64_to_cpu(rec->p_ino); + if (parent_gen) + *parent_gen = be32_to_cpu(rec->p_gen); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h index 768633b31367..d7ab09e738ad 100644 --- a/fs/xfs/libxfs/xfs_parent.h +++ b/fs/xfs/libxfs/xfs_parent.h @@ -91,4 +91,9 @@ int xfs_parent_replacename(struct xfs_trans *tp, struct xfs_inode *new_dp, const struct xfs_name *new_name, struct xfs_inode *child); +int xfs_parent_from_attr(struct xfs_mount *mp, unsigned int attr_flags, + const unsigned char *name, unsigned int namelen, + const void *value, unsigned int valuelen, + xfs_ino_t *parent_ino, uint32_t *parent_gen); + #endif /* __XFS_PARENT_H__ */ diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 4b03221351c0..201489d3de08 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -102,7 +102,7 @@ xfs_fs_encode_fh( return fileid_type; } -STATIC struct inode * +struct inode * xfs_nfs_get_inode( struct super_block *sb, u64 ino, diff --git a/fs/xfs/xfs_export.h b/fs/xfs/xfs_export.h index 64471a3ddb04..3cd85e8901a5 100644 --- a/fs/xfs/xfs_export.h +++ b/fs/xfs/xfs_export.h @@ -57,4 +57,6 @@ struct xfs_fid64 { /* This flag goes on the wire. Don't play with it. 
*/ #define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ +struct inode *xfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 gen); + #endif /* __XFS_EXPORT_H__ */ diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c index b9f4d9860682..c8785ed59543 100644 --- a/fs/xfs/xfs_handle.c +++ b/fs/xfs/xfs_handle.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2022-2024 Oracle. * All rights reserved. */ #include "xfs.h" @@ -178,6 +179,30 @@ xfs_khandle_to_dentry( xfs_handle_acceptable, NULL); } +/* Convert handle already copied to kernel space into an xfs_inode. */ +static struct xfs_inode * +xfs_khandle_to_inode( + struct file *file, + struct xfs_handle *handle) +{ + struct xfs_inode *ip = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip->i_mount; + struct inode *inode; + + if (!S_ISDIR(VFS_I(ip)->i_mode)) + return ERR_PTR(-ENOTDIR); + + if (handle->ha_fid.fid_len != xfs_filehandle_fid_len()) + return ERR_PTR(-EINVAL); + + inode = xfs_nfs_get_inode(mp->m_super, handle->ha_fid.fid_ino, + handle->ha_fid.fid_gen); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + return XFS_I(inode); +} + /* * Convert userspace handle data into a dentry. 
*/ @@ -652,3 +677,276 @@ xfs_attrmulti_by_handle( dput(dentry); return error; } + +struct xfs_getparents_ctx { + struct xfs_attr_list_context context; + struct xfs_getparents_by_handle gph; + + /* File to target */ + struct xfs_inode *ip; + + /* Internal buffer where we format records */ + void *krecords; + + /* Last record filled out */ + struct xfs_getparents_rec *lastrec; + + unsigned int count; +}; + +static inline unsigned int +xfs_getparents_rec_sizeof( + unsigned int namelen) +{ + return round_up(sizeof(struct xfs_getparents_rec) + namelen + 1, + sizeof(uint64_t)); +} + +static void +xfs_getparents_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + void *value, + int valuelen) +{ + struct xfs_getparents_ctx *gpx = + container_of(context, struct xfs_getparents_ctx, context); + struct xfs_inode *ip = context->dp; + struct xfs_mount *mp = ip->i_mount; + struct xfs_getparents *gp = &gpx->gph.gph_request; + struct xfs_getparents_rec *gpr = gpx->krecords + context->firstu; + unsigned short reclen = + xfs_getparents_rec_sizeof(namelen); + xfs_ino_t ino; + uint32_t gen; + int error; + + if (!(flags & XFS_ATTR_PARENT)) + return; + + error = xfs_parent_from_attr(mp, flags, name, namelen, value, valuelen, + &ino, &gen); + if (error) { + xfs_inode_mark_sick(ip, XFS_SICK_INO_PARENT); + context->seen_enough = -EFSCORRUPTED; + return; + } + + /* + * We found a parent pointer, but we've filled up the buffer. Signal + * to the caller that we did /not/ reach the end of the parent pointer + * recordset. + */ + if (context->firstu > context->bufsize - reclen) { + context->seen_enough = 1; + return; + } + + /* Format the parent pointer into the internal record buffer.
*/ + gpr->gpr_reclen = reclen; + xfs_filehandle_init(mp, ino, gen, &gpr->gpr_parent); + memcpy(gpr->gpr_name, name, namelen); + gpr->gpr_name[namelen] = 0; + + trace_xfs_getparents_put_listent(ip, gp, context, gpr); + + context->firstu += reclen; + gpx->count++; + gpx->lastrec = gpr; +} + +/* Expand the last record to fill the rest of the caller's buffer. */ +static inline void +xfs_getparents_expand_lastrec( + struct xfs_getparents_ctx *gpx) +{ + struct xfs_getparents *gp = &gpx->gph.gph_request; + struct xfs_getparents_rec *gpr = gpx->lastrec; + + if (!gpx->lastrec) + gpr = gpx->krecords; + + gpr->gpr_reclen = gp->gp_bufsize - ((void *)gpr - gpx->krecords); + + trace_xfs_getparents_expand_lastrec(gpx->ip, gp, &gpx->context, gpr); +} + +static inline void __user *u64_to_uptr(u64 val) +{ + return (void __user *)(uintptr_t)val; +} + +/* Retrieve the parent pointers for a given inode. */ +STATIC int +xfs_getparents( + struct xfs_getparents_ctx *gpx) +{ + struct xfs_getparents *gp = &gpx->gph.gph_request; + struct xfs_inode *ip = gpx->ip; + struct xfs_mount *mp = ip->i_mount; + size_t bufsize; + int error; + + /* Check size of buffer requested by user */ + if (gp->gp_bufsize > XFS_XATTR_LIST_MAX) + return -ENOMEM; + if (gp->gp_bufsize < xfs_getparents_rec_sizeof(1)) + return -EINVAL; + + if (gp->gp_iflags & ~XFS_GETPARENTS_IFLAGS_ALL) + return -EINVAL; + if (gp->gp_reserved) + return -EINVAL; + + bufsize = round_down(gp->gp_bufsize, sizeof(uint64_t)); + gpx->krecords = kvzalloc(bufsize, GFP_KERNEL); + if (!gpx->krecords) { + bufsize = min(bufsize, PAGE_SIZE); + gpx->krecords = kvzalloc(bufsize, GFP_KERNEL); + if (!gpx->krecords) + return -ENOMEM; + } + + gpx->context.dp = ip; + gpx->context.resynch = 1; + gpx->context.put_listent = xfs_getparents_put_listent; + gpx->context.bufsize = bufsize; + /* firstu is used to track the bytes filled in the buffer */ + gpx->context.firstu = 0; + + /* Copy the cursor provided by caller */ + memcpy(&gpx->context.cursor, 
&gp->gp_cursor, + sizeof(struct xfs_attrlist_cursor)); + gpx->count = 0; + gp->gp_oflags = 0; + + trace_xfs_getparents_begin(ip, gp, &gpx->context.cursor); + + error = xfs_attr_list(&gpx->context); + if (error) + goto out_free_buf; + if (gpx->context.seen_enough < 0) { + error = gpx->context.seen_enough; + goto out_free_buf; + } + xfs_getparents_expand_lastrec(gpx); + + /* Update the caller with the current cursor position */ + memcpy(&gp->gp_cursor, &gpx->context.cursor, + sizeof(struct xfs_attrlist_cursor)); + + /* Is this the root directory? */ + if (ip->i_ino == mp->m_sb.sb_rootino) + gp->gp_oflags |= XFS_GETPARENTS_OFLAG_ROOT; + + if (gpx->context.seen_enough == 0) { + /* + * If we did not run out of buffer space, then we reached the + * end of the pptr recordset, so set the DONE flag. + */ + gp->gp_oflags |= XFS_GETPARENTS_OFLAG_DONE; + } else if (gpx->count == 0) { + /* + * If we ran out of buffer space before copying any parent + * pointers at all, the caller's buffer was too short. Tell + * userspace that, erm, the message is too long. + */ + error = -EMSGSIZE; + goto out_free_buf; + } + + trace_xfs_getparents_end(ip, gp, &gpx->context.cursor); + + ASSERT(gpx->context.firstu <= gpx->gph.gph_request.gp_bufsize); + + /* Copy the records to userspace. */ + if (copy_to_user(u64_to_uptr(gpx->gph.gph_request.gp_buffer), + gpx->krecords, gpx->context.firstu)) + error = -EFAULT; + +out_free_buf: + kvfree(gpx->krecords); + gpx->krecords = NULL; + return error; +} + +/* Retrieve the parents of this file and pass them back to userspace. 
*/ +int +xfs_ioc_getparents( + struct file *file, + struct xfs_getparents __user *ureq) +{ + struct xfs_getparents_ctx gpx = { + .ip = XFS_I(file_inode(file)), + }; + struct xfs_getparents *kreq = &gpx.gph.gph_request; + struct xfs_mount *mp = gpx.ip->i_mount; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!xfs_has_parent(mp)) + return -EOPNOTSUPP; + if (copy_from_user(kreq, ureq, sizeof(*kreq))) + return -EFAULT; + + error = xfs_getparents(&gpx); + if (error) + return error; + + if (copy_to_user(ureq, kreq, sizeof(*kreq))) + return -EFAULT; + + return 0; +} + +/* Retrieve the parents of this file handle and pass them back to userspace. */ +int +xfs_ioc_getparents_by_handle( + struct file *file, + struct xfs_getparents_by_handle __user *ureq) +{ + struct xfs_getparents_ctx gpx = { }; + struct xfs_inode *ip = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip->i_mount; + struct xfs_getparents_by_handle *kreq = &gpx.gph; + struct xfs_handle *handle = &kreq->gph_handle; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!xfs_has_parent(mp)) + return -EOPNOTSUPP; + if (copy_from_user(kreq, ureq, sizeof(*kreq))) + return -EFAULT; + + /* + * We don't use exportfs_decode_fh because it does too much work here. + * If the handle refers to a directory, the exportfs code will walk + * upwards through the directory tree to connect the dentries to the + * root directory dentry. For GETPARENTS we don't care about that + * because we're not actually going to open a file descriptor; we only + * want to open an inode and read its parent pointers. + * + * Note that xfs_scrub uses GETPARENTS to log that it will try to fix a + * corrupted file's metadata. For this usecase we would really rather + * userspace single-step the path reconstruction to avoid loops or + * other strange things if the directory tree is corrupt. 
+ */ + gpx.ip = xfs_khandle_to_inode(file, handle); + if (IS_ERR(gpx.ip)) + return PTR_ERR(gpx.ip); + + error = xfs_getparents(&gpx); + if (error) + goto out_rele; + + if (copy_to_user(ureq, kreq, sizeof(*kreq))) + error = -EFAULT; + +out_rele: + xfs_irele(gpx.ip); + return error; +} diff --git a/fs/xfs/xfs_handle.h b/fs/xfs/xfs_handle.h index e39eaf4689da..6799a86d8565 100644 --- a/fs/xfs/xfs_handle.h +++ b/fs/xfs/xfs_handle.h @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2022-2024 Oracle. * All rights reserved. */ #ifndef __XFS_HANDLE_H__ @@ -25,4 +26,8 @@ int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, struct dentry *xfs_handle_to_dentry(struct file *parfilp, void __user *uhandle, u32 hlen); +int xfs_ioc_getparents(struct file *file, struct xfs_getparents __user *arg); +int xfs_ioc_getparents_by_handle(struct file *file, + struct xfs_getparents_by_handle __user *arg); + #endif /* __XFS_HANDLE_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index ed05fcd6261d..0e97070abe80 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -35,6 +35,7 @@ #include "xfs_health.h" #include "xfs_reflink.h" #include "xfs_ioctl.h" +#include "xfs_xattr.h" #include "xfs_rtbitmap.h" #include "xfs_file.h" #include "xfs_exchrange.h" @@ -1424,7 +1425,10 @@ xfs_file_ioctl( case XFS_IOC_FSGETXATTRA: return xfs_ioc_fsgetxattra(ip, arg); - + case XFS_IOC_GETPARENTS: + return xfs_ioc_getparents(filp, arg); + case XFS_IOC_GETPARENTS_BY_HANDLE: + return xfs_ioc_getparents_by_handle(filp, arg); case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: case XFS_IOC_GETBMAPX: diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index cf92a3bd56c7..9c7fbaae2717 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -41,6 +41,7 @@ #include "xfs_bmap.h" #include "xfs_exchmaps.h" #include "xfs_exchrange.h" +#include "xfs_parent.h" /* * We include this last to have the helpers above available for the 
trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5621db48e763..05cb59bd0b80 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -87,6 +87,9 @@ struct xfs_bmap_intent; struct xfs_exchmaps_intent; struct xfs_exchmaps_req; struct xfs_exchrange; +struct xfs_getparents; +struct xfs_parent_irec; +struct xfs_attrlist_cursor_kern; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ @@ -5096,6 +5099,95 @@ TRACE_EVENT(xfs_exchmaps_delta_nextents, __entry->d_nexts1, __entry->d_nexts2) ); +DECLARE_EVENT_CLASS(xfs_getparents_rec_class, + TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, + const struct xfs_attr_list_context *context, + const struct xfs_getparents_rec *pptr), + TP_ARGS(ip, ppi, context, pptr), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, firstu) + __field(unsigned short, reclen) + __field(unsigned int, bufsize) + __field(xfs_ino_t, parent_ino) + __field(unsigned int, parent_gen) + __string(name, pptr->gpr_name) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->firstu = context->firstu; + __entry->reclen = pptr->gpr_reclen; + __entry->bufsize = ppi->gp_bufsize; + __entry->parent_ino = pptr->gpr_parent.ha_fid.fid_ino; + __entry->parent_gen = pptr->gpr_parent.ha_fid.fid_gen; + __assign_str(name, pptr->gpr_name); + ), + TP_printk("dev %d:%d ino 0x%llx firstu %u reclen %u bufsize %u parent_ino 0x%llx parent_gen 0x%x name '%s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->firstu, + __entry->reclen, + __entry->bufsize, + __entry->parent_ino, + __entry->parent_gen, + __get_str(name)) +) +#define DEFINE_XFS_GETPARENTS_REC_EVENT(name) \ +DEFINE_EVENT(xfs_getparents_rec_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, \ + const struct xfs_attr_list_context *context, \ + const struct xfs_getparents_rec *pptr), \ + TP_ARGS(ip, ppi, context, pptr)) 
+DEFINE_XFS_GETPARENTS_REC_EVENT(xfs_getparents_put_listent); +DEFINE_XFS_GETPARENTS_REC_EVENT(xfs_getparents_expand_lastrec); + +DECLARE_EVENT_CLASS(xfs_getparents_class, + TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, + const struct xfs_attrlist_cursor_kern *cur), + TP_ARGS(ip, ppi, cur), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned short, iflags) + __field(unsigned short, oflags) + __field(unsigned int, bufsize) + __field(unsigned int, hashval) + __field(unsigned int, blkno) + __field(unsigned int, offset) + __field(int, initted) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->iflags = ppi->gp_iflags; + __entry->oflags = ppi->gp_oflags; + __entry->bufsize = ppi->gp_bufsize; + __entry->hashval = cur->hashval; + __entry->blkno = cur->blkno; + __entry->offset = cur->offset; + __entry->initted = cur->initted; + ), + TP_printk("dev %d:%d ino 0x%llx iflags 0x%x oflags 0x%x bufsize %u cur_init? %d hashval 0x%x blkno %u offset %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->iflags, + __entry->oflags, + __entry->bufsize, + __entry->initted, + __entry->hashval, + __entry->blkno, + __entry->offset) +) +#define DEFINE_XFS_GETPARENTS_EVENT(name) \ +DEFINE_EVENT(xfs_getparents_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, \ + const struct xfs_attrlist_cursor_kern *cur), \ + TP_ARGS(ip, ppi, cur)) +DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_begin); +DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_end); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH From 7dafb449b7922c1eec6fee3ed85b679d51f0f431 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:56 -0700 Subject: [PATCH 26/30] xfs: don't remove the attr fork when parent pointers are enabled When an inode is removed, it may also cause the attribute fork to be removed if it is the last attribute. 
This transaction gets flushed to the log, but if the system goes down before we could inactivate the symlink, the log recovery tries to inactivate this inode (since it is on the unlinked list) but the verifier trips over the remote value and leaks it. Hence we ended up with a file in this odd state on a "clean" mount. The "obvious" fix is to prohibit erasure of the attr fork to avoid tripping over the verifiers when pptrs are enabled. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr_leaf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 1a374c6885d7..b9e98950eb3d 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -891,7 +891,8 @@ xfs_attr_sf_removename( */ if (totsize == sizeof(struct xfs_attr_sf_hdr) && xfs_has_attr2(mp) && (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) && - !(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE))) { + !(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE)) && + !xfs_has_parent(mp)) { xfs_attr_fork_remove(dp, args->trans); } else { xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); @@ -900,7 +901,8 @@ xfs_attr_sf_removename( ASSERT(totsize > sizeof(struct xfs_attr_sf_hdr) || (args->op_flags & XFS_DA_OP_ADDNAME) || !xfs_has_attr2(mp) || - dp->i_df.if_format == XFS_DINODE_FMT_BTREE); + dp->i_df.if_format == XFS_DINODE_FMT_BTREE || + xfs_has_parent(mp)); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); } From 5f98ec1cb5c264e4815e21d632ee0b3d6e700e3d Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:57 -0700 Subject: [PATCH 27/30] xfs: add a incompat feature bit for parent pointers Create an incompat feature bit and a fs geometry flag so that we can enable the feature in the ondisk superblock and advertise its existence to userspace. 
Signed-off-by: Mark Tinguely Signed-off-by: Dave Chinner Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_format.h | 1 + fs/xfs/libxfs/xfs_fs.h | 1 + fs/xfs/libxfs/xfs_sb.c | 4 ++++ fs/xfs/xfs_super.c | 4 ++++ 4 files changed, 10 insertions(+) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index f1818c54af6f..b457e457e1f7 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -374,6 +374,7 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */ #define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */ #define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6) /* exchangerange supported */ +#define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE | \ XFS_SB_FEAT_INCOMPAT_SPINODES | \ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index ea654df0505f..dd13bfa500f2 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -240,6 +240,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_INOBTCNT (1 << 22) /* inobt btree counter */ #define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */ #define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24) /* exchange range */ +#define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */ /* * Minimum and maximum sizes need for growth checks. 
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index c350e259b685..09e4bf949bf8 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -178,6 +178,8 @@ xfs_sb_version_to_features( features |= XFS_FEAT_NREXT64; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE) features |= XFS_FEAT_EXCHANGE_RANGE; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT) + features |= XFS_FEAT_PARENT; return features; } @@ -1254,6 +1256,8 @@ xfs_fs_geometry( geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME; if (xfs_has_inobtcounts(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_INOBTCNT; + if (xfs_has_parent(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_PARENT; if (xfs_has_sector(mp)) { geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; geo->logsectsize = sbp->sb_logsectsize; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c303d7ff9597..27e9f749c4c7 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1746,6 +1746,10 @@ xfs_fs_fill_super( xfs_warn(mp, "EXPERIMENTAL exchange-range feature enabled. Use at your own risk!"); + if (xfs_has_parent(mp)) + xfs_warn(mp, + "EXPERIMENTAL parent pointer feature enabled. Use at your own risk!"); + error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; From 7ea816ca4043c2bc6052f696b6aebe2c22980a03 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:58 -0700 Subject: [PATCH 28/30] xfs: fix unit conversion error in xfs_log_calc_max_attrsetm_res Dave and I were discussing some recent test regressions as a result of me turning on nrext64=1 on realtime filesystems, when we noticed that the minimum log size of a 32M filesystem jumped from 954 blocks to 4287 blocks. Digging through xfs_log_calc_max_attrsetm_res, Dave noticed that @size contains the maximum estimated amount of space needed for a local format xattr, in bytes, but we feed this quantity to XFS_NEXTENTADD_SPACE_RES, which requires units of blocks. 
This has resulted in an overestimation of the minimum log size over the years. We should nominally correct this, but there's a backwards compatibility problem -- if we enable it now, the minimum log size will decrease. If a corrected mkfs formats a filesystem with this new smaller log size, a user will encounter mount failures on an uncorrected kernel due to the larger minimum log size computations there. Therefore, turn this on for parent pointers because it wasn't merged at all upstream when this issue was discovered. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_log_rlimit.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index 9975b93a7412..3518d5e21df0 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -16,6 +16,29 @@ #include "xfs_bmap_btree.h" #include "xfs_trace.h" +/* + * Shortly after enabling the large extents count feature in 2023, longstanding + * bugs were found in the code that computes the minimum log size. Luckily, + * the bugs resulted in over-estimates of that size, so there's no impact to + * existing users. However, we don't want to reduce the minimum log size + * because that can create the situation where a newer mkfs writes a new + * filesystem that an older kernel won't mount. + * + * Therefore, we only may correct the computation starting with filesystem + * features that didn't exist in 2023. In other words, only turn this on if + * the filesystem has parent pointers. + * + * This function can be called before the XFS_HAS_* flags have been set up, + * (e.g. mkfs) so we must check the ondisk superblock. 
+ */ +static inline bool +xfs_want_minlogsize_fixes( + struct xfs_sb *sb) +{ + return xfs_sb_is_v5(sb) && + xfs_sb_has_incompat_feature(sb, XFS_SB_FEAT_INCOMPAT_PARENT); +} + /* * Calculate the maximum length in bytes that would be required for a local * attribute value as large attributes out of line are not logged. @@ -31,6 +54,15 @@ xfs_log_calc_max_attrsetm_res( MAXNAMELEN - 1; nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); nblks += XFS_B_TO_FSB(mp, size); + + /* + * If the feature set is new enough, correct a unit conversion error in + * the xattr transaction reservation code that resulted in oversized + * minimum log size computations. + */ + if (xfs_want_minlogsize_fixes(&mp->m_sb)) + size = XFS_B_TO_FSB(mp, size); + nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK); return M_RES(mp)->tr_attrsetm.tr_logres + From 6ed858c7c678218aa8df9d9e75d5e9955c105415 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:59 -0700 Subject: [PATCH 29/30] xfs: drop compatibility minimum log size computations for reflink Let's also drop the oversized minimum log computations for reflink and rmap that were the result of bugs introduced many years ago. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_log_rlimit.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index 3518d5e21df0..d3bd6a86c8fe 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -24,6 +24,11 @@ * because that can create the situation where a newer mkfs writes a new * filesystem that an older kernel won't mount. * + * Several years prior, we also discovered that the transaction reservations + * for rmap and reflink operations were unnecessarily large. That was fixed, + * but the minimum log size computation was left alone to avoid the + * compatibility problems noted above. Fix that too. 
+ * * Therefore, we only may correct the computation starting with filesystem * features that didn't exist in 2023. In other words, only turn this on if * the filesystem has parent pointers. @@ -80,6 +85,15 @@ xfs_log_calc_trans_resv_for_minlogblocks( { unsigned int rmap_maxlevels = mp->m_rmap_maxlevels; + /* + * If the feature set is new enough, drop the oversized minimum log + * size computation introduced by the original reflink code. + */ + if (xfs_want_minlogsize_fixes(&mp->m_sb)) { + xfs_trans_resv_calc(mp, resv); + return; + } + /* * In the early days of rmap+reflink, we always set the rmap maxlevels * to 9 even if the AG was small enough that it would never grow to From 67ac7091e35bd34b75c0ec77331b53ca052e0cb3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:48:00 -0700 Subject: [PATCH 30/30] xfs: enable parent pointers Add parent pointers to the list of supported features. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_format.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index b457e457e1f7..61f51becff4f 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -382,7 +382,8 @@ xfs_sb_has_ro_compat_feature( XFS_SB_FEAT_INCOMPAT_BIGTIME | \ XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR | \ XFS_SB_FEAT_INCOMPAT_NREXT64 | \ - XFS_SB_FEAT_INCOMPAT_EXCHRANGE) + XFS_SB_FEAT_INCOMPAT_EXCHRANGE | \ + XFS_SB_FEAT_INCOMPAT_PARENT) #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool