mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 11:06:41 -05:00
ceph: fix race condition where r_parent becomes stale before sending message
When the parent directory's i_rwsem is not locked, req->r_parent may become stale due to concurrent operations (e.g. rename) between dentry lookup and message creation. Validate that r_parent matches the encoded parent inode and update to the correct inode if a mismatch is detected. [ idryomov: folded a follow-up fix from Alex to drop extra reference from ceph_get_reply_dir() in ceph_fill_trace(): ceph_get_reply_dir() may return a different, referenced inode when r_parent is stale and the parent directory lock is not held. ceph_fill_trace() used that inode but failed to drop the reference when it differed from req->r_parent, leaking an inode reference. Keep the directory inode in a local variable and iput() it at function end if it does not match req->r_parent. ] Cc: stable@vger.kernel.org Signed-off-by: Alex Markuze <amarkuze@redhat.com> Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
committed by
Ilya Dryomov
parent
15f519e9f8
commit
bec324f33d
@@ -55,6 +55,52 @@ static int ceph_set_ino_cb(struct inode *inode, void *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the parent inode matches the vino from directory reply info
|
||||
*/
|
||||
static inline bool ceph_vino_matches_parent(struct inode *parent,
|
||||
struct ceph_vino vino)
|
||||
{
|
||||
return ceph_ino(parent) == vino.ino && ceph_snap(parent) == vino.snap;
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate that the directory inode referenced by @req->r_parent matches the
|
||||
* inode number and snapshot id contained in the reply's directory record. If
|
||||
* they do not match – which can theoretically happen if the parent dentry was
|
||||
* moved between the time the request was issued and the reply arrived – fall
|
||||
* back to looking up the correct inode in the inode cache.
|
||||
*
|
||||
* A reference is *always* returned. Callers that receive a different inode
|
||||
* than the original @parent are responsible for dropping the extra reference
|
||||
* once the reply has been processed.
|
||||
*/
|
||||
static struct inode *ceph_get_reply_dir(struct super_block *sb,
|
||||
struct inode *parent,
|
||||
struct ceph_mds_reply_info_parsed *rinfo)
|
||||
{
|
||||
struct ceph_vino vino;
|
||||
|
||||
if (unlikely(!rinfo->diri.in))
|
||||
return parent; /* nothing to compare against */
|
||||
|
||||
/* If we didn't have a cached parent inode to begin with, just bail out. */
|
||||
if (!parent)
|
||||
return NULL;
|
||||
|
||||
vino.ino = le64_to_cpu(rinfo->diri.in->ino);
|
||||
vino.snap = le64_to_cpu(rinfo->diri.in->snapid);
|
||||
|
||||
if (likely(ceph_vino_matches_parent(parent, vino)))
|
||||
return parent; /* matches – use the original reference */
|
||||
|
||||
/* Mismatch – this should be rare. Emit a WARN and obtain the correct inode. */
|
||||
WARN_ONCE(1, "ceph: reply dir mismatch (parent valid %llx.%llx reply %llx.%llx)\n",
|
||||
ceph_ino(parent), ceph_snap(parent), vino.ino, vino.snap);
|
||||
|
||||
return ceph_get_inode(sb, vino, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* ceph_new_inode - allocate a new inode in advance of an expected create
|
||||
* @dir: parent directory for new inode
|
||||
@@ -1523,6 +1569,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||
struct ceph_vino tvino, dvino;
|
||||
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
|
||||
struct ceph_client *cl = fsc->client;
|
||||
struct inode *parent_dir = NULL;
|
||||
int err = 0;
|
||||
|
||||
doutc(cl, "%p is_dentry %d is_target %d\n", req,
|
||||
@@ -1536,10 +1583,17 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||
}
|
||||
|
||||
if (rinfo->head->is_dentry) {
|
||||
struct inode *dir = req->r_parent;
|
||||
|
||||
if (dir) {
|
||||
err = ceph_fill_inode(dir, NULL, &rinfo->diri,
|
||||
/*
|
||||
* r_parent may be stale, in cases when R_PARENT_LOCKED is not set,
|
||||
* so we need to get the correct inode
|
||||
*/
|
||||
parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo);
|
||||
if (unlikely(IS_ERR(parent_dir))) {
|
||||
err = PTR_ERR(parent_dir);
|
||||
goto done;
|
||||
}
|
||||
if (parent_dir) {
|
||||
err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri,
|
||||
rinfo->dirfrag, session, -1,
|
||||
&req->r_caps_reservation);
|
||||
if (err < 0)
|
||||
@@ -1548,14 +1602,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
|
||||
if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
|
||||
if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
|
||||
test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
|
||||
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
|
||||
bool is_nokey = false;
|
||||
struct qstr dname;
|
||||
struct dentry *dn, *parent;
|
||||
struct fscrypt_str oname = FSTR_INIT(NULL, 0);
|
||||
struct ceph_fname fname = { .dir = dir,
|
||||
struct ceph_fname fname = { .dir = parent_dir,
|
||||
.name = rinfo->dname,
|
||||
.ctext = rinfo->altname,
|
||||
.name_len = rinfo->dname_len,
|
||||
@@ -1564,10 +1618,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||
BUG_ON(!rinfo->head->is_target);
|
||||
BUG_ON(req->r_dentry);
|
||||
|
||||
parent = d_find_any_alias(dir);
|
||||
parent = d_find_any_alias(parent_dir);
|
||||
BUG_ON(!parent);
|
||||
|
||||
err = ceph_fname_alloc_buffer(dir, &oname);
|
||||
err = ceph_fname_alloc_buffer(parent_dir, &oname);
|
||||
if (err < 0) {
|
||||
dput(parent);
|
||||
goto done;
|
||||
@@ -1576,7 +1630,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||
err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
|
||||
if (err < 0) {
|
||||
dput(parent);
|
||||
ceph_fname_free_buffer(dir, &oname);
|
||||
ceph_fname_free_buffer(parent_dir, &oname);
|
||||
goto done;
|
||||
}
|
||||
dname.name = oname.name;
|
||||
@@ -1595,7 +1649,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||
dname.len, dname.name, dn);
|
||||
if (!dn) {
|
||||
dput(parent);
|
||||
ceph_fname_free_buffer(dir, &oname);
|
||||
ceph_fname_free_buffer(parent_dir, &oname);
|
||||
err = -ENOMEM;
|
||||
goto done;
|
||||
}
|
||||
@@ -1610,12 +1664,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||
ceph_snap(d_inode(dn)) != tvino.snap)) {
|
||||
doutc(cl, " dn %p points to wrong inode %p\n",
|
||||
dn, d_inode(dn));
|
||||
ceph_dir_clear_ordered(dir);
|
||||
ceph_dir_clear_ordered(parent_dir);
|
||||
d_delete(dn);
|
||||
dput(dn);
|
||||
goto retry_lookup;
|
||||
}
|
||||
ceph_fname_free_buffer(dir, &oname);
|
||||
ceph_fname_free_buffer(parent_dir, &oname);
|
||||
|
||||
req->r_dentry = dn;
|
||||
dput(parent);
|
||||
@@ -1794,6 +1848,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||
&dvino, ptvino);
|
||||
}
|
||||
done:
|
||||
/* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */
|
||||
if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent))
|
||||
iput(parent_dir);
|
||||
doutc(cl, "done err=%d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user