From 9a48b4a6fd512bdaed7e38ba844be743163d49c6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:13 -0800 Subject: [PATCH 1/4] xfs: fully initialize xfs_da_args in xchk_directory_blocks While running the online fsck test suite, I noticed the following assertion in the kernel log (edited for brevity): XFS: Assertion failed: 0, file: fs/xfs/xfs_health.c, line: 571 ------------[ cut here ]------------ WARNING: CPU: 3 PID: 11667 at fs/xfs/xfs_message.c:104 assfail+0x46/0x4a [xfs] CPU: 3 PID: 11667 Comm: xfs_scrub Tainted: G W 5.19.0-rc7-xfsx #rc7 6e6475eb29fd9dda3181f81b7ca7ff961d277a40 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 RIP: 0010:assfail+0x46/0x4a [xfs] Call Trace: xfs_dir2_isblock+0xcc/0xe0 xchk_directory_blocks+0xc7/0x420 xchk_directory+0x53/0xb0 xfs_scrub_metadata+0x2b6/0x6b0 xfs_scrubv_metadata+0x35e/0x4d0 xfs_ioc_scrubv_metadata+0x111/0x160 xfs_file_ioctl+0x4ec/0xef0 __x64_sys_ioctl+0x82/0xa0 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 This assertion triggers in xfs_dirattr_mark_sick when the caller passes in a whichfork value that is neither of XFS_{DATA,ATTR}_FORK. The cause of this is that xchk_directory_blocks only partially initializes the xfs_da_args structure that is passed to xfs_dir2_isblock. If the data fork is not correct, the XFS_IS_CORRUPT clause will trigger. My development branch reports this failure to the health monitoring subsystem, which accesses the uninitialized args->whichfork field, leading to the assertion tripping. We really shouldn't be passing random stack contents around, so the solution here is to force the compiler to zero-initialize the struct. Found by fuzzing u3.bmx[0].blockcount = middlebit on xfs/1554. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/dir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 5c87800ab223..d1b0f23c2c59 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -666,7 +666,12 @@ xchk_directory_blocks( struct xfs_scrub *sc) { struct xfs_bmbt_irec got; - struct xfs_da_args args; + struct xfs_da_args args = { + .dp = sc ->ip, + .whichfork = XFS_DATA_FORK, + .geo = sc->mp->m_dir_geo, + .trans = sc->tp, + }; struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); struct xfs_mount *mp = sc->mp; xfs_fileoff_t leaf_lblk; @@ -689,9 +694,6 @@ xchk_directory_blocks( free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET); /* Is this a block dir? */ - args.dp = sc->ip; - args.geo = mp->m_dir_geo; - args.trans = sc->tp; error = xfs_dir2_isblock(&args, &is_block); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; From be1317fdb8d4e3ccbac43e199b360c248c600d99 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:14 -0800 Subject: [PATCH 2/4] xfs: don't track the AGFL buffer in the scrub AG context While scrubbing an allocation group, we don't need to hold the AGFL buffer as part of the scrub context. All that is necessary to lock an AG is to hold the AGI and AGF buffers, so fix all the existing users of the AGFL buffer to grab them only when necessary. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 47 +++++++++++++++++++++------------- fs/xfs/scrub/agheader_repair.c | 1 - fs/xfs/scrub/common.c | 8 ------ fs/xfs/scrub/repair.c | 11 ++++---- fs/xfs/scrub/scrub.h | 1 - 5 files changed, 35 insertions(+), 33 deletions(-) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index b7b838bd4ba4..af284baa6f4c 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -609,9 +609,16 @@ xchk_agf( /* AGFL */ struct xchk_agfl_info { - unsigned int sz_entries; + /* Number of AGFL entries that the AGF claims are in use. */ + unsigned int agflcount; + + /* Number of AGFL entries that we found. */ unsigned int nr_entries; + + /* Buffer to hold AGFL entries for extent checking. */ xfs_agblock_t *entries; + + struct xfs_buf *agfl_bp; struct xfs_scrub *sc; }; @@ -641,10 +648,10 @@ xchk_agfl_block( struct xfs_scrub *sc = sai->sc; if (xfs_verify_agbno(sc->sa.pag, agbno) && - sai->nr_entries < sai->sz_entries) + sai->nr_entries < sai->agflcount) sai->entries[sai->nr_entries++] = agbno; else - xchk_block_set_corrupt(sc, sc->sa.agfl_bp); + xchk_block_set_corrupt(sc, sai->agfl_bp); xchk_agfl_block_xref(sc, agbno); @@ -696,19 +703,26 @@ int xchk_agfl( struct xfs_scrub *sc) { - struct xchk_agfl_info sai; + struct xchk_agfl_info sai = { + .sc = sc, + }; struct xfs_agf *agf; xfs_agnumber_t agno = sc->sm->sm_agno; - unsigned int agflcount; unsigned int i; int error; + /* Lock the AGF and AGI so that nobody can touch this AG. */ error = xchk_ag_read_headers(sc, agno, &sc->sa); if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error)) - goto out; + return error; if (!sc->sa.agf_bp) return -EFSCORRUPTED; - xchk_buffer_recheck(sc, sc->sa.agfl_bp); + + /* Try to read the AGFL, and verify its structure if we get it. 
*/ + error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &sai.agfl_bp); + if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error)) + return error; + xchk_buffer_recheck(sc, sai.agfl_bp); xchk_agfl_xref(sc); @@ -717,24 +731,21 @@ xchk_agfl( /* Allocate buffer to ensure uniqueness of AGFL entries. */ agf = sc->sa.agf_bp->b_addr; - agflcount = be32_to_cpu(agf->agf_flcount); - if (agflcount > xfs_agfl_size(sc->mp)) { + sai.agflcount = be32_to_cpu(agf->agf_flcount); + if (sai.agflcount > xfs_agfl_size(sc->mp)) { xchk_block_set_corrupt(sc, sc->sa.agf_bp); goto out; } - memset(&sai, 0, sizeof(sai)); - sai.sc = sc; - sai.sz_entries = agflcount; - sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, - KM_MAYFAIL); + sai.entries = kvcalloc(sai.agflcount, sizeof(xfs_agblock_t), + GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!sai.entries) { error = -ENOMEM; goto out; } /* Check the blocks in the AGFL. */ - error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr, - sc->sa.agfl_bp, xchk_agfl_block, &sai); + error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr, sai.agfl_bp, + xchk_agfl_block, &sai); if (error == -ECANCELED) { error = 0; goto out_free; @@ -742,7 +753,7 @@ xchk_agfl( if (error) goto out_free; - if (agflcount != sai.nr_entries) { + if (sai.agflcount != sai.nr_entries) { xchk_block_set_corrupt(sc, sc->sa.agf_bp); goto out_free; } @@ -758,7 +769,7 @@ xchk_agfl( } out_free: - kmem_free(sai.entries); + kvfree(sai.entries); out: return error; } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 1b0b4e243f77..2e75ff9b5b2e 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -697,7 +697,6 @@ xrep_agfl( * freespace overflow to the freespace btrees. 
*/ sc->sa.agf_bp = agf_bp; - sc->sa.agfl_bp = agfl_bp; error = xrep_roll_ag_trans(sc); if (error) goto err; diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 9bbbf20f401b..ad70f29233c3 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -424,10 +424,6 @@ xchk_ag_read_headers( if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) return error; - error = xfs_alloc_read_agfl(sa->pag, sc->tp, &sa->agfl_bp); - if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) - return error; - return 0; } @@ -515,10 +511,6 @@ xchk_ag_free( struct xchk_ag *sa) { xchk_ag_btcur_free(sa); - if (sa->agfl_bp) { - xfs_trans_brelse(sc->tp, sa->agfl_bp); - sa->agfl_bp = NULL; - } if (sa->agf_bp) { xfs_trans_brelse(sc->tp, sa->agf_bp); sa->agf_bp = NULL; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index c18bd039fce9..2ada7fc1c398 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -126,8 +126,6 @@ xrep_roll_ag_trans( xfs_trans_bhold(sc->tp, sc->sa.agi_bp); if (sc->sa.agf_bp) xfs_trans_bhold(sc->tp, sc->sa.agf_bp); - if (sc->sa.agfl_bp) - xfs_trans_bhold(sc->tp, sc->sa.agfl_bp); /* * Roll the transaction. We still own the buffer and the buffer lock @@ -145,8 +143,6 @@ xrep_roll_ag_trans( xfs_trans_bjoin(sc->tp, sc->sa.agi_bp); if (sc->sa.agf_bp) xfs_trans_bjoin(sc->tp, sc->sa.agf_bp); - if (sc->sa.agfl_bp) - xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp); return 0; } @@ -498,6 +494,7 @@ xrep_put_freelist( struct xfs_scrub *sc, xfs_agblock_t agbno) { + struct xfs_buf *agfl_bp; int error; /* Make sure there's space on the freelist. */ @@ -516,8 +513,12 @@ xrep_put_freelist( return error; /* Put the block on the AGFL. 
*/ + error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp); + if (error) + return error; + error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp, - sc->sa.agfl_bp, agbno, 0); + agfl_bp, agbno, 0); if (error) return error; xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1, diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 3de5287e98d8..151567f88366 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -39,7 +39,6 @@ struct xchk_ag { /* AG btree roots */ struct xfs_buf *agf_bp; - struct xfs_buf *agfl_bp; struct xfs_buf *agi_bp; /* AG btrees */ From 3e59c0103e66d6e687a8b47fd70169542aba938e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:14 -0800 Subject: [PATCH 3/4] xfs: log the AGI/AGF buffers when rolling transactions during an AG repair Currently, the only way to lock an allocation group is to hold the AGI and AGF buffers. If a repair needs to roll the transaction while repairing some AG metadata, it maintains that lock by holding the two buffers across the transaction roll and joins them afterwards. However, repair is not like other parts of XFS that employ the bhold - roll - bjoin sequence because it's possible that the AGI or AGF buffers are not actually dirty before the roll. This presents two problems -- First, we need to redirty those buffers to keep them moving along in the log to avoid pinning the log tail. Second, a clean buffer log item can detach from the buffer. If this happens, the buffer type state is discarded along with the bli and must be reattached before the next time the buffer is logged. If it is not, the logging code will complain and log recovery will not work properly. An earlier version of this patch tried to fix the second problem by re-setting the buffer type in the bli after joining the buffer to the new transaction, but that looked weird and didn't solve the first problem. Instead, solve both problems by logging the buffer before rolling the transaction. 
Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/repair.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 2ada7fc1c398..22335619c84e 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -121,24 +121,36 @@ xrep_roll_ag_trans( { int error; - /* Keep the AG header buffers locked so we can keep going. */ - if (sc->sa.agi_bp) + /* + * Keep the AG header buffers locked while we roll the transaction. + * Ensure that both AG buffers are dirty and held when we roll the + * transaction so that they move forward in the log without losing the + * bli (and hence the bli type) when the transaction commits. + * + * Normal code would never hold clean buffers across a roll, but repair + * needs both buffers to maintain a total lock on the AG. + */ + if (sc->sa.agi_bp) { + xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM); xfs_trans_bhold(sc->tp, sc->sa.agi_bp); - if (sc->sa.agf_bp) + } + + if (sc->sa.agf_bp) { + xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM); xfs_trans_bhold(sc->tp, sc->sa.agf_bp); + } /* - * Roll the transaction. We still own the buffer and the buffer lock - * regardless of whether or not the roll succeeds. If the roll fails, - * the buffers will be released during teardown on our way out of the - * kernel. If it succeeds, we join them to the new transaction and - * move on. + * Roll the transaction. We still hold the AG header buffers locked + * regardless of whether or not that succeeds. On failure, the buffers + * will be released during teardown on our way out of the kernel. If + * successful, join the buffers to the new transaction and move on. */ error = xfs_trans_roll(&sc->tp); if (error) return error; - /* Join AG headers to the new transaction. */ + /* Join the AG headers to the new transaction. 
*/ if (sc->sa.agi_bp) xfs_trans_bjoin(sc->tp, sc->sa.agi_bp); if (sc->sa.agf_bp) From b255fab0f80cc65a334fcd90cd278673cddbc988 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 6 Nov 2022 17:03:14 -0800 Subject: [PATCH 4/4] xfs: make AGFL repair function avoid crosslinked blocks Teach the AGFL repair function to check each block of the proposed AGFL against the rmap btree. If the rmapbt finds any mappings that are not OWN_AG, strike that block from the list. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader_repair.c | 78 ++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 12 deletions(-) diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 2e75ff9b5b2e..82ceb60ea5fc 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -442,12 +442,18 @@ xrep_agf( /* AGFL */ struct xrep_agfl { + /* Bitmap of alleged AGFL blocks that we're not going to add. */ + struct xbitmap crossed; + /* Bitmap of other OWN_AG metadata blocks. */ struct xbitmap agmetablocks; /* Bitmap of free space. */ struct xbitmap *freesp; + /* rmapbt cursor for finding crosslinked blocks */ + struct xfs_btree_cur *rmap_cur; + struct xfs_scrub *sc; }; @@ -477,6 +483,41 @@ xrep_agfl_walk_rmap( return xbitmap_set_btcur_path(&ra->agmetablocks, cur); } +/* Strike out the blocks that are cross-linked according to the rmapbt. 
*/ +STATIC int +xrep_agfl_check_extent( + struct xrep_agfl *ra, + uint64_t start, + uint64_t len) +{ + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(ra->sc->mp, start); + xfs_agblock_t last_agbno = agbno + len - 1; + int error; + + ASSERT(XFS_FSB_TO_AGNO(ra->sc->mp, start) == ra->sc->sa.pag->pag_agno); + + while (agbno <= last_agbno) { + bool other_owners; + + error = xfs_rmap_has_other_keys(ra->rmap_cur, agbno, 1, + &XFS_RMAP_OINFO_AG, &other_owners); + if (error) + return error; + + if (other_owners) { + error = xbitmap_set(&ra->crossed, agbno, 1); + if (error) + return error; + } + + if (xchk_should_terminate(ra->sc, &error)) + return error; + agbno++; + } + + return 0; +} + /* * Map out all the non-AGFL OWN_AG space in this AG so that we can deduce * which blocks belong to the AGFL. @@ -496,44 +537,58 @@ xrep_agfl_collect_blocks( struct xrep_agfl ra; struct xfs_mount *mp = sc->mp; struct xfs_btree_cur *cur; + struct xbitmap_range *br, *n; int error; ra.sc = sc; ra.freesp = agfl_extents; xbitmap_init(&ra.agmetablocks); + xbitmap_init(&ra.crossed); /* Find all space used by the free space btrees & rmapbt. */ cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); error = xfs_rmap_query_all(cur, xrep_agfl_walk_rmap, &ra); - if (error) - goto err; xfs_btree_del_cursor(cur, error); + if (error) + goto out_bmp; /* Find all blocks currently being used by the bnobt. */ cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag, XFS_BTNUM_BNO); error = xbitmap_set_btblocks(&ra.agmetablocks, cur); - if (error) - goto err; xfs_btree_del_cursor(cur, error); + if (error) + goto out_bmp; /* Find all blocks currently being used by the cntbt. */ cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag, XFS_BTNUM_CNT); error = xbitmap_set_btblocks(&ra.agmetablocks, cur); - if (error) - goto err; - xfs_btree_del_cursor(cur, error); + if (error) + goto out_bmp; /* * Drop the freesp meta blocks that are in use by btrees. * The remaining blocks /should/ be AGFL blocks. 
*/ error = xbitmap_disunion(agfl_extents, &ra.agmetablocks); - xbitmap_destroy(&ra.agmetablocks); if (error) - return error; + goto out_bmp; + + /* Strike out the blocks that are cross-linked. */ + ra.rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); + for_each_xbitmap_extent(br, n, agfl_extents) { + error = xrep_agfl_check_extent(&ra, br->start, br->len); + if (error) + break; + } + xfs_btree_del_cursor(ra.rmap_cur, error); + if (error) + goto out_bmp; + error = xbitmap_disunion(agfl_extents, &ra.crossed); + if (error) + goto out_bmp; /* * Calculate the new AGFL size. If we found more blocks than fit in @@ -541,11 +596,10 @@ xrep_agfl_collect_blocks( */ *flcount = min_t(uint64_t, xbitmap_hweight(agfl_extents), xfs_agfl_size(mp)); - return 0; -err: +out_bmp: + xbitmap_destroy(&ra.crossed); xbitmap_destroy(&ra.agmetablocks); - xfs_btree_del_cursor(cur, error); return error; }