diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 18:32:43 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 18:32:43 -0700 |
commit | b41dae061bbd722b9d7fa828f35d22035b218e18 (patch) | |
tree | a5c0bade0c3d221483b54204bfc47e4fdbf09316 /fs/xfs/libxfs | |
parent | e6bc9de714972cac34daa1dc1567ee48a47a9342 (diff) | |
parent | 14e15f1bcd738dc13dd7c1e78e4800e8bc577980 (diff) |
Merge tag 'xfs-5.4-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong:
"For this cycle we have the usual pile of cleanups and bug fixes, some
performance improvements for online metadata scrubbing, massive
speedups in the directory entry creation code, some performance
improvement in the file ACL lookup code, a fix for a logging stall
during mount, and fixes for concurrency problems.
It has survived a couple of weeks of xfstests runs and merges cleanly.
Summary:
- Remove KM_SLEEP/KM_NOSLEEP.
- Ensure that memory buffers for IO are properly sector-aligned to
avoid problems that the block layer doesn't check.
- Make the bmap scrubber more efficient in its record checking.
- Don't crash xfs_db when superblock inode geometry is corrupt.
- Fix btree key helper functions.
- Remove unneeded error returns for things that can't fail.
- Fix buffer logging bugs in repair.
- Clean up iterator return values.
- Speed up directory entry creation.
- Enable allocation of xattr value memory buffer during lookup.
- Fix readahead racing with truncate/punch hole.
- Other minor cleanups.
- Fix one AGI/AGF deadlock with RENAME_WHITEOUT.
- More BUG -> WARN whackamole.
- Fix various problems with the log failing to advance under certain
circumstances, which results in stalls during mount"
* tag 'xfs-5.4-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (45 commits)
xfs: push the grant head when the log head moves forward
xfs: push iclog state cleaning into xlog_state_clean_log
xfs: factor iclog state processing out of xlog_state_do_callback()
xfs: factor callbacks out of xlog_state_do_callback()
xfs: factor debug code out of xlog_state_do_callback()
xfs: prevent CIL push holdoff in log recovery
xfs: fix missed wakeup on l_flush_wait
xfs: push the AIL in xlog_grant_head_wake
xfs: Use WARN_ON_ONCE for bailout mount-operation
xfs: Fix deadlock between AGI and AGF with RENAME_WHITEOUT
xfs: define a flags field for the AG geometry ioctl structure
xfs: add a xfs_valid_startblock helper
xfs: remove the unused XFS_ALLOC_USERDATA flag
xfs: cleanup xfs_fsb_to_db
xfs: fix the dax supported check in xfs_ioctl_setattr_dax_invalidate
xfs: Fix stale data exposure when readahead races with hole punch
fs: Export generic_fadvise()
mm: Handle MADV_WILLNEED through vfs_fadvise()
xfs: allocate xattr buffer on demand
xfs: consolidate attribute value copying
...
Diffstat (limited to 'fs/xfs/libxfs')
28 files changed, 639 insertions, 618 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 372ad55631fc..533b04aaf6f6 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2205,7 +2205,7 @@ xfs_defer_agfl_block( ASSERT(xfs_bmap_free_item_zone != NULL); ASSERT(oinfo != NULL); - new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); + new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0); new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); new->xefi_blockcount = 1; new->xefi_oinfo = *oinfo; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index d6ed5d2c07c2..58fa85cec325 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -81,10 +81,9 @@ typedef struct xfs_alloc_arg { /* * Defines for datatype */ -#define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/ -#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ -#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ -#define XFS_ALLOC_NOBUSY (1 << 3)/* Busy extents not allowed */ +#define XFS_ALLOC_INITIAL_USER_DATA (1 << 0)/* special case start of file */ +#define XFS_ALLOC_USERDATA_ZERO (1 << 1)/* zero extent on allocation */ +#define XFS_ALLOC_NOBUSY (1 << 2)/* Busy extents not allowed */ static inline bool xfs_alloc_is_userdata(int datatype) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index d48fcf11cc35..510ca6974604 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -97,7 +97,10 @@ xfs_inode_hasattr( * Overall external interface routines. *========================================================================*/ -/* Retrieve an extended attribute and its value. Must have ilock. */ +/* + * Retrieve an extended attribute and its value. Must have ilock. + * Returns 0 on successful retrieval, otherwise an error. 
+ */ int xfs_attr_get_ilocked( struct xfs_inode *ip, @@ -115,12 +118,28 @@ xfs_attr_get_ilocked( return xfs_attr_node_get(args); } -/* Retrieve an extended attribute by name, and its value. */ +/* + * Retrieve an extended attribute by name, and its value if requested. + * + * If ATTR_KERNOVAL is set in @flags, then the caller does not want the value, + * just an indication whether the attribute exists and the size of the value if + * it exists. The size is returned in @valuelenp, + * + * If the attribute is found, but exceeds the size limit set by the caller in + * @valuelenp, return -ERANGE with the size of the attribute that was found in + * @valuelenp. + * + * If ATTR_ALLOC is set in @flags, allocate the buffer for the value after + * existence of the attribute has been determined. On success, return that + * buffer to the caller and leave them to free it. On failure, free any + * allocated buffer and ensure the buffer pointer returned to the caller is + * null. + */ int xfs_attr_get( struct xfs_inode *ip, const unsigned char *name, - unsigned char *value, + unsigned char **value, int *valuelenp, int flags) { @@ -128,6 +147,8 @@ xfs_attr_get( uint lock_mode; int error; + ASSERT((flags & (ATTR_ALLOC | ATTR_KERNOVAL)) || *value); + XFS_STATS_INC(ip->i_mount, xs_attr_get); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -137,17 +158,29 @@ xfs_attr_get( if (error) return error; - args.value = value; - args.valuelen = *valuelenp; /* Entirely possible to look up a name which doesn't exist */ args.op_flags = XFS_DA_OP_OKNOENT; + if (flags & ATTR_ALLOC) + args.op_flags |= XFS_DA_OP_ALLOCVAL; + else + args.value = *value; + args.valuelen = *valuelenp; lock_mode = xfs_ilock_attr_map_shared(ip); error = xfs_attr_get_ilocked(ip, &args); xfs_iunlock(ip, lock_mode); - *valuelenp = args.valuelen; - return error == -EEXIST ? 
0 : error; + + /* on error, we have to clean up allocated value buffers */ + if (error) { + if (flags & ATTR_ALLOC) { + kmem_free(args.value); + *value = NULL; + } + return error; + } + *value = args.value; + return 0; } /* @@ -768,6 +801,8 @@ xfs_attr_leaf_removename( * * This leaf block cannot have a "remote" value, we only call this routine * if bmap_one_block() says there is only one block (ie: no remote blks). + * + * Returns 0 on successful retrieval, otherwise an error. */ STATIC int xfs_attr_leaf_get(xfs_da_args_t *args) @@ -789,9 +824,6 @@ xfs_attr_leaf_get(xfs_da_args_t *args) } error = xfs_attr3_leaf_getvalue(bp, args); xfs_trans_brelse(args->trans, bp); - if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) { - error = xfs_attr_rmtval_get(args); - } return error; } @@ -1268,11 +1300,13 @@ xfs_attr_refillstate(xfs_da_state_t *state) } /* - * Look up a filename in a node attribute list. + * Retrieve the attribute data from a node attribute list. * * This routine gets called for any attribute fork that has more than one * block, ie: both true Btree attr lists and for single-leaf-blocks with * "remote" values taking up more blocks. + * + * Returns 0 on successful retrieval, otherwise an error. 
*/ STATIC int xfs_attr_node_get(xfs_da_args_t *args) @@ -1294,24 +1328,21 @@ xfs_attr_node_get(xfs_da_args_t *args) error = xfs_da3_node_lookup_int(state, &retval); if (error) { retval = error; - } else if (retval == -EEXIST) { - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->bp != NULL); - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - - /* - * Get the value, local or "remote" - */ - retval = xfs_attr3_leaf_getvalue(blk->bp, args); - if (!retval && (args->rmtblkno > 0) - && !(args->flags & ATTR_KERNOVAL)) { - retval = xfs_attr_rmtval_get(args); - } + goto out_release; } + if (retval != -EEXIST) + goto out_release; + + /* + * Get the value, local or "remote" + */ + blk = &state->path.blk[state->path.active - 1]; + retval = xfs_attr3_leaf_getvalue(blk->bp, args); /* * If not in a transaction, we have to release all the buffers. */ +out_release: for (i = 0; i < state->path.active; i++) { xfs_trans_brelse(args->trans, state->path.blk[i].bp); state->path.blk[i].bp = NULL; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index ff28ebf3b635..94badfa1743e 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -37,6 +37,7 @@ struct xfs_attr_list_context; #define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ #define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */ +#define ATTR_ALLOC 0x8000 /* allocate xattr buffer on demand */ #define XFS_ATTR_FLAGS \ { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ @@ -47,7 +48,8 @@ struct xfs_attr_list_context; { ATTR_REPLACE, "REPLACE" }, \ { ATTR_KERNOTIME, "KERNOTIME" }, \ { ATTR_KERNOVAL, "KERNOVAL" }, \ - { ATTR_INCOMPLETE, "INCOMPLETE" } + { ATTR_INCOMPLETE, "INCOMPLETE" }, \ + { ATTR_ALLOC, "ALLOC" } /* * The maximum size (into the kernel or returned from the kernel) of an @@ -143,7 +145,7 @@ int xfs_attr_list_int(struct xfs_attr_list_context *); int xfs_inode_hasattr(struct xfs_inode *ip); int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args 
*args); int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, - unsigned char *value, int *valuelenp, int flags); + unsigned char **value, int *valuelenp, int flags); int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, unsigned char *value, int valuelen, int flags); int xfs_attr_set_args(struct xfs_da_args *args); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 70eb941d02e4..b9f019603d0b 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -393,6 +393,50 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags) return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); } +static int +xfs_attr_copy_value( + struct xfs_da_args *args, + unsigned char *value, + int valuelen) +{ + /* + * No copy if all we have to do is get the length + */ + if (args->flags & ATTR_KERNOVAL) { + args->valuelen = valuelen; + return 0; + } + + /* + * No copy if the length of the existing buffer is too small + */ + if (args->valuelen < valuelen) { + args->valuelen = valuelen; + return -ERANGE; + } + + if (args->op_flags & XFS_DA_OP_ALLOCVAL) { + args->value = kmem_alloc_large(valuelen, 0); + if (!args->value) + return -ENOMEM; + } + args->valuelen = valuelen; + + /* remote block xattr requires IO for copy-in */ + if (args->rmtblkno) + return xfs_attr_rmtval_get(args); + + /* + * This is to prevent a GCC warning because the remote xattr case + * doesn't have a value to pass in. In that case, we never reach here, + * but GCC can't work that out and so throws a "passing NULL to + * memcpy" warning. + */ + if (!value) + return -EINVAL; + memcpy(args->value, value, valuelen); + return 0; +} /*======================================================================== * External routines when attribute fork size < XFS_LITINO(mp). @@ -720,15 +764,19 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) } /* - * Look up a name in a shortform attribute list structure. 
+ * Retrieve the attribute value and length. + * + * If ATTR_KERNOVAL is specified, only the length needs to be returned. + * Unlike a lookup, we only return an error if the attribute does not + * exist or we can't retrieve the value. */ -/*ARGSUSED*/ int -xfs_attr_shortform_getvalue(xfs_da_args_t *args) +xfs_attr_shortform_getvalue( + struct xfs_da_args *args) { - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; - int i; + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; + int i; ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE); sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data; @@ -741,18 +789,8 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) continue; if (!xfs_attr_namesp_match(args->flags, sfe->flags)) continue; - if (args->flags & ATTR_KERNOVAL) { - args->valuelen = sfe->valuelen; - return -EEXIST; - } - if (args->valuelen < sfe->valuelen) { - args->valuelen = sfe->valuelen; - return -ERANGE; - } - args->valuelen = sfe->valuelen; - memcpy(args->value, &sfe->nameval[args->namelen], - args->valuelen); - return -EEXIST; + return xfs_attr_copy_value(args, &sfe->nameval[args->namelen], + sfe->valuelen); } return -ENOATTR; } @@ -782,7 +820,7 @@ xfs_attr_shortform_to_leaf( ifp = dp->i_afp; sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; size = be16_to_cpu(sf->hdr.totsize); - tmpbuffer = kmem_alloc(size, KM_SLEEP); + tmpbuffer = kmem_alloc(size, 0); ASSERT(tmpbuffer != NULL); memcpy(tmpbuffer, ifp->if_u1.if_data, size); sf = (xfs_attr_shortform_t *)tmpbuffer; @@ -985,7 +1023,7 @@ xfs_attr3_leaf_to_shortform( trace_xfs_attr_leaf_to_sf(args); - tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); + tmpbuffer = kmem_alloc(args->geo->blksize, 0); if (!tmpbuffer) return -ENOMEM; @@ -1448,7 +1486,7 @@ xfs_attr3_leaf_compact( trace_xfs_attr_leaf_compact(args); - tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); + tmpbuffer = kmem_alloc(args->geo->blksize, 0); memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); memset(bp->b_addr, 0, 
args->geo->blksize); leaf_src = (xfs_attr_leafblock_t *)tmpbuffer; @@ -2167,7 +2205,7 @@ xfs_attr3_leaf_unbalance( struct xfs_attr_leafblock *tmp_leaf; struct xfs_attr3_icleaf_hdr tmphdr; - tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP); + tmp_leaf = kmem_zalloc(state->args->geo->blksize, 0); /* * Copy the header into the temp leaf so that all the stuff @@ -2350,6 +2388,10 @@ xfs_attr3_leaf_lookup_int( /* * Get the value associated with an attribute name from a leaf attribute * list structure. + * + * If ATTR_KERNOVAL is specified, only the length needs to be returned. + * Unlike a lookup, we only return an error if the attribute does not + * exist or we can't retrieve the value. */ int xfs_attr3_leaf_getvalue( @@ -2361,7 +2403,6 @@ xfs_attr3_leaf_getvalue( struct xfs_attr_leaf_entry *entry; struct xfs_attr_leaf_name_local *name_loc; struct xfs_attr_leaf_name_remote *name_rmt; - int valuelen; leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); @@ -2373,36 +2414,19 @@ xfs_attr3_leaf_getvalue( name_loc = xfs_attr3_leaf_name_local(leaf, args->index); ASSERT(name_loc->namelen == args->namelen); ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0); - valuelen = be16_to_cpu(name_loc->valuelen); - if (args->flags & ATTR_KERNOVAL) { - args->valuelen = valuelen; - return 0; - } - if (args->valuelen < valuelen) { - args->valuelen = valuelen; - return -ERANGE; - } - args->valuelen = valuelen; - memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); - } else { - name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); - ASSERT(name_rmt->namelen == args->namelen); - ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); - args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); - args->rmtblkno = be32_to_cpu(name_rmt->valueblk); - args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, - args->rmtvaluelen); - if (args->flags & ATTR_KERNOVAL) { - args->valuelen = args->rmtvaluelen; - return 0; - } - if 
(args->valuelen < args->rmtvaluelen) { - args->valuelen = args->rmtvaluelen; - return -ERANGE; - } - args->valuelen = args->rmtvaluelen; - } - return 0; + return xfs_attr_copy_value(args, + &name_loc->nameval[args->namelen], + be16_to_cpu(name_loc->valuelen)); + } + + name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); + ASSERT(name_rmt->namelen == args->namelen); + ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); + args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); + args->rmtblkno = be32_to_cpu(name_rmt->valueblk); + args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, + args->rmtvaluelen); + return xfs_attr_copy_value(args, NULL, args->rmtvaluelen); } /*======================================================================== diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 4eb30d357045..3e39b7d40f25 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -358,6 +358,8 @@ xfs_attr_rmtval_copyin( /* * Read the value associated with an attribute from the out-of-line buffer * that we stored it in. + * + * Returns 0 on successful retrieval, otherwise an error. 
*/ int xfs_attr_rmtval_get( diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 07aad70f3931..054b4ce30033 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -553,7 +553,7 @@ __xfs_bmap_add_free( #endif ASSERT(xfs_bmap_free_item_zone != NULL); - new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); + new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0); new->xefi_startblock = bno; new->xefi_blockcount = (xfs_extlen_t)len; if (oinfo) @@ -1099,7 +1099,7 @@ xfs_bmap_add_attrfork( if (error) goto trans_cancel; ASSERT(ip->i_afp == NULL); - ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); + ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, 0); ip->i_afp->if_flags = XFS_IFEXTENTS; logflags = 0; switch (ip->i_d.di_format) { @@ -1985,11 +1985,8 @@ xfs_bmap_add_extent_delay_real( } /* add reverse mapping unless caller opted out */ - if (!(bma->flags & XFS_BMAPI_NORMAP)) { - error = xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new); - if (error) - goto done; - } + if (!(bma->flags & XFS_BMAPI_NORMAP)) + xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new); /* convert to a btree if necessary */ if (xfs_bmap_needs_btree(bma->ip, whichfork)) { @@ -2471,9 +2468,7 @@ xfs_bmap_add_extent_unwritten_real( } /* update reverse mappings */ - error = xfs_rmap_convert_extent(mp, tp, ip, whichfork, new); - if (error) - goto done; + xfs_rmap_convert_extent(mp, tp, ip, whichfork, new); /* convert to a btree if necessary */ if (xfs_bmap_needs_btree(ip, whichfork)) { @@ -2832,11 +2827,8 @@ xfs_bmap_add_extent_hole_real( } /* add reverse mapping unless caller opted out */ - if (!(flags & XFS_BMAPI_NORMAP)) { - error = xfs_rmap_map_extent(tp, ip, whichfork, new); - if (error) - goto done; - } + if (!(flags & XFS_BMAPI_NORMAP)) + xfs_rmap_map_extent(tp, ip, whichfork, new); /* convert to a btree if necessary */ if (xfs_bmap_needs_btree(ip, whichfork)) { @@ -4050,12 +4042,8 @@ xfs_bmapi_allocate( */ if (!(bma->flags & XFS_BMAPI_METADATA)) { 
bma->datatype = XFS_ALLOC_NOBUSY; - if (whichfork == XFS_DATA_FORK) { - if (bma->offset == 0) - bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; - else - bma->datatype |= XFS_ALLOC_USERDATA; - } + if (whichfork == XFS_DATA_FORK && bma->offset == 0) + bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; if (bma->flags & XFS_BMAPI_ZERO) bma->datatype |= XFS_ALLOC_USERDATA_ZERO; } @@ -4401,12 +4389,9 @@ xfs_bmapi_write( * If this is a CoW allocation, record the data in * the refcount btree for orphan recovery. */ - if (whichfork == XFS_COW_FORK) { - error = xfs_refcount_alloc_cow_extent(tp, - bma.blkno, bma.length); - if (error) - goto error0; - } + if (whichfork == XFS_COW_FORK) + xfs_refcount_alloc_cow_extent(tp, bma.blkno, + bma.length); } /* Deal with the allocated space we found. */ @@ -4530,7 +4515,7 @@ xfs_bmapi_convert_delalloc( if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK)) goto out_finish; error = -EFSCORRUPTED; - if (WARN_ON_ONCE(!bma.got.br_startblock && !XFS_IS_REALTIME_INODE(ip))) + if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock))) goto out_finish; XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length)); @@ -4540,12 +4525,8 @@ xfs_bmapi_convert_delalloc( *imap = bma.got; *seq = READ_ONCE(ifp->if_seq); - if (whichfork == XFS_COW_FORK) { - error = xfs_refcount_alloc_cow_extent(tp, bma.blkno, - bma.length); - if (error) - goto out_finish; - } + if (whichfork == XFS_COW_FORK) + xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length); error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags, whichfork); @@ -5149,18 +5130,14 @@ xfs_bmap_del_extent_real( } /* remove reverse mapping */ - error = xfs_rmap_unmap_extent(tp, ip, whichfork, del); - if (error) - goto done; + xfs_rmap_unmap_extent(tp, ip, whichfork, del); /* * If we need to, add to list of extents to delete. 
*/ if (do_fx && !(bflags & XFS_BMAPI_REMAP)) { if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { - error = xfs_refcount_decrease_extent(tp, del); - if (error) - goto done; + xfs_refcount_decrease_extent(tp, del); } else { __xfs_bmap_add_free(tp, del->br_startblock, del->br_blockcount, NULL, @@ -5651,12 +5628,11 @@ done: &new); /* update reverse mapping. rmap functions merge the rmaps for us */ - error = xfs_rmap_unmap_extent(tp, ip, whichfork, got); - if (error) - return error; + xfs_rmap_unmap_extent(tp, ip, whichfork, got); memcpy(&new, got, sizeof(new)); new.br_startoff = left->br_startoff + left->br_blockcount; - return xfs_rmap_map_extent(tp, ip, whichfork, &new); + xfs_rmap_map_extent(tp, ip, whichfork, &new); + return 0; } static int @@ -5695,10 +5671,9 @@ xfs_bmap_shift_update_extent( got); /* update reverse mapping */ - error = xfs_rmap_unmap_extent(tp, ip, whichfork, &prev); - if (error) - return error; - return xfs_rmap_map_extent(tp, ip, whichfork, got); + xfs_rmap_unmap_extent(tp, ip, whichfork, &prev); + xfs_rmap_map_extent(tp, ip, whichfork, got); + return 0; } int @@ -6094,7 +6069,7 @@ __xfs_bmap_add( bmap->br_blockcount, bmap->br_state); - bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS); + bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS); INIT_LIST_HEAD(&bi->bi_list); bi->bi_type = type; bi->bi_owner = ip; @@ -6106,29 +6081,29 @@ __xfs_bmap_add( } /* Map an extent into a file. */ -int +void xfs_bmap_map_extent( struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_bmbt_irec *PREV) { if (!xfs_bmap_is_update_needed(PREV)) - return 0; + return; - return __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV); + __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV); } /* Unmap an extent out of a file. 
*/ -int +void xfs_bmap_unmap_extent( struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_bmbt_irec *PREV) { if (!xfs_bmap_is_update_needed(PREV)) - return 0; + return; - return __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV); + __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV); } /* diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 8f597f9abdbe..5bb446d80542 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -171,6 +171,13 @@ static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec) !isnullstartblock(irec->br_startblock); } +/* + * Check the mapping for obviously garbage allocations that could trash the + * filesystem immediately. + */ +#define xfs_valid_startblock(ip, startblock) \ + ((startblock) != 0 || XFS_IS_REALTIME_INODE(ip)) + void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); @@ -254,9 +261,9 @@ int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip, enum xfs_bmap_intent_type type, int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, xfs_filblks_t *blockcount, xfs_exntst_t state); -int xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, +void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_bmbt_irec *imap); -int xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, +void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_bmbt_irec *imap); static inline int xfs_bmap_fork_to_state(int whichfork) diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index fbb18ba5d905..ffe608d2a2d9 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -400,8 +400,20 @@ xfs_bmbt_diff_two_keys( union xfs_btree_key *k1, union xfs_btree_key *k2) { - return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) - - be64_to_cpu(k2->bmbt.br_startoff); + 
uint64_t a = be64_to_cpu(k1->bmbt.br_startoff); + uint64_t b = be64_to_cpu(k2->bmbt.br_startoff); + + /* + * Note: This routine previously cast a and b to int64 and subtracted + * them to generate a result. This led to problems if b was the + * "maximum" key value (all ones) being signed incorrectly, hence this + * somewhat less efficient version. + */ + if (a > b) + return 1; + if (b > a) + return -1; + return 0; } static xfs_failaddr_t diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index f1048efa4268..71de937f9e64 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4466,8 +4466,6 @@ xfs_btree_lblock_verify( * btree block * * @bp: buffer containing the btree block - * @max_recs: pointer to the m_*_mxr max records field in the xfs mount - * @pag_max_level: pointer to the per-ag max level field */ xfs_failaddr_t xfs_btree_sblock_v5hdr_verify( @@ -4600,7 +4598,7 @@ xfs_btree_simple_query_range( /* Callback */ error = fn(cur, recp, priv); - if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT) + if (error) break; advloop: @@ -4702,8 +4700,7 @@ pop_up: */ if (ldiff >= 0 && hdiff >= 0) { error = fn(cur, recp, priv); - if (error < 0 || - error == XFS_BTREE_QUERY_RANGE_ABORT) + if (error) break; } else if (hdiff < 0) { /* Record is larger than high key; pop. */ @@ -4774,8 +4771,7 @@ out: * Query a btree for all records overlapping a given interval of keys. The * supplied function will be called with each record found; return one of the * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error - * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a - * negative error code. + * code. This function returns -ECANCELED, zero, or a negative error code. */ int xfs_btree_query_range( @@ -4891,7 +4887,7 @@ xfs_btree_has_record_helper( union xfs_btree_rec *rec, void *priv) { - return XFS_BTREE_QUERY_RANGE_ABORT; + return -ECANCELED; } /* Is there a record covering a given range of keys? 
*/ @@ -4906,7 +4902,7 @@ xfs_btree_has_record( error = xfs_btree_query_range(cur, low, high, &xfs_btree_has_record_helper, NULL); - if (error == XFS_BTREE_QUERY_RANGE_ABORT) { + if (error == -ECANCELED) { *exists = true; return 0; } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index fa3cd8ab9aba..ced1e65d1483 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -464,9 +464,13 @@ xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len); unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); -/* return codes */ -#define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */ -#define XFS_BTREE_QUERY_RANGE_ABORT (XFS_ITER_ABORT) /* stop iterating */ +/* + * Return codes for the query range iterator function are 0 to continue + * iterating, and non-zero to stop iterating. Any non-zero value will be + * passed up to the _query_range caller. The special value -ECANCELED can be + * used to stop iteration, because _query_range never generates that error + * code on its own. + */ typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, union xfs_btree_rec *rec, void *priv); diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 0bf56e94bfe9..4fd1223c1bd5 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2098,7 +2098,7 @@ xfs_da_grow_inode_int( * If we didn't get it and the block might work if fragmented, * try without the CONTIG flag. Loop until we get it all. 
*/ - mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); + mapp = kmem_alloc(sizeof(*mapp) * count, 0); for (b = *bno, mapi = 0; b < *bno + count; ) { nmap = min(XFS_BMAP_MAX_NMAP, count); c = (int)(*bno + count - b); @@ -2480,7 +2480,7 @@ xfs_buf_map_from_irec( if (nirecs > 1) { map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), - KM_SLEEP | KM_NOFS); + KM_NOFS); if (!map) return -ENOMEM; *mapp = map; @@ -2539,7 +2539,7 @@ xfs_dabuf_map( */ if (nfsb != 1) irecs = kmem_zalloc(sizeof(irec) * nfsb, - KM_SLEEP | KM_NOFS); + KM_NOFS); nirecs = nfsb; error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 84dd865b6c3d..ae0bbd20d9ca 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -81,13 +81,15 @@ typedef struct xfs_da_args { #define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */ #define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ #define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ +#define XFS_DA_OP_ALLOCVAL 0x0020 /* lookup to alloc buffer if found */ #define XFS_DA_OP_FLAGS \ { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \ { XFS_DA_OP_RENAME, "RENAME" }, \ { XFS_DA_OP_ADDNAME, "ADDNAME" }, \ { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ - { XFS_DA_OP_CILOOKUP, "CILOOKUP" } + { XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \ + { XFS_DA_OP_ALLOCVAL, "ALLOCVAL" } /* * Storage for holding state during Btree searches and split/join ops. 
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index eb2be2a6a25a..22557527cfdb 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -517,7 +517,7 @@ xfs_defer_add( } if (!dfp) { dfp = kmem_alloc(sizeof(struct xfs_defer_pending), - KM_SLEEP | KM_NOFS); + KM_NOFS); dfp->dfp_type = type; dfp->dfp_intent = NULL; dfp->dfp_done = NULL; diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 67840723edbb..867c5dee0751 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -110,9 +110,9 @@ xfs_da_mount( nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), - KM_SLEEP | KM_MAYFAIL); + KM_MAYFAIL); mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), - KM_SLEEP | KM_MAYFAIL); + KM_MAYFAIL); if (!mp->m_dir_geo || !mp->m_attr_geo) { kmem_free(mp->m_dir_geo); kmem_free(mp->m_attr_geo); @@ -217,7 +217,7 @@ xfs_dir_init( if (error) return error; - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + args = kmem_zalloc(sizeof(*args), KM_NOFS); if (!args) return -ENOMEM; @@ -254,7 +254,7 @@ xfs_dir_createname( XFS_STATS_INC(dp->i_mount, xs_dir_create); } - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + args = kmem_zalloc(sizeof(*args), KM_NOFS); if (!args) return -ENOMEM; @@ -353,7 +353,7 @@ xfs_dir_lookup( * lockdep Doing this avoids having to add a bunch of lockdep class * annotations into the reclaim path for the ilock. 
*/ - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + args = kmem_zalloc(sizeof(*args), KM_NOFS); args->geo = dp->i_mount->m_dir_geo; args->name = name->name; args->namelen = name->len; @@ -422,7 +422,7 @@ xfs_dir_removename( ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); XFS_STATS_INC(dp->i_mount, xs_dir_remove); - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + args = kmem_zalloc(sizeof(*args), KM_NOFS); if (!args) return -ENOMEM; @@ -483,7 +483,7 @@ xfs_dir_replace( if (rval) return rval; - args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + args = kmem_zalloc(sizeof(*args), KM_NOFS); if (!args) return -ENOMEM; diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index a6fb0cc2085e..9595ced393dc 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -1092,7 +1092,7 @@ xfs_dir2_sf_to_block( * Copy the directory into a temporary buffer. * Then pitch the incore inode data so we can make extents. */ - sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP); + sfp = kmem_alloc(ifp->if_bytes, 0); memcpy(sfp, oldsfp, ifp->if_bytes); xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 1fc44efc344d..705c4f562758 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -32,8 +32,6 @@ static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state, static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, int index, xfs_da_state_blk_t *dblk, int *rval); -static int xfs_dir2_node_addname_int(xfs_da_args_t *args, - xfs_da_state_blk_t *fblk); /* * Check internal consistency of a leafn block. @@ -1611,113 +1609,152 @@ xfs_dir2_leafn_unbalance( } /* - * Top-level node form directory addname routine. + * Add a new data block to the directory at the free space index that the caller + * has specified. 
*/ -int /* error */ -xfs_dir2_node_addname( - xfs_da_args_t *args) /* operation arguments */ +static int +xfs_dir2_node_add_datablk( + struct xfs_da_args *args, + struct xfs_da_state_blk *fblk, + xfs_dir2_db_t *dbno, + struct xfs_buf **dbpp, + struct xfs_buf **fbpp, + int *findex) { - xfs_da_state_blk_t *blk; /* leaf block for insert */ - int error; /* error return value */ - int rval; /* sub-return value */ - xfs_da_state_t *state; /* btree cursor */ + struct xfs_inode *dp = args->dp; + struct xfs_trans *tp = args->trans; + struct xfs_mount *mp = dp->i_mount; + struct xfs_dir3_icfree_hdr freehdr; + struct xfs_dir2_data_free *bf; + struct xfs_dir2_data_hdr *hdr; + struct xfs_dir2_free *free = NULL; + xfs_dir2_db_t fbno; + struct xfs_buf *fbp; + struct xfs_buf *dbp; + __be16 *bests = NULL; + int error; - trace_xfs_dir2_node_addname(args); + /* Not allowed to allocate, return failure. */ + if (args->total == 0) + return -ENOSPC; + + /* Allocate and initialize the new data block. */ + error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, dbno); + if (error) + return error; + error = xfs_dir3_data_init(args, *dbno, &dbp); + if (error) + return error; /* - * Allocate and initialize the state (btree cursor). - */ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - /* - * Look up the name. We're not supposed to find it, but - * this gives us the insertion point. + * Get the freespace block corresponding to the data block + * that was just allocated. */ - error = xfs_da3_node_lookup_int(state, &rval); + fbno = dp->d_ops->db_to_fdb(args->geo, *dbno); + error = xfs_dir2_free_try_read(tp, dp, + xfs_dir2_db_to_da(args->geo, fbno), &fbp); if (error) - rval = error; - if (rval != -ENOENT) { - goto done; - } + return error; + /* - * Add the data entry to a data block. - * Extravalid is set to a freeblock found by lookup. + * If there wasn't a freespace block, the read will + * return a NULL fbp. Allocate and initialize a new one. 
*/ - rval = xfs_dir2_node_addname_int(args, - state->extravalid ? &state->extrablk : NULL); - if (rval) { - goto done; + if (!fbp) { + error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fbno); + if (error) + return error; + + if (dp->d_ops->db_to_fdb(args->geo, *dbno) != fbno) { + xfs_alert(mp, +"%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld", + __func__, (unsigned long long)dp->i_ino, + (long long)dp->d_ops->db_to_fdb(args->geo, *dbno), + (long long)*dbno, (long long)fbno); + if (fblk) { + xfs_alert(mp, + " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", + fblk, (unsigned long long)fblk->blkno, + fblk->index, fblk->magic); + } else { + xfs_alert(mp, " ... fblk is NULL"); + } + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + + /* Get a buffer for the new block. */ + error = xfs_dir3_free_get_buf(args, fbno, &fbp); + if (error) + return error; + free = fbp->b_addr; + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); + + /* Remember the first slot as our empty slot. */ + freehdr.firstdb = (fbno - xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET)) * + dp->d_ops->free_max_bests(args->geo); + } else { + free = fbp->b_addr; + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); } - blk = &state->path.blk[state->path.active - 1]; - ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); + + /* Set the freespace block index from the data block number. */ + *findex = dp->d_ops->db_to_fdindex(args->geo, *dbno); + + /* Extend the freespace table if the new data block is off the end. */ + if (*findex >= freehdr.nvalid) { + ASSERT(*findex < dp->d_ops->free_max_bests(args->geo)); + freehdr.nvalid = *findex + 1; + bests[*findex] = cpu_to_be16(NULLDATAOFF); + } + /* - * Add the new leaf entry. + * If this entry was for an empty data block (this should always be + * true) then update the header. 
*/ - rval = xfs_dir2_leafn_add(blk->bp, args, blk->index); - if (rval == 0) { - /* - * It worked, fix the hash values up the btree. - */ - if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) - xfs_da3_fixhashpath(state, &state->path); - } else { - /* - * It didn't work, we need to split the leaf block. - */ - if (args->total == 0) { - ASSERT(rval == -ENOSPC); - goto done; - } - /* - * Split the leaf block and insert the new entry. - */ - rval = xfs_da3_split(state); + if (bests[*findex] == cpu_to_be16(NULLDATAOFF)) { + freehdr.nused++; + dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); + xfs_dir2_free_log_header(args, fbp); } -done: - xfs_da_state_free(state); - return rval; + + /* Update the freespace value for the new block in the table. */ + hdr = dbp->b_addr; + bf = dp->d_ops->data_bestfree_p(hdr); + bests[*findex] = bf[0].length; + + *dbpp = dbp; + *fbpp = fbp; + return 0; } -/* - * Add the data entry for a node-format directory name addition. - * The leaf entry is added in xfs_dir2_leafn_add. - * We may enter with a freespace block that the lookup found. 
- */ -static int /* error */ -xfs_dir2_node_addname_int( - xfs_da_args_t *args, /* operation arguments */ - xfs_da_state_blk_t *fblk) /* optional freespace block */ +static int +xfs_dir2_node_find_freeblk( + struct xfs_da_args *args, + struct xfs_da_state_blk *fblk, + xfs_dir2_db_t *dbnop, + struct xfs_buf **fbpp, + int *findexp, + int length) { - xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_dir2_db_t dbno; /* data block number */ - struct xfs_buf *dbp; /* data block buffer */ - xfs_dir2_data_entry_t *dep; /* data entry pointer */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_data_unused_t *dup; /* data unused entry pointer */ - int error; /* error return value */ - xfs_dir2_db_t fbno; /* freespace block number */ - struct xfs_buf *fbp; /* freespace buffer */ - int findex; /* freespace entry index */ - xfs_dir2_free_t *free=NULL; /* freespace block structure */ - xfs_dir2_db_t ifbno; /* initial freespace block no */ - xfs_dir2_db_t lastfbno=0; /* highest freespace block no */ - int length; /* length of the new entry */ - int logfree; /* need to log free entry */ - xfs_mount_t *mp; /* filesystem mount point */ - int needlog; /* need to log data header */ - int needscan; /* need to rescan data frees */ - __be16 *tagp; /* data entry tag pointer */ - xfs_trans_t *tp; /* transaction pointer */ - __be16 *bests; struct xfs_dir3_icfree_hdr freehdr; - struct xfs_dir2_data_free *bf; - xfs_dir2_data_aoff_t aoff; + struct xfs_dir2_free *free = NULL; + struct xfs_inode *dp = args->dp; + struct xfs_trans *tp = args->trans; + struct xfs_buf *fbp = NULL; + xfs_dir2_db_t firstfbno; + xfs_dir2_db_t lastfbno; + xfs_dir2_db_t ifbno = -1; + xfs_dir2_db_t dbno = -1; + xfs_dir2_db_t fbno; + xfs_fileoff_t fo; + __be16 *bests = NULL; + int findex = 0; + int error; - dp = args->dp; - mp = dp->i_mount; - tp = args->trans; - length = dp->d_ops->data_entsize(args->namelen); /* * If we came in with a freespace block that means that lookup * found an entry with our hash 
value. This is the freespace @@ -1725,288 +1762,157 @@ xfs_dir2_node_addname_int( */ if (fblk) { fbp = fblk->bp; - /* - * Remember initial freespace block number. - */ - ifbno = fblk->blkno; free = fbp->b_addr; findex = fblk->index; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - - /* - * This means the free entry showed that the data block had - * space for our entry, so we remembered it. - * Use that data block. - */ if (findex >= 0) { + /* caller already found the freespace for us. */ + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); + ASSERT(findex < freehdr.nvalid); ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF); ASSERT(be16_to_cpu(bests[findex]) >= length); dbno = freehdr.firstdb + findex; - } else { - /* - * The data block looked at didn't have enough room. - * We'll start at the beginning of the freespace entries. - */ - dbno = -1; - findex = 0; + goto found_block; } - } else { + /* - * Didn't come in with a freespace block, so no data block. + * The data block looked at didn't have enough room. + * We'll start at the beginning of the freespace entries. */ - ifbno = dbno = -1; + ifbno = fblk->blkno; + xfs_trans_brelse(tp, fbp); fbp = NULL; - findex = 0; + fblk->bp = NULL; } /* - * If we don't have a data block yet, we're going to scan the - * freespace blocks looking for one. Figure out what the - * highest freespace block number is. - */ - if (dbno == -1) { - xfs_fileoff_t fo; /* freespace block number */ - - if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) - return error; - lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); - fbno = ifbno; - } - /* - * While we haven't identified a data block, search the freeblock - * data for a good data block. If we find a null freeblock entry, - * indicating a hole in the data blocks, remember that. 
+ * If we don't have a data block yet, we're going to scan the freespace + * data for a data block with enough free space in it. */ - while (dbno == -1) { - /* - * If we don't have a freeblock in hand, get the next one. - */ - if (fbp == NULL) { - /* - * Happens the first time through unless lookup gave - * us a freespace block to start with. - */ - if (++fbno == 0) - fbno = xfs_dir2_byte_to_db(args->geo, - XFS_DIR2_FREE_OFFSET); - /* - * If it's ifbno we already looked at it. - */ - if (fbno == ifbno) - fbno++; - /* - * If it's off the end we're done. - */ - if (fbno >= lastfbno) - break; - /* - * Read the block. There can be holes in the - * freespace blocks, so this might not succeed. - * This should be really rare, so there's no reason - * to avoid it. - */ - error = xfs_dir2_free_try_read(tp, dp, - xfs_dir2_db_to_da(args->geo, fbno), - &fbp); - if (error) - return error; - if (!fbp) - continue; - free = fbp->b_addr; - findex = 0; - } - /* - * Look at the current free entry. Is it good enough? - * - * The bests initialisation should be where the bufer is read in - * the above branch. But gcc is too stupid to realise that bests - * and the freehdr are actually initialised if they are placed - * there, so we have to do it here to avoid warnings. Blech. - */ - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - if (be16_to_cpu(bests[findex]) != NULLDATAOFF && - be16_to_cpu(bests[findex]) >= length) - dbno = freehdr.firstdb + findex; - else { - /* - * Are we done with the freeblock? - */ - if (++findex == freehdr.nvalid) { - /* - * Drop the block. - */ - xfs_trans_brelse(tp, fbp); - fbp = NULL; - if (fblk && fblk->bp) - fblk->bp = NULL; - } - } - } - /* - * If we don't have a data block, we need to allocate one and make - * the freespace entries refer to it. - */ - if (unlikely(dbno == -1)) { - /* - * Not allowed to allocate, return failure. 
- */ - if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) - return -ENOSPC; - - /* - * Allocate and initialize the new data block. - */ - if (unlikely((error = xfs_dir2_grow_inode(args, - XFS_DIR2_DATA_SPACE, - &dbno)) || - (error = xfs_dir3_data_init(args, dbno, &dbp)))) - return error; + error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK); + if (error) + return error; + lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); + firstfbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET); - /* - * If (somehow) we have a freespace block, get rid of it. - */ - if (fbp) - xfs_trans_brelse(tp, fbp); - if (fblk && fblk->bp) - fblk->bp = NULL; + for (fbno = lastfbno - 1; fbno >= firstfbno; fbno--) { + /* If it's ifbno we already looked at it. */ + if (fbno == ifbno) + continue; /* - * Get the freespace block corresponding to the data block - * that was just allocated. + * Read the block. There can be holes in the freespace blocks, + * so this might not succeed. This should be really rare, so + * there's no reason to avoid it. */ - fbno = dp->d_ops->db_to_fdb(args->geo, dbno); error = xfs_dir2_free_try_read(tp, dp, - xfs_dir2_db_to_da(args->geo, fbno), - &fbp); + xfs_dir2_db_to_da(args->geo, fbno), + &fbp); if (error) return error; + if (!fbp) + continue; - /* - * If there wasn't a freespace block, the read will - * return a NULL fbp. Allocate and initialize a new one. 
- */ - if (!fbp) { - error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, - &fbno); - if (error) - return error; + free = fbp->b_addr; + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); - if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) { - xfs_alert(mp, -"%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld ifbno %llu lastfbno %d", - __func__, (unsigned long long)dp->i_ino, - (long long)dp->d_ops->db_to_fdb( - args->geo, dbno), - (long long)dbno, (long long)fbno, - (unsigned long long)ifbno, lastfbno); - if (fblk) { - xfs_alert(mp, - " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", - fblk, - (unsigned long long)fblk->blkno, - fblk->index, - fblk->magic); - } else { - xfs_alert(mp, " ... fblk is NULL"); - } - XFS_ERROR_REPORT("xfs_dir2_node_addname_int", - XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; + /* Scan the free entry array for a large enough free space. */ + for (findex = freehdr.nvalid - 1; findex >= 0; findex--) { + if (be16_to_cpu(bests[findex]) != NULLDATAOFF && + be16_to_cpu(bests[findex]) >= length) { + dbno = freehdr.firstdb + findex; + goto found_block; } - - /* - * Get a buffer for the new block. - */ - error = xfs_dir3_free_get_buf(args, fbno, &fbp); - if (error) - return error; - free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - - /* - * Remember the first slot as our empty slot. - */ - freehdr.firstdb = - (fbno - xfs_dir2_byte_to_db(args->geo, - XFS_DIR2_FREE_OFFSET)) * - dp->d_ops->free_max_bests(args->geo); - } else { - free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); } - /* - * Set the freespace block index from the data block number. - */ - findex = dp->d_ops->db_to_fdindex(args->geo, dbno); - /* - * If it's after the end of the current entries in the - * freespace block, extend that table. 
- */ - if (findex >= freehdr.nvalid) { - ASSERT(findex < dp->d_ops->free_max_bests(args->geo)); - freehdr.nvalid = findex + 1; - /* - * Tag new entry so nused will go up. - */ - bests[findex] = cpu_to_be16(NULLDATAOFF); - } - /* - * If this entry was for an empty data block - * (this should always be true) then update the header. - */ - if (bests[findex] == cpu_to_be16(NULLDATAOFF)) { - freehdr.nused++; - dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_header(args, fbp); - } - /* - * Update the real value in the table. - * We haven't allocated the data entry yet so this will - * change again. - */ - hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); - bests[findex] = bf[0].length; - logfree = 1; + /* Didn't find free space, go on to next free block */ + xfs_trans_brelse(tp, fbp); } + +found_block: + *dbnop = dbno; + *fbpp = fbp; + *findexp = findex; + return 0; +} + + +/* + * Add the data entry for a node-format directory name addition. + * The leaf entry is added in xfs_dir2_leafn_add. + * We may enter with a freespace block that the lookup found. 
+ */ +static int +xfs_dir2_node_addname_int( + struct xfs_da_args *args, /* operation arguments */ + struct xfs_da_state_blk *fblk) /* optional freespace block */ +{ + struct xfs_dir2_data_unused *dup; /* data unused entry pointer */ + struct xfs_dir2_data_entry *dep; /* data entry pointer */ + struct xfs_dir2_data_hdr *hdr; /* data block header */ + struct xfs_dir2_data_free *bf; + struct xfs_dir2_free *free = NULL; /* freespace block structure */ + struct xfs_trans *tp = args->trans; + struct xfs_inode *dp = args->dp; + struct xfs_buf *dbp; /* data block buffer */ + struct xfs_buf *fbp; /* freespace buffer */ + xfs_dir2_data_aoff_t aoff; + xfs_dir2_db_t dbno; /* data block number */ + int error; /* error return value */ + int findex; /* freespace entry index */ + int length; /* length of the new entry */ + int logfree = 0; /* need to log free entry */ + int needlog = 0; /* need to log data header */ + int needscan = 0; /* need to rescan data frees */ + __be16 *tagp; /* data entry tag pointer */ + __be16 *bests; + + length = dp->d_ops->data_entsize(args->namelen); + error = xfs_dir2_node_find_freeblk(args, fblk, &dbno, &fbp, &findex, + length); + if (error) + return error; + /* - * We had a data block so we don't have to make a new one. + * Now we know if we must allocate blocks, so if we are checking whether + * we can insert without allocation then we can return now. */ - else { - /* - * If just checking, we succeeded. - */ - if (args->op_flags & XFS_DA_OP_JUSTCHECK) - return 0; + if (args->op_flags & XFS_DA_OP_JUSTCHECK) { + if (dbno == -1) + return -ENOSPC; + return 0; + } - /* - * Read the data block in. - */ + /* + * If we don't have a data block, we need to allocate one and make + * the freespace entries refer to it. + */ + if (dbno == -1) { + /* we're going to have to log the free block index later */ + logfree = 1; + error = xfs_dir2_node_add_datablk(args, fblk, &dbno, &dbp, &fbp, + &findex); + } else { + /* Read the data block in. 
*/ error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, dbno), -1, &dbp); - if (error) - return error; - hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); - logfree = 0; } + if (error) + return error; + + /* setup for data block up now */ + hdr = dbp->b_addr; + bf = dp->d_ops->data_bestfree_p(hdr); ASSERT(be16_to_cpu(bf[0].length) >= length); - /* - * Point to the existing unused space. - */ + + /* Point to the existing unused space. */ dup = (xfs_dir2_data_unused_t *) ((char *)hdr + be16_to_cpu(bf[0].offset)); - needscan = needlog = 0; - /* - * Mark the first part of the unused space, inuse for us. - */ + + /* Mark the first part of the unused space, inuse for us. */ aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length, &needlog, &needscan); @@ -2014,9 +1920,8 @@ xfs_dir2_node_addname_int( xfs_trans_brelse(tp, dbp); return error; } - /* - * Fill in the new entry and log it. - */ + + /* Fill in the new entry and log it. */ dep = (xfs_dir2_data_entry_t *)dup; dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; @@ -2025,38 +1930,101 @@ xfs_dir2_node_addname_int( tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(args, dbp, dep); - /* - * Rescan the block for bestfree if needed. - */ + + /* Rescan the freespace and log the data block if needed. */ if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); - /* - * Log the data block header if needed. - */ if (needlog) xfs_dir2_data_log_header(args, dbp); - /* - * If the freespace entry is now wrong, update it. - */ - bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */ - if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) { + + /* If the freespace block entry is now wrong, update it. 
*/ + free = fbp->b_addr; + bests = dp->d_ops->free_bests_p(free); + if (bests[findex] != bf[0].length) { bests[findex] = bf[0].length; logfree = 1; } - /* - * Log the freespace entry if needed. - */ + + /* Log the freespace entry if needed. */ if (logfree) xfs_dir2_free_log_bests(args, fbp, findex, findex); - /* - * Return the data block and offset in args, then drop the data block. - */ + + /* Return the data block and offset in args. */ args->blkno = (xfs_dablk_t)dbno; args->index = be16_to_cpu(*tagp); return 0; } /* + * Top-level node form directory addname routine. + */ +int /* error */ +xfs_dir2_node_addname( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_da_state_blk_t *blk; /* leaf block for insert */ + int error; /* error return value */ + int rval; /* sub-return value */ + xfs_da_state_t *state; /* btree cursor */ + + trace_xfs_dir2_node_addname(args); + + /* + * Allocate and initialize the state (btree cursor). + */ + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + /* + * Look up the name. We're not supposed to find it, but + * this gives us the insertion point. + */ + error = xfs_da3_node_lookup_int(state, &rval); + if (error) + rval = error; + if (rval != -ENOENT) { + goto done; + } + /* + * Add the data entry to a data block. + * Extravalid is set to a freeblock found by lookup. + */ + rval = xfs_dir2_node_addname_int(args, + state->extravalid ? &state->extrablk : NULL); + if (rval) { + goto done; + } + blk = &state->path.blk[state->path.active - 1]; + ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); + /* + * Add the new leaf entry. + */ + rval = xfs_dir2_leafn_add(blk->bp, args, blk->index); + if (rval == 0) { + /* + * It worked, fix the hash values up the btree. + */ + if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) + xfs_da3_fixhashpath(state, &state->path); + } else { + /* + * It didn't work, we need to split the leaf block. 
+ */ + if (args->total == 0) { + ASSERT(rval == -ENOSPC); + goto done; + } + /* + * Split the leaf block and insert the new entry. + */ + rval = xfs_da3_split(state); + } +done: + xfs_da_state_free(state); + return rval; +} + +/* * Lookup an entry in a node-format directory. * All the real work happens in xfs_da3_node_lookup_int. * The only real output is the inode number of the entry. diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 033589257f54..85f14fc2a8da 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -164,7 +164,7 @@ xfs_dir2_block_to_sf( * can free the block and copy the formatted data into the inode literal * area. */ - dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP); + dst = kmem_alloc(mp->m_sb.sb_inodesize, 0); hdr = bp->b_addr; /* @@ -436,7 +436,7 @@ xfs_dir2_sf_addname_hard( sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; old_isize = (int)dp->i_d.di_size; - buf = kmem_alloc(old_isize, KM_SLEEP); + buf = kmem_alloc(old_isize, 0); oldsfp = (xfs_dir2_sf_hdr_t *)buf; memcpy(oldsfp, sfp, old_isize); /* @@ -1096,7 +1096,7 @@ xfs_dir2_sf_toino4( * Don't want xfs_idata_realloc copying the data here. */ oldsize = dp->i_df.if_bytes; - buf = kmem_alloc(oldsize, KM_SLEEP); + buf = kmem_alloc(oldsize, 0); oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; ASSERT(oldsfp->i8count == 1); memcpy(buf, oldsfp, oldsize); @@ -1169,7 +1169,7 @@ xfs_dir2_sf_toino8( * Don't want xfs_idata_realloc copying the data here. 
*/ oldsize = dp->i_df.if_bytes; - buf = kmem_alloc(oldsize, KM_SLEEP); + buf = kmem_alloc(oldsize, 0); oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; ASSERT(oldsfp->i8count == 0); memcpy(buf, oldsfp, oldsize); diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 52d03a3a02a4..39dd2b908106 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -287,7 +287,7 @@ struct xfs_ag_geometry { uint32_t ag_ifree; /* o: inodes free */ uint32_t ag_sick; /* o: sick things in ag */ uint32_t ag_checked; /* o: checked metadata in ag */ - uint32_t ag_reserved32; /* o: zero */ + uint32_t ag_flags; /* i/o: flags for this ag */ uint64_t ag_reserved[12];/* o: zero */ }; #define XFS_AG_GEOM_SICK_SB (1 << 0) /* superblock */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 04377ab75863..588d44613094 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2787,8 +2787,13 @@ xfs_ialloc_setup_geometry( igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr, inodes); - /* Set the maximum inode count for this filesystem. */ - if (sbp->sb_imax_pct) { + /* + * Set the maximum inode count for this filesystem, being careful not + * to use obviously garbage sb_inopblog/sb_inopblock values. Regular + * users should never get here due to failing sb verification, but + * certain users (xfs_db) need to be usable even with corrupt metadata. + */ + if (sbp->sb_imax_pct && igeo->ialloc_blks) { /* * Make sure the maximum inode count is a multiple * of the units we allocate inodes in. diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 27aa3f2bc4bc..7bc87408f1a0 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -616,7 +616,7 @@ xfs_iext_realloc_root( * sequence counter is seen before the modifications to the extent tree itself * take effect. 
*/ -static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp, int state) +static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp) { WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1); } @@ -633,7 +633,7 @@ xfs_iext_insert( struct xfs_iext_leaf *new = NULL; int nr_entries, i; - xfs_iext_inc_seq(ifp, state); + xfs_iext_inc_seq(ifp); if (ifp->if_height == 0) xfs_iext_alloc_root(ifp, cur); @@ -875,7 +875,7 @@ xfs_iext_remove( ASSERT(ifp->if_u1.if_root != NULL); ASSERT(xfs_iext_valid(ifp, cur)); - xfs_iext_inc_seq(ifp, state); + xfs_iext_inc_seq(ifp); nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1; for (i = cur->pos; i < nr_entries; i++) @@ -983,7 +983,7 @@ xfs_iext_update_extent( { struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); - xfs_iext_inc_seq(ifp, state); + xfs_iext_inc_seq(ifp); if (cur->pos == 0) { struct xfs_bmbt_irec old; diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index bf3e04018246..c643beeb5a24 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -94,7 +94,7 @@ xfs_iformat_fork( return 0; ASSERT(ip->i_afp == NULL); - ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); + ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_NOFS); switch (dip->di_aformat) { case XFS_DINODE_FMT_LOCAL: @@ -147,7 +147,7 @@ xfs_init_local_fork( if (size) { real_size = roundup(mem_size, 4); - ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); + ifp->if_u1.if_data = kmem_alloc(real_size, KM_NOFS); memcpy(ifp->if_u1.if_data, data, size); if (zero_terminate) ifp->if_u1.if_data[size] = '\0'; @@ -302,7 +302,7 @@ xfs_iformat_btree( } ifp->if_broot_bytes = size; - ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS); + ifp->if_broot = kmem_alloc(size, KM_NOFS); ASSERT(ifp->if_broot != NULL); /* * Copy and convert from the on-disk structure @@ -367,7 +367,7 @@ xfs_iroot_realloc( */ if (ifp->if_broot_bytes == 0) { new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff); - 
ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); + ifp->if_broot = kmem_alloc(new_size, KM_NOFS); ifp->if_broot_bytes = (int)new_size; return; } @@ -382,7 +382,7 @@ xfs_iroot_realloc( new_max = cur_max + rec_diff; new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, - KM_SLEEP | KM_NOFS); + KM_NOFS); op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, ifp->if_broot_bytes); np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, @@ -408,7 +408,7 @@ xfs_iroot_realloc( else new_size = 0; if (new_size > 0) { - new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); + new_broot = kmem_alloc(new_size, KM_NOFS); /* * First copy over the btree block header. */ @@ -492,7 +492,7 @@ xfs_idata_realloc( * We enforce that here. */ ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data, - roundup(new_size, 4), KM_SLEEP | KM_NOFS); + roundup(new_size, 4), KM_NOFS); ifp->if_bytes = new_size; } @@ -683,7 +683,7 @@ xfs_ifork_init_cow( return; ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone, - KM_SLEEP | KM_NOFS); + KM_NOFS); ip->i_cowfp->if_flags = XFS_IFEXTENTS; ip->i_cformat = XFS_DINODE_FMT_EXTENTS; ip->i_cnextents = 0; diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 51bb9bdb0e84..9a7fadb1361c 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1174,7 +1174,7 @@ out_cur: /* * Record a refcount intent for later processing. */ -static int +static void __xfs_refcount_add( struct xfs_trans *tp, enum xfs_refcount_intent_type type, @@ -1189,44 +1189,43 @@ __xfs_refcount_add( blockcount); ri = kmem_alloc(sizeof(struct xfs_refcount_intent), - KM_SLEEP | KM_NOFS); + KM_NOFS); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_startblock = startblock; ri->ri_blockcount = blockcount; xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); - return 0; } /* * Increase the reference count of the blocks backing a file's extent. 
*/ -int +void xfs_refcount_increase_extent( struct xfs_trans *tp, struct xfs_bmbt_irec *PREV) { if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) - return 0; + return; - return __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, - PREV->br_startblock, PREV->br_blockcount); + __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock, + PREV->br_blockcount); } /* * Decrease the reference count of the blocks backing a file's extent. */ -int +void xfs_refcount_decrease_extent( struct xfs_trans *tp, struct xfs_bmbt_irec *PREV) { if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) - return 0; + return; - return __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, - PREV->br_startblock, PREV->br_blockcount); + __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock, + PREV->br_blockcount); } /* @@ -1541,47 +1540,40 @@ __xfs_refcount_cow_free( } /* Record a CoW staging extent in the refcount btree. */ -int +void xfs_refcount_alloc_cow_extent( struct xfs_trans *tp, xfs_fsblock_t fsb, xfs_extlen_t len) { struct xfs_mount *mp = tp->t_mountp; - int error; if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return 0; + return; - error = __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len); - if (error) - return error; + __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len); /* Add rmap entry */ - return xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), + xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); } /* Forget a CoW staging event in the refcount btree. 
*/ -int +void xfs_refcount_free_cow_extent( struct xfs_trans *tp, xfs_fsblock_t fsb, xfs_extlen_t len) { struct xfs_mount *mp = tp->t_mountp; - int error; if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return 0; + return; /* Remove rmap entry */ - error = xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), + xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); - if (error) - return error; - - return __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len); + __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len); } struct xfs_refcount_recovery { @@ -1602,7 +1594,7 @@ xfs_refcount_recover_extent( if (be32_to_cpu(rec->refc.rc_refcount) != 1) return -EFSCORRUPTED; - rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP); + rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); list_add_tail(&rr->rr_list, debris); @@ -1679,10 +1671,8 @@ xfs_refcount_recover_cow_leftovers( /* Free the orphan record */ agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; fsb = XFS_AGB_TO_FSB(mp, agno, agbno); - error = xfs_refcount_free_cow_extent(tp, fsb, + xfs_refcount_free_cow_extent(tp, fsb, rr->rr_rrec.rc_blockcount); - if (error) - goto out_trans; /* Free the block. 
*/ xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 1d9c518575e7..209795539c8d 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -29,9 +29,9 @@ struct xfs_refcount_intent { xfs_extlen_t ri_blockcount; }; -extern int xfs_refcount_increase_extent(struct xfs_trans *tp, +void xfs_refcount_increase_extent(struct xfs_trans *tp, struct xfs_bmbt_irec *irec); -extern int xfs_refcount_decrease_extent(struct xfs_trans *tp, +void xfs_refcount_decrease_extent(struct xfs_trans *tp, struct xfs_bmbt_irec *irec); extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp, @@ -45,10 +45,10 @@ extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_end_of_shared); -extern int xfs_refcount_alloc_cow_extent(struct xfs_trans *tp, - xfs_fsblock_t fsb, xfs_extlen_t len); -extern int xfs_refcount_free_cow_extent(struct xfs_trans *tp, - xfs_fsblock_t fsb, xfs_extlen_t len); +void xfs_refcount_alloc_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb, + xfs_extlen_t len); +void xfs_refcount_free_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb, + xfs_extlen_t len); extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, xfs_agnumber_t agno); diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index e6aeb390b2fb..38e9414878b3 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -168,7 +168,6 @@ xfs_rmap_btrec_to_irec( union xfs_btree_rec *rec, struct xfs_rmap_irec *irec) { - irec->rm_flags = 0; irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock); irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount); irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner); @@ -254,15 +253,15 @@ xfs_rmap_find_left_neighbor_helper( rec->rm_flags); if (rec->rm_owner != info->high.rm_owner) - return XFS_BTREE_QUERY_RANGE_CONTINUE; + return 0; 
if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) && !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) && rec->rm_offset + rec->rm_blockcount - 1 != info->high.rm_offset) - return XFS_BTREE_QUERY_RANGE_CONTINUE; + return 0; *info->irec = *rec; *info->stat = 1; - return XFS_BTREE_QUERY_RANGE_ABORT; + return -ECANCELED; } /* @@ -305,7 +304,7 @@ xfs_rmap_find_left_neighbor( error = xfs_rmap_query_range(cur, &info.high, &info.high, xfs_rmap_find_left_neighbor_helper, &info); - if (error == XFS_BTREE_QUERY_RANGE_ABORT) + if (error == -ECANCELED) error = 0; if (*stat) trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, @@ -330,16 +329,16 @@ xfs_rmap_lookup_le_range_helper( rec->rm_flags); if (rec->rm_owner != info->high.rm_owner) - return XFS_BTREE_QUERY_RANGE_CONTINUE; + return 0; if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) && !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) && (rec->rm_offset > info->high.rm_offset || rec->rm_offset + rec->rm_blockcount <= info->high.rm_offset)) - return XFS_BTREE_QUERY_RANGE_CONTINUE; + return 0; *info->irec = *rec; *info->stat = 1; - return XFS_BTREE_QUERY_RANGE_ABORT; + return -ECANCELED; } /* @@ -377,7 +376,7 @@ xfs_rmap_lookup_le_range( cur->bc_private.a.agno, bno, 0, owner, offset, flags); error = xfs_rmap_query_range(cur, &info.high, &info.high, xfs_rmap_lookup_le_range_helper, &info); - if (error == XFS_BTREE_QUERY_RANGE_ABORT) + if (error == -ECANCELED) error = 0; if (*stat) trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, @@ -2268,7 +2267,7 @@ xfs_rmap_update_is_needed( * Record a rmap intent; the list is kept sorted first by AG and then by * increasing age. 
*/ -static int +static void __xfs_rmap_add( struct xfs_trans *tp, enum xfs_rmap_intent_type type, @@ -2287,7 +2286,7 @@ __xfs_rmap_add( bmap->br_blockcount, bmap->br_state); - ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS); + ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_NOFS); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_owner = owner; @@ -2295,11 +2294,10 @@ __xfs_rmap_add( ri->ri_bmap = *bmap; xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list); - return 0; } /* Map an extent into a file. */ -int +void xfs_rmap_map_extent( struct xfs_trans *tp, struct xfs_inode *ip, @@ -2307,15 +2305,15 @@ xfs_rmap_map_extent( struct xfs_bmbt_irec *PREV) { if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) - return 0; + return; - return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? + __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino, whichfork, PREV); } /* Unmap an extent out of a file. */ -int +void xfs_rmap_unmap_extent( struct xfs_trans *tp, struct xfs_inode *ip, @@ -2323,9 +2321,9 @@ xfs_rmap_unmap_extent( struct xfs_bmbt_irec *PREV) { if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) - return 0; + return; - return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? + __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino, whichfork, PREV); } @@ -2336,7 +2334,7 @@ xfs_rmap_unmap_extent( * Note that tp can be NULL here as no transaction is used for COW fork * unwritten conversion. */ -int +void xfs_rmap_convert_extent( struct xfs_mount *mp, struct xfs_trans *tp, @@ -2345,15 +2343,15 @@ xfs_rmap_convert_extent( struct xfs_bmbt_irec *PREV) { if (!xfs_rmap_update_is_needed(mp, whichfork)) - return 0; + return; - return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? + __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino, whichfork, PREV); } /* Schedule the creation of an rmap for non-file data. 
*/ -int +void xfs_rmap_alloc_extent( struct xfs_trans *tp, xfs_agnumber_t agno, @@ -2364,18 +2362,18 @@ xfs_rmap_alloc_extent( struct xfs_bmbt_irec bmap; if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK)) - return 0; + return; bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno); bmap.br_blockcount = len; bmap.br_startoff = 0; bmap.br_state = XFS_EXT_NORM; - return __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap); + __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap); } /* Schedule the deletion of an rmap for non-file data. */ -int +void xfs_rmap_free_extent( struct xfs_trans *tp, xfs_agnumber_t agno, @@ -2386,14 +2384,14 @@ xfs_rmap_free_extent( struct xfs_bmbt_irec bmap; if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK)) - return 0; + return; bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno); bmap.br_blockcount = len; bmap.br_startoff = 0; bmap.br_state = XFS_EXT_NORM; - return __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap); + __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap); } /* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. 
*/ @@ -2511,7 +2509,7 @@ xfs_rmap_has_other_keys_helper( ((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags) return 0; rks->has_rmap = true; - return XFS_BTREE_QUERY_RANGE_ABORT; + return -ECANCELED; } /* @@ -2540,8 +2538,11 @@ xfs_rmap_has_other_keys( error = xfs_rmap_query_range(cur, &low, &high, xfs_rmap_has_other_keys_helper, &rks); + if (error < 0) + return error; + *has_rmap = rks.has_rmap; - return error; + return 0; } const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = { diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index e21ed0294e5c..abe633403fd1 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -68,6 +68,7 @@ xfs_rmap_irec_offset_unpack( if (offset & ~(XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS)) return -EFSCORRUPTED; irec->rm_offset = XFS_RMAP_OFF(offset); + irec->rm_flags = 0; if (offset & XFS_RMAP_OFF_ATTR_FORK) irec->rm_flags |= XFS_RMAP_ATTR_FORK; if (offset & XFS_RMAP_OFF_BMBT_BLOCK) @@ -161,16 +162,16 @@ struct xfs_rmap_intent { }; /* functions for updating the rmapbt based on bmbt map/unmap operations */ -int xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, +void xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *imap); -int xfs_rmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, +void xfs_rmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *imap); -int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_trans *tp, +void xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *imap); -int xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno, +void xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner); -int xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno, +void xfs_rmap_free_extent(struct xfs_trans *tp, 
xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner); void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp, diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index e0641b7337b3..c45acbd3add9 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -177,10 +177,4 @@ struct xfs_ino_geometry { unsigned int agino_log; /* #bits for agino in inum */ }; -/* Keep iterating the data structure. */ -#define XFS_ITER_CONTINUE (0) - -/* Stop iterating the data structure. */ -#define XFS_ITER_ABORT (1) - #endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 802b34cd10fe..300b3e91ca3a 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -169,6 +169,14 @@ typedef struct xfs_bmbt_irec xfs_exntst_t br_state; /* extent state */ } xfs_bmbt_irec_t; +/* per-AG block reservation types */ +enum xfs_ag_resv_type { + XFS_AG_RESV_NONE = 0, + XFS_AG_RESV_AGFL, + XFS_AG_RESV_METADATA, + XFS_AG_RESV_RMAPBT, +}; + /* * Type verifier functions */ |