summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/kmem.c18
-rw-r--r--fs/xfs/kmem.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c34
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c87
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c26
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h2
-rw-r--r--fs/xfs/xfs_aops.c59
-rw-r--r--fs/xfs/xfs_dir2_readdir.c11
-rw-r--r--fs/xfs/xfs_icache.c2
-rw-r--r--fs/xfs/xfs_inode.c14
-rw-r--r--fs/xfs/xfs_iomap.c25
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_mount.c3
-rw-r--r--fs/xfs/xfs_reflink.c23
-rw-r--r--fs/xfs/xfs_reflink.h4
-rw-r--r--fs/xfs/xfs_super.c2
18 files changed, 215 insertions, 111 deletions
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 2dfdc62f795e..70a5b55e0870 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -25,24 +25,6 @@
#include "kmem.h"
#include "xfs_message.h"
-/*
- * Greedy allocation. May fail and may return vmalloced memory.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
- void *ptr;
- size_t kmsize = maxsize;
-
- while (!(ptr = vzalloc(kmsize))) {
- if ((kmsize >>= 1) <= minsize)
- kmsize = minsize;
- }
- if (ptr)
- *size = kmsize;
- return ptr;
-}
-
void *
kmem_alloc(size_t size, xfs_km_flags_t flags)
{
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 689f746224e7..f0fc84fcaac2 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -69,8 +69,6 @@ static inline void kmem_free(const void *ptr)
}
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
static inline void *
kmem_zalloc(size_t size, xfs_km_flags_t flags)
{
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a9c66d47757a..9bd104f32908 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree(
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
} else if (dfops->dop_low) {
-try_another_ag:
args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
args.fsbno = *firstblock;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -790,13 +790,17 @@ try_another_ag:
if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
args.fsbno == NULLFSBLOCK &&
args.type == XFS_ALLOCTYPE_NEAR_BNO) {
- dfops->dop_low = true;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
goto try_another_ag;
}
+ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
+ xfs_iroot_realloc(ip, -1, whichfork);
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return -ENOSPC;
+ }
/*
* Allocation can't fail, the space was reserved.
*/
- ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(*firstblock == NULLFSBLOCK ||
args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
*firstblock = cur->bc_private.b.firstblock = args.fsbno;
@@ -4150,6 +4154,19 @@ xfs_bmapi_read(
return 0;
}
+/*
+ * Add a delayed allocation extent to an inode. Blocks are reserved from the
+ * global pool and the extent inserted into the inode in-core extent tree.
+ *
+ * On entry, got refers to the first extent beyond the offset of the extent to
+ * allocate or eof is specified if no such extent exists. On return, got refers
+ * to the extent record that was inserted to the inode fork.
+ *
+ * Note that the allocated extent may have been merged with contiguous extents
+ * during insertion into the inode fork. Thus, got does not reflect the current
+ * state of the inode fork on return. If necessary, the caller can use lastx to
+ * look up the updated record in the inode fork.
+ */
int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
@@ -4236,13 +4253,8 @@ xfs_bmapi_reserve_delalloc(
got->br_startblock = nullstartblock(indlen);
got->br_blockcount = alen;
got->br_state = XFS_EXT_NORM;
- xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
- /*
- * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
- * might have merged it into one of the neighbouring ones.
- */
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+ xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
/*
* Tag the inode if blocks were preallocated. Note that COW fork
@@ -4254,10 +4266,6 @@ xfs_bmapi_reserve_delalloc(
if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
xfs_inode_set_cowblocks_tag(ip);
- ASSERT(got->br_startoff <= aoff);
- ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
- ASSERT(isnullstartblock(got->br_startblock));
- ASSERT(got->br_state == XFS_EXT_NORM);
return 0;
out_unreserve_blocks:
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index f93072b58a58..fd55db479385 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -447,8 +447,8 @@ xfs_bmbt_alloc_block(
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = be64_to_cpu(start->l);
-try_another_ag:
args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
/*
* Make sure there is sufficient room left in the AG to
* complete a full tree split for an extent insert. If
@@ -488,8 +488,8 @@ try_another_ag:
if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
args.fsbno == NULLFSBLOCK &&
args.type == XFS_ALLOCTYPE_NEAR_BNO) {
- cur->bc_private.b.dfops->dop_low = true;
args.fsbno = cur->bc_private.b.firstblock;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
goto try_another_ag;
}
@@ -506,7 +506,7 @@ try_another_ag:
goto error0;
cur->bc_private.b.dfops->dop_low = true;
}
- if (args.fsbno == NULLFSBLOCK) {
+ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index d04547fcf274..eb00bc133bca 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -125,6 +125,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
+ int size);
/* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index c6809ff41197..96b45cd6c63f 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -629,6 +629,93 @@ xfs_dir2_sf_check(
}
#endif /* DEBUG */
+/* Verify the consistency of an inline directory. */
+int
+xfs_dir2_sf_verify(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *sfp,
+ int size)
+{
+ struct xfs_dir2_sf_entry *sfep;
+ struct xfs_dir2_sf_entry *next_sfep;
+ char *endp;
+ const struct xfs_dir_ops *dops;
+ xfs_ino_t ino;
+ int i;
+ int i8count;
+ int offset;
+ __uint8_t filetype;
+
+ dops = xfs_dir_get_ops(mp, NULL);
+
+ /*
+ * Give up if the directory is way too short.
+ */
+ XFS_WANT_CORRUPTED_RETURN(mp, size >
+ offsetof(struct xfs_dir2_sf_hdr, parent));
+ XFS_WANT_CORRUPTED_RETURN(mp, size >=
+ xfs_dir2_sf_hdr_size(sfp->i8count));
+
+ endp = (char *)sfp + size;
+
+ /* Check .. entry */
+ ino = dops->sf_get_parent_ino(sfp);
+ i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+ XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+ offset = dops->data_first_offset;
+
+ /* Check all reported entries */
+ sfep = xfs_dir2_sf_firstentry(sfp);
+ for (i = 0; i < sfp->count; i++) {
+ /*
+ * struct xfs_dir2_sf_entry has a variable length.
+ * Check the fixed-offset parts of the structure are
+ * within the data buffer.
+ */
+ XFS_WANT_CORRUPTED_RETURN(mp,
+ ((char *)sfep + sizeof(*sfep)) < endp);
+
+ /* Don't allow names with known bad length. */
+ XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
+ XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
+
+ /*
+ * Check that the variable-length part of the structure is
+ * within the data buffer. The next entry starts after the
+ * name component, so nextentry is an acceptable test.
+ */
+ next_sfep = dops->sf_nextentry(sfp, sfep);
+ XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
+
+ /* Check that the offsets always increase. */
+ XFS_WANT_CORRUPTED_RETURN(mp,
+ xfs_dir2_sf_get_offset(sfep) >= offset);
+
+ /* Check the inode number. */
+ ino = dops->sf_get_ino(sfp, sfep);
+ i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+ XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+
+ /* Check the file type. */
+ filetype = dops->sf_get_ftype(sfep);
+ XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
+
+ offset = xfs_dir2_sf_get_offset(sfep) +
+ dops->data_entsize(sfep->namelen);
+
+ sfep = next_sfep;
+ }
+ XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
+ XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
+
+ /* Make sure this whole thing ought to be in local format. */
+ XFS_WANT_CORRUPTED_RETURN(mp, offset +
+ (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+ (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
+
+ return 0;
+}
+
/*
* Create a new (shortform) directory.
*/
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 25c1e078aef6..9653e964eda4 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -33,6 +33,8 @@
#include "xfs_trace.h"
#include "xfs_attr_sf.h"
#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
kmem_zone_t *xfs_ifork_zone;
@@ -320,6 +322,7 @@ xfs_iformat_local(
int whichfork,
int size)
{
+ int error;
/*
* If the size is unreasonable, then something
@@ -336,6 +339,14 @@ xfs_iformat_local(
return -EFSCORRUPTED;
}
+ if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+ error = xfs_dir2_sf_verify(ip->i_mount,
+ (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
+ size);
+ if (error)
+ return error;
+ }
+
xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
return 0;
}
@@ -856,7 +867,7 @@ xfs_iextents_copy(
* In these cases, the format always takes precedence, because the
* format indicates the current state of the fork.
*/
-void
+int
xfs_iflush_fork(
xfs_inode_t *ip,
xfs_dinode_t *dip,
@@ -866,6 +877,7 @@ xfs_iflush_fork(
char *cp;
xfs_ifork_t *ifp;
xfs_mount_t *mp;
+ int error;
static const short brootflag[2] =
{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
static const short dataflag[2] =
@@ -874,7 +886,7 @@ xfs_iflush_fork(
{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
if (!iip)
- return;
+ return 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
/*
* This can happen if we gave up in iformat in an error path,
@@ -882,12 +894,19 @@ xfs_iflush_fork(
*/
if (!ifp) {
ASSERT(whichfork == XFS_ATTR_FORK);
- return;
+ return 0;
}
cp = XFS_DFORK_PTR(dip, whichfork);
mp = ip->i_mount;
switch (XFS_IFORK_FORMAT(ip, whichfork)) {
case XFS_DINODE_FMT_LOCAL:
+ if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+ error = xfs_dir2_sf_verify(mp,
+ (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
+ ifp->if_bytes);
+ if (error)
+ return error;
+ }
if ((iip->ili_fields & dataflag[whichfork]) &&
(ifp->if_bytes > 0)) {
ASSERT(ifp->if_u1.if_data != NULL);
@@ -940,6 +959,7 @@ xfs_iflush_fork(
ASSERT(0);
break;
}
+ return 0;
}
/*
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 7fb8365326d1..132dc59fdde6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -140,7 +140,7 @@ typedef struct xfs_ifork {
struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
-void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
struct xfs_inode_log_item *, int);
void xfs_idestroy_fork(struct xfs_inode *, int);
void xfs_idata_realloc(struct xfs_inode *, int, int);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index bf65a9ea8642..61494295d92f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -274,54 +274,49 @@ xfs_end_io(
struct xfs_ioend *ioend =
container_of(work, struct xfs_ioend, io_work);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ xfs_off_t offset = ioend->io_offset;
+ size_t size = ioend->io_size;
int error = ioend->io_bio->bi_error;
/*
- * Set an error if the mount has shut down and proceed with end I/O
- * processing so it can perform whatever cleanups are necessary.
+ * Just clean up the in-memory strutures if the fs has been shut down.
*/
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
error = -EIO;
+ goto done;
+ }
/*
- * For a CoW extent, we need to move the mapping from the CoW fork
- * to the data fork. If instead an error happened, just dump the
- * new blocks.
+ * Clean up any COW blocks on an I/O error.
*/
- if (ioend->io_type == XFS_IO_COW) {
- if (error)
- goto done;
- if (ioend->io_bio->bi_error) {
- error = xfs_reflink_cancel_cow_range(ip,
- ioend->io_offset, ioend->io_size);
- goto done;
+ if (unlikely(error)) {
+ switch (ioend->io_type) {
+ case XFS_IO_COW:
+ xfs_reflink_cancel_cow_range(ip, offset, size, true);
+ break;
}
- error = xfs_reflink_end_cow(ip, ioend->io_offset,
- ioend->io_size);
- if (error)
- goto done;
+
+ goto done;
}
/*
- * For unwritten extents we need to issue transactions to convert a
- * range to normal written extens after the data I/O has finished.
- * Detecting and handling completion IO errors is done individually
- * for each case as different cleanup operations need to be performed
- * on error.
+ * Success: commit the COW or unwritten blocks if needed.
*/
- if (ioend->io_type == XFS_IO_UNWRITTEN) {
- if (error)
- goto done;
- error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
- ioend->io_size);
- } else if (ioend->io_append_trans) {
- error = xfs_setfilesize_ioend(ioend, error);
- } else {
- ASSERT(!xfs_ioend_is_append(ioend) ||
- ioend->io_type == XFS_IO_COW);
+ switch (ioend->io_type) {
+ case XFS_IO_COW:
+ error = xfs_reflink_end_cow(ip, offset, size);
+ break;
+ case XFS_IO_UNWRITTEN:
+ error = xfs_iomap_write_unwritten(ip, offset, size);
+ break;
+ default:
+ ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
+ break;
}
done:
+ if (ioend->io_append_trans)
+ error = xfs_setfilesize_ioend(ioend, error);
xfs_destroy_ioend(ioend, error);
}
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 003a99b83bd8..ad9396e516f6 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -71,22 +71,11 @@ xfs_dir2_sf_getdents(
struct xfs_da_geometry *geo = args->geo;
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
- /*
- * Give up if the directory is way too short.
- */
- if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return -EIO;
- }
-
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
- return -EFSCORRUPTED;
-
/*
* If the block number in the offset is out of range, we're done.
*/
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 7234b9748c36..3531f8f72fa5 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
- ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+ ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index edfa6a55b064..c7fe2c2123ab 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1615,7 +1615,7 @@ xfs_itruncate_extents(
/* Remove all pending CoW reservations. */
error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
- last_block);
+ last_block, true);
if (error)
goto out;
@@ -3475,6 +3475,7 @@ xfs_iflush_int(
struct xfs_inode_log_item *iip = ip->i_itemp;
struct xfs_dinode *dip;
struct xfs_mount *mp = ip->i_mount;
+ int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(xfs_isiflocked(ip));
@@ -3557,9 +3558,14 @@ xfs_iflush_int(
if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
ip->i_d.di_flushiter = 0;
- xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
- if (XFS_IFORK_Q(ip))
- xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+ error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+ if (error)
+ return error;
+ if (XFS_IFORK_Q(ip)) {
+ error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+ if (error)
+ return error;
+ }
xfs_inobp_check(mp, bp);
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 41662fb14e87..288ee5b840d7 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -630,6 +630,11 @@ retry:
goto out_unlock;
}
+ /*
+ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+ * them out if the write happens to fail.
+ */
+ iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc(
struct xfs_inode *ip,
loff_t offset,
loff_t length,
- ssize_t written)
+ ssize_t written,
+ struct iomap *iomap)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_fsb;
xfs_fileoff_t end_fsb;
int error = 0;
- /* behave as if the write failed if drop writes is enabled */
- if (xfs_mp_drop_writes(mp))
+ /*
+ * Behave as if the write failed if drop writes is enabled. Set the NEW
+ * flag to force delalloc cleanup.
+ */
+ if (xfs_mp_drop_writes(mp)) {
+ iomap->flags |= IOMAP_F_NEW;
written = 0;
+ }
/*
* start_fsb refers to the first unused block after a short write. If
@@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc(
end_fsb = XFS_B_TO_FSB(mp, offset + length);
/*
- * Trim back delalloc blocks if we didn't manage to write the whole
- * range reserved.
+ * Trim delalloc blocks if they were allocated by this write and we
+ * didn't manage to write the whole range.
*
* We don't need to care about racing delalloc as we hold i_mutex
* across the reserve/allocate/unreserve calls. If there are delalloc
* blocks in the range, they are ours.
*/
- if (start_fsb < end_fsb) {
+ if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
XFS_FSB_TO_B(mp, end_fsb) - 1);
@@ -1131,7 +1142,7 @@ xfs_file_iomap_end(
{
if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
- length, written);
+ length, written, iomap);
return 0;
}
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 66e881790c17..2a6d9b1558e0 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -361,7 +361,6 @@ xfs_bulkstat(
xfs_agino_t agino; /* inode # in allocation group */
xfs_agnumber_t agno; /* allocation group number */
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
- size_t irbsize; /* size of irec buffer in bytes */
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
int nirbuf; /* size of irbuf */
int ubcount; /* size of user's buffer */
@@ -388,11 +387,10 @@ xfs_bulkstat(
*ubcountp = 0;
*done = 0;
- irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
+ irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
if (!irbuf)
return -ENOMEM;
-
- nirbuf = irbsize / sizeof(*irbuf);
+ nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
/*
* Loop over the allocation groups, starting from the last
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 450bde68bb75..688ebff1f663 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -513,8 +513,7 @@ STATIC void
xfs_set_inoalignment(xfs_mount_t *mp)
{
if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >=
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+ mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
else
mp->m_inoalign_mask = 0;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index da6d08fb359c..4a84c5ea266d 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow(
}
/*
- * Cancel all pending CoW reservations for some block range of an inode.
+ * Cancel CoW reservations for some block range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
*/
int
xfs_reflink_cancel_cow_blocks(
struct xfs_inode *ip,
struct xfs_trans **tpp,
xfs_fileoff_t offset_fsb,
- xfs_fileoff_t end_fsb)
+ xfs_fileoff_t end_fsb,
+ bool cancel_real)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got, del;
@@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks(
&idx, &got, &del);
if (error)
break;
- } else {
+ } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
xfs_trans_ijoin(*tpp, ip, 0);
xfs_defer_init(&dfops, &firstfsb);
@@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks(
}
/*
- * Cancel all pending CoW reservations for some byte range of an inode.
+ * Cancel CoW reservations for some byte range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
*/
int
xfs_reflink_cancel_cow_range(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t count)
+ xfs_off_t count,
+ bool cancel_real)
{
struct xfs_trans *tp;
xfs_fileoff_t offset_fsb;
@@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range(
xfs_trans_ijoin(tp, ip, 0);
/* Scrape out the old CoW reservations */
- error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb);
+ error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
+ cancel_real);
if (error)
goto out_cancel;
@@ -1450,7 +1459,7 @@ next:
* We didn't find any shared blocks so turn off the reflink flag.
* First, get rid of any leftover CoW mappings.
*/
- error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF);
+ error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
if (error)
return error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 33ac9b8db683..d29a7967f029 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
- xfs_fileoff_t end_fsb);
+ xfs_fileoff_t end_fsb, bool cancel_real);
extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t count);
+ xfs_off_t count, bool cancel_real);
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 890862f2447c..685c042a120f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -953,7 +953,7 @@ xfs_fs_destroy_inode(
XFS_STATS_INC(ip->i_mount, vn_remove);
if (xfs_is_reflink_inode(ip)) {
- error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+ error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
xfs_warn(ip->i_mount,
"Error %d while evicting CoW blocks for inode %llu.",