summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Kconfig25
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c19
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h18
-rw-r--r--fs/xfs/libxfs/xfs_defer.c232
-rw-r--r--fs/xfs/libxfs/xfs_defer.h37
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h2
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c27
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c11
-rw-r--r--fs/xfs/scrub/dabtree.c14
-rw-r--r--fs/xfs/xfs_bmap_item.c132
-rw-r--r--fs/xfs/xfs_bmap_util.c18
-rw-r--r--fs/xfs/xfs_buf_item_recover.c2
-rw-r--r--fs/xfs/xfs_dquot.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c44
-rw-r--r--fs/xfs/xfs_file.c40
-rw-r--r--fs/xfs/xfs_filestream.c34
-rw-r--r--fs/xfs/xfs_fsmap.c48
-rw-r--r--fs/xfs/xfs_fsmap.h6
-rw-r--r--fs/xfs/xfs_inode.c123
-rw-r--r--fs/xfs/xfs_ioctl.c144
-rw-r--r--fs/xfs/xfs_iops.c4
-rw-r--r--fs/xfs/xfs_linux.h7
-rw-r--r--fs/xfs/xfs_log.c44
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_recover.c229
-rw-r--r--fs/xfs/xfs_qm.c16
-rw-r--r--fs/xfs/xfs_refcount_item.c51
-rw-r--r--fs/xfs/xfs_rmap_item.c42
-rw-r--r--fs/xfs/xfs_rtalloc.c31
-rw-r--r--fs/xfs/xfs_stats.c4
-rw-r--r--fs/xfs/xfs_stats.h1
-rw-r--r--fs/xfs/xfs_super.c47
-rw-r--r--fs/xfs/xfs_sysctl.c36
-rw-r--r--fs/xfs/xfs_trace.h1
-rw-r--r--fs/xfs/xfs_trans.c2
-rw-r--r--fs/xfs/xfs_trans.h33
-rw-r--r--fs/xfs/xfs_trans_dquot.c43
38 files changed, 1047 insertions, 528 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index e685299eb3d2..9fac5ea8d0e4 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -22,6 +22,31 @@ config XFS_FS
system of your root partition is compiled as a module, you'll need
to use an initial ramdisk (initrd) to boot.
+config XFS_SUPPORT_V4
+ bool "Support deprecated V4 (crc=0) format"
+ depends on XFS_FS
+ default y
+ help
+ The V4 filesystem format lacks certain features that are supported
+ by the V5 format, such as metadata checksumming, strengthened
+ metadata verification, and the ability to store timestamps past the
+ year 2038. Because of this, the V4 format is deprecated. All users
+ should upgrade by backing up their files, reformatting, and restoring
+ from the backup.
+
+ Administrators and users can detect a V4 filesystem by running
+ xfs_info against a filesystem mountpoint and checking for a string
+ beginning with "crc=". If the string "crc=0" is found, the
+ filesystem is a V4 filesystem. If no such string is found, please
+ upgrade xfsprogs to the latest version and try again.
+
+ This option will become default N in September 2025. Support for the
+ V4 format will be removed entirely in September 2030. Distributors
+ can say N here to withdraw support earlier.
+
+ To continue supporting the old V4 format (crc=0), say Y.
+ To close off an attack surface, say N.
+
config XFS_QUOTA
bool "XFS Quota support"
depends on XFS_FS
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 3f80cede7406..48d8e9caf86f 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -96,8 +96,6 @@ xfs_attr3_rmt_verify(
{
struct xfs_attr3_rmt_hdr *rmt = ptr;
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return __this_address;
if (!xfs_verify_magic(bp, rmt->rm_magic))
return __this_address;
if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1b0a01b06a05..d9a692484eae 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5046,20 +5046,25 @@ xfs_bmap_del_extent_real(
flags = XFS_ILOG_CORE;
if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
- xfs_fsblock_t bno;
xfs_filblks_t len;
xfs_extlen_t mod;
- bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize,
- &mod);
- ASSERT(mod == 0);
len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
&mod);
ASSERT(mod == 0);
- error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
- if (error)
- goto done;
+ if (!(bflags & XFS_BMAPI_REMAP)) {
+ xfs_fsblock_t bno;
+
+ bno = div_u64_rem(del->br_startblock,
+ mp->m_sb.sb_rextsize, &mod);
+ ASSERT(mod == 0);
+
+ error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+ if (error)
+ goto done;
+ }
+
do_fx = 0;
nblks = len * mp->m_sb.sb_rextsize;
qfield = XFS_TRANS_DQ_RTBCOUNT;
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index b40a4e80f5ee..b876b44c0204 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -15,8 +15,8 @@
*/
#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */
#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */
-#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */
-#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */
+#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */
+#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */
typedef struct xfs_da_blkinfo {
__be32 forw; /* previous block in list */
@@ -35,8 +35,8 @@ typedef struct xfs_da_blkinfo {
*/
#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */
#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */
-#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */
-#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */
+#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v3 dirlf single blks */
+#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v3 dirlf multi blks */
struct xfs_da3_blkinfo {
/*
@@ -61,7 +61,7 @@ struct xfs_da3_blkinfo {
* Since we have duplicate keys, use a binary search but always follow
* all match in the block, not just the first match found.
*/
-#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */
+#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */
typedef struct xfs_da_node_hdr {
struct xfs_da_blkinfo info; /* block type, links, etc. */
@@ -746,14 +746,14 @@ xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
*/
static inline int xfs_attr_leaf_entsize_remote(int nlen)
{
- return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
- XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
+ return round_up(sizeof(struct xfs_attr_leaf_name_remote) - 1 +
+ nlen, XFS_ATTR_LEAF_NAME_ALIGN);
}
static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
{
- return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) +
- XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
+ return round_up(sizeof(struct xfs_attr_leaf_name_local) - 1 +
+ nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN);
}
static inline int xfs_attr_leaf_entsize_local_max(int bsize)
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index d8f586256add..eff4a127188e 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -16,6 +16,8 @@
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
+#include "xfs_icache.h"
+#include "xfs_log.h"
/*
* Deferred Operations in XFS
@@ -186,8 +188,9 @@ xfs_defer_create_intent(
{
const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];
- dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
- dfp->dfp_count, sort);
+ if (!dfp->dfp_intent)
+ dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
+ dfp->dfp_count, sort);
}
/*
@@ -312,22 +315,6 @@ xfs_defer_trans_roll(
}
/*
- * Reset an already used dfops after finish.
- */
-static void
-xfs_defer_reset(
- struct xfs_trans *tp)
-{
- ASSERT(list_empty(&tp->t_dfops));
-
- /*
- * Low mode state transfers across transaction rolls to mirror dfops
- * lifetime. Clear it now that dfops is reset.
- */
- tp->t_flags &= ~XFS_TRANS_LOWMODE;
-}
-
-/*
* Free up any items left in the list.
*/
static void
@@ -360,6 +347,58 @@ xfs_defer_cancel_list(
}
/*
+ * Prevent a log intent item from pinning the tail of the log by logging a
+ * done item to release the intent item; and then log a new intent item.
+ * The caller should provide a fresh transaction and roll it after we're done.
+ */
+static int
+xfs_defer_relog(
+ struct xfs_trans **tpp,
+ struct list_head *dfops)
+{
+ struct xlog *log = (*tpp)->t_mountp->m_log;
+ struct xfs_defer_pending *dfp;
+ xfs_lsn_t threshold_lsn = NULLCOMMITLSN;
+
+
+ ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
+
+ list_for_each_entry(dfp, dfops, dfp_list) {
+ /*
+ * If the log intent item for this deferred op is not a part of
+ * the current log checkpoint, relog the intent item to keep
+ * the log tail moving forward. We're ok with this being racy
+ * because an incorrect decision means we'll be a little slower
+ * at pushing the tail.
+ */
+ if (dfp->dfp_intent == NULL ||
+ xfs_log_item_in_current_chkpt(dfp->dfp_intent))
+ continue;
+
+ /*
+ * Figure out where we need the tail to be in order to maintain
+ * the minimum required free space in the log. Only sample
+ * the log threshold once per call.
+ */
+ if (threshold_lsn == NULLCOMMITLSN) {
+ threshold_lsn = xlog_grant_push_threshold(log, 0);
+ if (threshold_lsn == NULLCOMMITLSN)
+ break;
+ }
+ if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
+ continue;
+
+ trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
+ XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
+ dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
+ }
+
+ if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
+ return xfs_defer_trans_roll(tpp);
+ return 0;
+}
+
+/*
* Log an intent-done item for the first pending intent, and finish the work
* items.
*/
@@ -390,6 +429,7 @@ xfs_defer_finish_one(
list_add(li, &dfp->dfp_work);
dfp->dfp_count++;
dfp->dfp_done = NULL;
+ dfp->dfp_intent = NULL;
xfs_defer_create_intent(tp, dfp, false);
}
@@ -428,13 +468,27 @@ xfs_defer_finish_noroll(
/* Until we run out of pending work to finish... */
while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
+ /*
+ * Deferred items that are created in the process of finishing
+ * other deferred work items should be queued at the head of
+ * the pending list, which puts them ahead of the deferred work
+ * that was created by the caller. This keeps the number of
+ * pending work items to a minimum, which decreases the amount
+ * of time that any one intent item can stick around in memory,
+ * pinning the log tail.
+ */
xfs_defer_create_intents(*tp);
- list_splice_tail_init(&(*tp)->t_dfops, &dop_pending);
+ list_splice_init(&(*tp)->t_dfops, &dop_pending);
error = xfs_defer_trans_roll(tp);
if (error)
goto out_shutdown;
+ /* Possibly relog intent items to keep the log moving. */
+ error = xfs_defer_relog(tp, &dop_pending);
+ if (error)
+ goto out_shutdown;
+
dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
dfp_list);
error = xfs_defer_finish_one(*tp, dfp);
@@ -475,7 +529,10 @@ xfs_defer_finish(
return error;
}
}
- xfs_defer_reset(*tp);
+
+ /* Reset LOWMODE now that we've finished all the dfops. */
+ ASSERT(list_empty(&(*tp)->t_dfops));
+ (*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
return 0;
}
@@ -549,6 +606,139 @@ xfs_defer_move(
* that behavior.
*/
dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
+ stp->t_flags &= ~XFS_TRANS_LOWMODE;
+}
+
+/*
+ * Prepare a chain of fresh deferred ops work items to be completed later. Log
+ * recovery requires the ability to put off until later the actual finishing
+ * work so that it can process unfinished items recovered from the log in
+ * correct order.
+ *
+ * Create and log intent items for all the work that we're capturing so that we
+ * can be assured that the items will get replayed if the system goes down
+ * before log recovery gets a chance to finish the work it put off. The entire
+ * deferred ops state is transferred to the capture structure and the
+ * transaction is then ready for the caller to commit it. If there are no
+ * intent items to capture, this function returns NULL.
+ *
+ * If capture_ip is not NULL, the capture structure will obtain an extra
+ * reference to the inode.
+ */
+static struct xfs_defer_capture *
+xfs_defer_ops_capture(
+ struct xfs_trans *tp,
+ struct xfs_inode *capture_ip)
+{
+ struct xfs_defer_capture *dfc;
+
+ if (list_empty(&tp->t_dfops))
+ return NULL;
+
+ /* Create an object to capture the defer ops. */
+ dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
+ INIT_LIST_HEAD(&dfc->dfc_list);
+ INIT_LIST_HEAD(&dfc->dfc_dfops);
+
+ xfs_defer_create_intents(tp);
+
+ /* Move the dfops chain and transaction state to the capture struct. */
+ list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
+ dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
+ tp->t_flags &= ~XFS_TRANS_LOWMODE;
+
+ /* Capture the remaining block reservations along with the dfops. */
+ dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
+ dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;
+
+ /* Preserve the log reservation size. */
+ dfc->dfc_logres = tp->t_log_res;
+
+ /*
+ * Grab an extra reference to this inode and attach it to the capture
+ * structure.
+ */
+ if (capture_ip) {
+ ihold(VFS_I(capture_ip));
+ dfc->dfc_capture_ip = capture_ip;
+ }
+
+ return dfc;
+}
+
+/* Release all resources that we used to capture deferred ops. */
+void
+xfs_defer_ops_release(
+ struct xfs_mount *mp,
+ struct xfs_defer_capture *dfc)
+{
+ xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+ if (dfc->dfc_capture_ip)
+ xfs_irele(dfc->dfc_capture_ip);
+ kmem_free(dfc);
+}
+
+/*
+ * Capture any deferred ops and commit the transaction. This is the last step
+ * needed to finish a log intent item that we recovered from the log. If any
+ * of the deferred ops operate on an inode, the caller must pass in that inode
+ * so that the reference can be transferred to the capture structure. The
+ * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
+ * xfs_defer_ops_continue.
+ */
+int
+xfs_defer_ops_capture_and_commit(
+ struct xfs_trans *tp,
+ struct xfs_inode *capture_ip,
+ struct list_head *capture_list)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_defer_capture *dfc;
+ int error;
+
+ ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL));
+
+ /* If we don't capture anything, commit transaction and exit. */
+ dfc = xfs_defer_ops_capture(tp, capture_ip);
+ if (!dfc)
+ return xfs_trans_commit(tp);
+
+ /* Commit the transaction and add the capture structure to the list. */
+ error = xfs_trans_commit(tp);
+ if (error) {
+ xfs_defer_ops_release(mp, dfc);
+ return error;
+ }
+
+ list_add_tail(&dfc->dfc_list, capture_list);
+ return 0;
+}
+
+/*
+ * Attach a chain of captured deferred ops to a new transaction and free the
+ * capture structure. If an inode was captured, it will be passed back to the
+ * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
+ * The caller now owns the inode reference.
+ */
+void
+xfs_defer_ops_continue(
+ struct xfs_defer_capture *dfc,
+ struct xfs_trans *tp,
+ struct xfs_inode **captured_ipp)
+{
+ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+ ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
+
+ /* Lock and join the captured inode to the new transaction. */
+ if (dfc->dfc_capture_ip) {
+ xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0);
+ }
+ *captured_ipp = dfc->dfc_capture_ip;
+
+ /* Move captured dfops chain and state to the transaction. */
+ list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
+ tp->t_flags |= dfc->dfc_tpflags;
- xfs_defer_reset(stp);
+ kmem_free(dfc);
}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 6b2ca580f2b0..05472f71fffe 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -8,6 +8,7 @@
struct xfs_btree_cur;
struct xfs_defer_op_type;
+struct xfs_defer_capture;
/*
* Header for deferred operation list.
@@ -63,4 +64,40 @@ extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
+/*
+ * This structure enables a dfops user to detach the chain of deferred
+ * operations from a transaction so that they can be continued later.
+ */
+struct xfs_defer_capture {
+ /* List of other capture structures. */
+ struct list_head dfc_list;
+
+ /* Deferred ops state saved from the transaction. */
+ struct list_head dfc_dfops;
+ unsigned int dfc_tpflags;
+
+ /* Block reservations for the data and rt devices. */
+ unsigned int dfc_blkres;
+ unsigned int dfc_rtxres;
+
+ /* Log reservation saved from the transaction. */
+ unsigned int dfc_logres;
+
+ /*
+ * An inode reference that must be maintained to complete the deferred
+ * work.
+ */
+ struct xfs_inode *dfc_capture_ip;
+};
+
+/*
+ * Functions to capture a chain of deferred operations and continue them later.
+ * This doesn't normally happen except log recovery.
+ */
+int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
+ struct xfs_inode *capture_ip, struct list_head *capture_list);
+void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
+ struct xfs_inode **captured_ipp);
+void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d);
+
#endif /* __XFS_DEFER_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 536666143fe7..ef5eaf33d146 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -17,7 +17,7 @@ struct xfs_dinode;
*/
struct xfs_icdinode {
uint16_t di_flushiter; /* incremented on flush */
- uint32_t di_projid; /* owner's project id */
+ prid_t di_projid; /* owner's project id */
xfs_fsize_t di_size; /* number of bytes in file */
xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 27c39268c31f..340c83f76c80 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2505,12 +2505,15 @@ xfs_rmap_map_extent(
int whichfork,
struct xfs_bmbt_irec *PREV)
{
+ enum xfs_rmap_intent_type type = XFS_RMAP_MAP;
+
if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
return;
- __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
- XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino,
- whichfork, PREV);
+ if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
+ type = XFS_RMAP_MAP_SHARED;
+
+ __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
}
/* Unmap an extent out of a file. */
@@ -2521,12 +2524,15 @@ xfs_rmap_unmap_extent(
int whichfork,
struct xfs_bmbt_irec *PREV)
{
+ enum xfs_rmap_intent_type type = XFS_RMAP_UNMAP;
+
if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
return;
- __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
- XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino,
- whichfork, PREV);
+ if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
+ type = XFS_RMAP_UNMAP_SHARED;
+
+ __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
}
/*
@@ -2543,12 +2549,15 @@ xfs_rmap_convert_extent(
int whichfork,
struct xfs_bmbt_irec *PREV)
{
+ enum xfs_rmap_intent_type type = XFS_RMAP_CONVERT;
+
if (!xfs_rmap_update_is_needed(mp, whichfork))
return;
- __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
- XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino,
- whichfork, PREV);
+ if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
+ type = XFS_RMAP_CONVERT_SHARED;
+
+ __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
}
/* Schedule the creation of an rmap for non-file data. */
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 1d9fa8a300f1..6c1aba16113c 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1018,7 +1018,6 @@ xfs_rtalloc_query_range(
struct xfs_mount *mp = tp->t_mountp;
xfs_rtblock_t rtstart;
xfs_rtblock_t rtend;
- xfs_rtblock_t rem;
int is_free;
int error = 0;
@@ -1027,13 +1026,12 @@ xfs_rtalloc_query_range(
if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
low_rec->ar_startext == high_rec->ar_startext)
return 0;
- if (high_rec->ar_startext > mp->m_sb.sb_rextents)
- high_rec->ar_startext = mp->m_sb.sb_rextents;
+ high_rec->ar_startext = min(high_rec->ar_startext,
+ mp->m_sb.sb_rextents - 1);
/* Iterate the bitmap, looking for discrepancies. */
rtstart = low_rec->ar_startext;
- rem = high_rec->ar_startext - rtstart;
- while (rem) {
+ while (rtstart <= high_rec->ar_startext) {
/* Is the first block free? */
error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
&is_free);
@@ -1042,7 +1040,7 @@ xfs_rtalloc_query_range(
/* How long does the extent go for? */
error = xfs_rtfind_forw(mp, tp, rtstart,
- high_rec->ar_startext - 1, &rtend);
+ high_rec->ar_startext, &rtend);
if (error)
break;
@@ -1055,7 +1053,6 @@ xfs_rtalloc_query_range(
break;
}
- rem -= rtend - rtstart + 1;
rtstart = rtend + 1;
}
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index e56786f0a13c..653f3280e1c1 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -441,6 +441,20 @@ xchk_da_btree_block(
goto out_freebp;
}
+ /*
+ * If we've been handed a block that is below the dabtree root, does
+ * its hashval match what the parent block expected to see?
+ */
+ if (level > 0) {
+ struct xfs_da_node_entry *key;
+
+ key = xchk_da_btree_node_entry(ds, level - 1);
+ if (be32_to_cpu(key->hashval) != blk->hashval) {
+ xchk_da_set_corrupt(ds, level);
+ goto out_freebp;
+ }
+ }
+
out:
return error;
out_freebp:
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index ec3691372e7c..9e16a4d0f97c 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -24,6 +24,7 @@
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
+#include "xfs_quota.h"
kmem_zone_t *xfs_bui_zone;
kmem_zone_t *xfs_bud_zone;
@@ -423,30 +424,26 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
STATIC int
xfs_bui_item_recover(
struct xfs_log_item *lip,
- struct xfs_trans *parent_tp)
+ struct list_head *capture_list)
{
struct xfs_bmbt_irec irec;
struct xfs_bui_log_item *buip = BUI_ITEM(lip);
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
- struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_mount *mp = lip->li_mountp;
struct xfs_map_extent *bmap;
struct xfs_bud_log_item *budp;
xfs_fsblock_t startblock_fsb;
xfs_fsblock_t inode_fsb;
xfs_filblks_t count;
xfs_exntst_t state;
- enum xfs_bmap_intent_type type;
- bool op_ok;
unsigned int bui_type;
int whichfork;
int error = 0;
/* Only one mapping operation per BUI... */
- if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
- xfs_bui_release(buip);
+ if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
return -EFSCORRUPTED;
- }
/*
* First check the validity of the extent described by the
@@ -457,76 +454,58 @@ xfs_bui_item_recover(
XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
XFS_INO_TO_FSB(mp, bmap->me_owner)));
- switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+ XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+ whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+ bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+ switch (bui_type) {
case XFS_BMAP_MAP:
case XFS_BMAP_UNMAP:
- op_ok = true;
break;
default:
- op_ok = false;
- break;
+ return -EFSCORRUPTED;
}
- if (!op_ok || startblock_fsb == 0 ||
+ if (startblock_fsb == 0 ||
bmap->me_len == 0 ||
inode_fsb == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
bmap->me_len >= mp->m_sb.sb_agblocks ||
inode_fsb >= mp->m_sb.sb_dblocks ||
- (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) {
- /*
- * This will pull the BUI from the AIL and
- * free the memory associated with it.
- */
- xfs_bui_release(buip);
+ (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
return -EFSCORRUPTED;
- }
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
- XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
+ /* Grab the inode. */
+ error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
if (error)
return error;
- /*
- * Recovery stashes all deferred ops during intent processing and
- * finishes them on completion. Transfer current dfops state to this
- * transaction and transfer the result back before we return.
- */
- xfs_defer_move(tp, parent_tp);
- budp = xfs_trans_get_bud(tp, buip);
- /* Grab the inode. */
- error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
+ error = xfs_qm_dqattach(ip);
if (error)
- goto err_inode;
+ goto err_rele;
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
- /* Process deferred bmap item. */
- state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
- XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
- bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
- switch (bui_type) {
- case XFS_BMAP_MAP:
- case XFS_BMAP_UNMAP:
- type = bui_type;
- break;
- default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- error = -EFSCORRUPTED;
- goto err_inode;
- }
+ /* Allocate transaction and do the work. */
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
+ if (error)
+ goto err_rele;
+
+ budp = xfs_trans_get_bud(tp, buip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, type, ip, whichfork,
- bmap->me_startoff, bmap->me_startblock, &count, state);
+ error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
+ whichfork, bmap->me_startoff, bmap->me_startblock,
+ &count, state);
if (error)
- goto err_inode;
+ goto err_cancel;
if (count > 0) {
- ASSERT(type == XFS_BMAP_UNMAP);
+ ASSERT(bui_type == XFS_BMAP_UNMAP);
irec.br_startblock = bmap->me_startblock;
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
@@ -534,20 +513,24 @@ xfs_bui_item_recover(
xfs_bmap_unmap_extent(tp, ip, &irec);
}
- xfs_defer_move(parent_tp, tp);
- error = xfs_trans_commit(tp);
+ /*
+ * Commit transaction, which frees the transaction and saves the inode
+ * for later replay activities.
+ */
+ error = xfs_defer_ops_capture_and_commit(tp, ip, capture_list);
+ if (error)
+ goto err_unlock;
+
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_irele(ip);
+ return 0;
- return error;
-
-err_inode:
- xfs_defer_move(parent_tp, tp);
+err_cancel:
xfs_trans_cancel(tp);
- if (ip) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_irele(ip);
- }
+err_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+err_rele:
+ xfs_irele(ip);
return error;
}
@@ -559,6 +542,32 @@ xfs_bui_item_match(
return BUI_ITEM(lip)->bui_format.bui_id == intent_id;
}
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_bui_item_relog(
+ struct xfs_log_item *intent,
+ struct xfs_trans *tp)
+{
+ struct xfs_bud_log_item *budp;
+ struct xfs_bui_log_item *buip;
+ struct xfs_map_extent *extp;
+ unsigned int count;
+
+ count = BUI_ITEM(intent)->bui_format.bui_nextents;
+ extp = BUI_ITEM(intent)->bui_format.bui_extents;
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ budp = xfs_trans_get_bud(tp, BUI_ITEM(intent));
+ set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
+
+ buip = xfs_bui_init(tp->t_mountp);
+ memcpy(buip->bui_format.bui_extents, extp, count * sizeof(*extp));
+ atomic_set(&buip->bui_next_extent, count);
+ xfs_trans_add_item(tp, &buip->bui_item);
+ set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
+ return &buip->bui_item;
+}
+
static const struct xfs_item_ops xfs_bui_item_ops = {
.iop_size = xfs_bui_item_size,
.iop_format = xfs_bui_item_format,
@@ -566,6 +575,7 @@ static const struct xfs_item_ops xfs_bui_item_ops = {
.iop_release = xfs_bui_item_release,
.iop_recover = xfs_bui_item_recover,
.iop_match = xfs_bui_item_match,
+ .iop_relog = xfs_bui_item_relog,
};
/*
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f2a8a0e75e1f..7371a7f7c652 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -947,11 +947,11 @@ xfs_free_file_space(
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
/* We can only free complete realtime extents. */
- if (XFS_IS_REALTIME_INODE(ip)) {
- xfs_extlen_t extsz = xfs_get_extsz_hint(ip);
-
- if ((startoffset_fsb | endoffset_fsb) & (extsz - 1))
- return -EINVAL;
+ if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) {
+ startoffset_fsb = roundup_64(startoffset_fsb,
+ mp->m_sb.sb_rextsize);
+ endoffset_fsb = rounddown_64(endoffset_fsb,
+ mp->m_sb.sb_rextsize);
}
/*
@@ -1147,14 +1147,6 @@ xfs_insert_file_space(
trace_xfs_insert_file_space(ip);
- /* We can only insert complete realtime extents. */
- if (XFS_IS_REALTIME_INODE(ip)) {
- xfs_extlen_t extsz = xfs_get_extsz_hint(ip);
-
- if ((stop_fsb | shift_fsb) & (extsz - 1))
- return -EINVAL;
- }
-
error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
if (error)
return error;
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 24c7a8d11e1a..d44e8b4a3391 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -719,6 +719,8 @@ xlog_recover_get_buf_lsn(
case XFS_ABTC_MAGIC:
case XFS_RMAP_CRC_MAGIC:
case XFS_REFC_CRC_MAGIC:
+ case XFS_FIBT_CRC_MAGIC:
+ case XFS_FIBT_MAGIC:
case XFS_IBT_CRC_MAGIC:
case XFS_IBT_MAGIC: {
struct xfs_btree_block *btb = blk;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 3072814e407d..1d95ed387d66 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -831,8 +831,8 @@ xfs_qm_dqget_checks(
}
/*
- * Given the file system, id, and type (UDQUOT/GDQUOT), return a locked
- * dquot, doing an allocation (if requested) as needed.
+ * Given the file system, id, and type (UDQUOT/GDQUOT/PDQUOT), return a
+ * locked dquot, doing an allocation (if requested) as needed.
*/
int
xfs_qm_dqget(
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6cb8cd11072a..6c11bfc3d452 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -585,10 +585,10 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
STATIC int
xfs_efi_item_recover(
struct xfs_log_item *lip,
- struct xfs_trans *parent_tp)
+ struct list_head *capture_list)
{
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
- struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_mount *mp = lip->li_mountp;
struct xfs_efd_log_item *efdp;
struct xfs_trans *tp;
struct xfs_extent *extp;
@@ -608,14 +608,8 @@ xfs_efi_item_recover(
if (startblock_fsb == 0 ||
extp->ext_len == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
- extp->ext_len >= mp->m_sb.sb_agblocks) {
- /*
- * This will pull the EFI from the AIL and
- * free the memory associated with it.
- */
- xfs_efi_release(efip);
+ extp->ext_len >= mp->m_sb.sb_agblocks)
return -EFSCORRUPTED;
- }
}
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -633,8 +627,7 @@ xfs_efi_item_recover(
}
- error = xfs_trans_commit(tp);
- return error;
+ return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
abort_error:
xfs_trans_cancel(tp);
@@ -649,6 +642,34 @@ xfs_efi_item_match(
return EFI_ITEM(lip)->efi_format.efi_id == intent_id;
}
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_efi_item_relog(
+ struct xfs_log_item *intent,
+ struct xfs_trans *tp)
+{
+ struct xfs_efd_log_item *efdp;
+ struct xfs_efi_log_item *efip;
+ struct xfs_extent *extp;
+ unsigned int count;
+
+ count = EFI_ITEM(intent)->efi_format.efi_nextents;
+ extp = EFI_ITEM(intent)->efi_format.efi_extents;
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ efdp = xfs_trans_get_efd(tp, EFI_ITEM(intent), count);
+ efdp->efd_next_extent = count;
+ memcpy(efdp->efd_format.efd_extents, extp, count * sizeof(*extp));
+ set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
+
+ efip = xfs_efi_init(tp->t_mountp, count);
+ memcpy(efip->efi_format.efi_extents, extp, count * sizeof(*extp));
+ atomic_set(&efip->efi_next_extent, count);
+ xfs_trans_add_item(tp, &efip->efi_item);
+ set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
+ return &efip->efi_item;
+}
+
static const struct xfs_item_ops xfs_efi_item_ops = {
.iop_size = xfs_efi_item_size,
.iop_format = xfs_efi_item_format,
@@ -656,6 +677,7 @@ static const struct xfs_item_ops xfs_efi_item_ops = {
.iop_release = xfs_efi_item_release,
.iop_recover = xfs_efi_item_recover,
.iop_match = xfs_efi_item_match,
+ .iop_relog = xfs_efi_item_relog,
};
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 3d1b95124744..5b0f93f73837 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -32,6 +32,39 @@
static const struct vm_operations_struct xfs_file_vm_ops;
+/*
+ * Decide if the given file range is aligned to the size of the fundamental
+ * allocation unit for the file.
+ */
+static bool
+xfs_is_falloc_aligned(
+ struct xfs_inode *ip,
+ loff_t pos,
+ long long int len)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ uint64_t mask;
+
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ if (!is_power_of_2(mp->m_sb.sb_rextsize)) {
+ u64 rextbytes;
+ u32 mod;
+
+ rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
+ div_u64_rem(pos, rextbytes, &mod);
+ if (mod)
+ return false;
+ div_u64_rem(len, rextbytes, &mod);
+ return mod == 0;
+ }
+ mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;
+ } else {
+ mask = mp->m_sb.sb_blocksize - 1;
+ }
+
+ return !((pos | len) & mask);
+}
+
int
xfs_update_prealloc_flags(
struct xfs_inode *ip,
@@ -850,9 +883,7 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
- unsigned int blksize_mask = i_blocksize(inode) - 1;
-
- if (offset & blksize_mask || len & blksize_mask) {
+ if (!xfs_is_falloc_aligned(ip, offset, len)) {
error = -EINVAL;
goto out_unlock;
}
@@ -872,10 +903,9 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
- unsigned int blksize_mask = i_blocksize(inode) - 1;
loff_t isize = i_size_read(inode);
- if (offset & blksize_mask || len & blksize_mask) {
+ if (!xfs_is_falloc_aligned(ip, offset, len)) {
error = -EINVAL;
goto out_unlock;
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 1a88025e68a3..db23e455eb91 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -33,39 +33,7 @@ enum xfs_fstrm_alloc {
/*
* Allocation group filestream associations are tracked with per-ag atomic
* counters. These counters allow xfs_filestream_pick_ag() to tell whether a
- * particular AG already has active filestreams associated with it. The mount
- * point's m_peraglock is used to protect these counters from per-ag array
- * re-allocation during a growfs operation. When xfs_growfs_data_private() is
- * about to reallocate the array, it calls xfs_filestream_flush() with the
- * m_peraglock held in write mode.
- *
- * Since xfs_mru_cache_flush() guarantees that all the free functions for all
- * the cache elements have finished executing before it returns, it's safe for
- * the free functions to use the atomic counters without m_peraglock protection.
- * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
- * whether it was called with the m_peraglock held in read mode, write mode or
- * not held at all. The race condition this addresses is the following:
- *
- * - The work queue scheduler fires and pulls a filestream directory cache
- * element off the LRU end of the cache for deletion, then gets pre-empted.
- * - A growfs operation grabs the m_peraglock in write mode, flushes all the
- * remaining items from the cache and reallocates the mount point's per-ag
- * array, resetting all the counters to zero.
- * - The work queue thread resumes and calls the free function for the element
- * it started cleaning up earlier. In the process it decrements the
- * filestreams counter for an AG that now has no references.
- *
- * With a shrinkfs feature, the above scenario could panic the system.
- *
- * All other uses of the following macros should be protected by either the
- * m_peraglock held in read mode, or the cache's internal locking exposed by the
- * interval between a call to xfs_mru_cache_lookup() and a call to
- * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
- * when new elements are added to the cache.
- *
- * Combined, these locking rules ensure that no associations will ever exist in
- * the cache that reference per-ag array elements that have since been
- * reallocated.
+ * particular AG already has active filestreams associated with it.
*/
int
xfs_filestream_peek_ag(
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 4eebcec4aae6..9ce5e7d5bf8f 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -26,7 +26,7 @@
#include "xfs_rtalloc.h"
/* Convert an xfs_fsmap to an fsmap. */
-void
+static void
xfs_fsmap_from_internal(
struct fsmap *dest,
struct xfs_fsmap *src)
@@ -155,8 +155,7 @@ xfs_fsmap_owner_from_rmap(
/* getfsmap query state */
struct xfs_getfsmap_info {
struct xfs_fsmap_head *head;
- xfs_fsmap_format_t formatter; /* formatting fn */
- void *format_arg; /* format buffer */
+ struct fsmap *fsmap_recs; /* mapping records */
struct xfs_buf *agf_bp; /* AGF, for refcount queries */
xfs_daddr_t next_daddr; /* next daddr we expect */
u64 missing_owner; /* owner of holes */
@@ -224,6 +223,20 @@ xfs_getfsmap_is_shared(
return 0;
}
+static inline void
+xfs_getfsmap_format(
+ struct xfs_mount *mp,
+ struct xfs_fsmap *xfm,
+ struct xfs_getfsmap_info *info)
+{
+ struct fsmap *rec;
+
+ trace_xfs_getfsmap_mapping(mp, xfm);
+
+ rec = &info->fsmap_recs[info->head->fmh_entries++];
+ xfs_fsmap_from_internal(rec, xfm);
+}
+
/*
* Format a reverse mapping for getfsmap, having translated rm_startblock
* into the appropriate daddr units.
@@ -256,6 +269,9 @@ xfs_getfsmap_helper(
/* Are we just counting mappings? */
if (info->head->fmh_count == 0) {
+ if (info->head->fmh_entries == UINT_MAX)
+ return -ECANCELED;
+
if (rec_daddr > info->next_daddr)
info->head->fmh_entries++;
@@ -285,10 +301,7 @@ xfs_getfsmap_helper(
fmr.fmr_offset = 0;
fmr.fmr_length = rec_daddr - info->next_daddr;
fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
- error = info->formatter(&fmr, info->format_arg);
- if (error)
- return error;
- info->head->fmh_entries++;
+ xfs_getfsmap_format(mp, &fmr, info);
}
if (info->last)
@@ -320,11 +333,8 @@ xfs_getfsmap_helper(
if (shared)
fmr.fmr_flags |= FMR_OF_SHARED;
}
- error = info->formatter(&fmr, info->format_arg);
- if (error)
- return error;
- info->head->fmh_entries++;
+ xfs_getfsmap_format(mp, &fmr, info);
out:
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
if (info->next_daddr < rec_daddr)
@@ -792,11 +802,11 @@ xfs_getfsmap_check_keys(
#endif /* CONFIG_XFS_RT */
/*
- * Get filesystem's extents as described in head, and format for
- * output. Calls formatter to fill the user's buffer until all
- * extents are mapped, until the passed-in head->fmh_count slots have
- * been filled, or until the formatter short-circuits the loop, if it
- * is tracking filled-in extents on its own.
+ * Get filesystem's extents as described in head, and format for output. Fills
+ * in the supplied records array until there are no more reverse mappings to
+ * return or head.fmh_entries == head.fmh_count. In the second case, this
+ * function returns -ECANCELED to indicate that more records would have been
+ * returned.
*
* Key to Confusion
* ----------------
@@ -816,8 +826,7 @@ int
xfs_getfsmap(
struct xfs_mount *mp,
struct xfs_fsmap_head *head,
- xfs_fsmap_format_t formatter,
- void *arg)
+ struct fsmap *fsmap_recs)
{
struct xfs_trans *tp = NULL;
struct xfs_fsmap dkeys[2]; /* per-dev keys */
@@ -892,8 +901,7 @@ xfs_getfsmap(
info.next_daddr = head->fmh_keys[0].fmr_physical +
head->fmh_keys[0].fmr_length;
- info.formatter = formatter;
- info.format_arg = arg;
+ info.fsmap_recs = fsmap_recs;
info.head = head;
/*
diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h
index c6c57739b862..a0775788e7b1 100644
--- a/fs/xfs/xfs_fsmap.h
+++ b/fs/xfs/xfs_fsmap.h
@@ -27,13 +27,9 @@ struct xfs_fsmap_head {
struct xfs_fsmap fmh_keys[2]; /* low and high keys */
};
-void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src);
void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src);
-/* fsmap to userspace formatter - copy to user & advance pointer */
-typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *);
-
int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head,
- xfs_fsmap_format_t formatter, void *arg);
+ struct fsmap *out_recs);
#endif /* __XFS_FSMAP_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 49624973eecc..2bfbcf28b1bd 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -698,6 +698,68 @@ out_unlock:
return error;
}
+/* Propagate di_flags from a parent inode to a child inode. */
+static void
+xfs_inode_inherit_flags(
+ struct xfs_inode *ip,
+ const struct xfs_inode *pip)
+{
+ unsigned int di_flags = 0;
+ umode_t mode = VFS_I(ip)->i_mode;
+
+ if (S_ISDIR(mode)) {
+ if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ ip->i_d.di_extsize = pip->i_d.di_extsize;
+ }
+ if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+ di_flags |= XFS_DIFLAG_PROJINHERIT;
+ } else if (S_ISREG(mode)) {
+ if ((pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) &&
+ xfs_sb_version_hasrealtime(&ip->i_mount->m_sb))
+ di_flags |= XFS_DIFLAG_REALTIME;
+ if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ ip->i_d.di_extsize = pip->i_d.di_extsize;
+ }
+ }
+ if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
+ xfs_inherit_noatime)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
+ xfs_inherit_nodump)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
+ xfs_inherit_sync)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
+ xfs_inherit_nosymlinks)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
+ xfs_inherit_nodefrag)
+ di_flags |= XFS_DIFLAG_NODEFRAG;
+ if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
+ di_flags |= XFS_DIFLAG_FILESTREAM;
+
+ ip->i_d.di_flags |= di_flags;
+}
+
+/* Propagate di_flags2 from a parent inode to a child inode. */
+static void
+xfs_inode_inherit_flags2(
+ struct xfs_inode *ip,
+ const struct xfs_inode *pip)
+{
+ if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
+ ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
+ }
+ if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+ ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX;
+}
+
/*
* Allocate an inode on disk and return a copy of its in-core version.
* The in-core inode is locked exclusively. Set mode, nlink, and rdev
@@ -841,54 +903,10 @@ xfs_ialloc(
break;
case S_IFREG:
case S_IFDIR:
- if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
- uint di_flags = 0;
-
- if (S_ISDIR(mode)) {
- if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_RTINHERIT;
- if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
- di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- ip->i_d.di_extsize = pip->i_d.di_extsize;
- }
- if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
- di_flags |= XFS_DIFLAG_PROJINHERIT;
- } else if (S_ISREG(mode)) {
- if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_REALTIME;
- if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
- di_flags |= XFS_DIFLAG_EXTSIZE;
- ip->i_d.di_extsize = pip->i_d.di_extsize;
- }
- }
- if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
- xfs_inherit_noatime)
- di_flags |= XFS_DIFLAG_NOATIME;
- if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
- xfs_inherit_nodump)
- di_flags |= XFS_DIFLAG_NODUMP;
- if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
- xfs_inherit_sync)
- di_flags |= XFS_DIFLAG_SYNC;
- if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
- xfs_inherit_nosymlinks)
- di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
- xfs_inherit_nodefrag)
- di_flags |= XFS_DIFLAG_NODEFRAG;
- if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
- di_flags |= XFS_DIFLAG_FILESTREAM;
-
- ip->i_d.di_flags |= di_flags;
- }
- if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY)) {
- if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
- ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
- ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
- }
- if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
- ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX;
- }
+ if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY))
+ xfs_inode_inherit_flags(ip, pip);
+ if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY))
+ xfs_inode_inherit_flags2(ip, pip);
/* FALLTHROUGH */
case S_IFLNK:
ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
@@ -1516,17 +1534,10 @@ xfs_itruncate_extents_flags(
if (error)
goto out;
- /*
- * Duplicate the transaction that has the permanent
- * reservation and commit the old transaction.
- */
+ /* free the just unmapped extents */
error = xfs_defer_finish(&tp);
if (error)
goto out;
-
- error = xfs_trans_roll_inode(&tp, ip);
- if (error)
- goto out;
}
if (whichfork == XFS_DATA_FORK) {
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index bca7659fb5c6..3fbd98f61ea5 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1716,39 +1716,17 @@ out_free_buf:
return error;
}
-struct getfsmap_info {
- struct xfs_mount *mp;
- struct fsmap_head __user *data;
- unsigned int idx;
- __u32 last_flags;
-};
-
-STATIC int
-xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
-{
- struct getfsmap_info *info = priv;
- struct fsmap fm;
-
- trace_xfs_getfsmap_mapping(info->mp, xfm);
-
- info->last_flags = xfm->fmr_flags;
- xfs_fsmap_from_internal(&fm, xfm);
- if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm,
- sizeof(struct fsmap)))
- return -EFAULT;
-
- return 0;
-}
-
STATIC int
xfs_ioc_getfsmap(
struct xfs_inode *ip,
struct fsmap_head __user *arg)
{
- struct getfsmap_info info = { NULL };
struct xfs_fsmap_head xhead = {0};
struct fsmap_head head;
- bool aborted = false;
+ struct fsmap *recs;
+ unsigned int count;
+ __u32 last_flags = 0;
+ bool done = false;
int error;
if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
@@ -1760,38 +1738,112 @@ xfs_ioc_getfsmap(
sizeof(head.fmh_keys[1].fmr_reserved)))
return -EINVAL;
+ /*
+ * Use an internal memory buffer so that we don't have to copy fsmap
+ * data to userspace while holding locks. Start by trying to allocate
+ * up to 128k for the buffer, but fall back to a single page if needed.
+ */
+ count = min_t(unsigned int, head.fmh_count,
+ 131072 / sizeof(struct fsmap));
+ recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
+ if (!recs) {
+ count = min_t(unsigned int, head.fmh_count,
+ PAGE_SIZE / sizeof(struct fsmap));
+ recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
+ if (!recs)
+ return -ENOMEM;
+ }
+
xhead.fmh_iflags = head.fmh_iflags;
- xhead.fmh_count = head.fmh_count;
xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
- info.mp = ip->i_mount;
- info.data = arg;
- error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
- if (error == -ECANCELED) {
- error = 0;
- aborted = true;
- } else if (error)
- return error;
+ head.fmh_entries = 0;
+ do {
+ struct fsmap __user *user_recs;
+ struct fsmap *last_rec;
+
+ user_recs = &arg->fmh_recs[head.fmh_entries];
+ xhead.fmh_entries = 0;
+ xhead.fmh_count = min_t(unsigned int, count,
+ head.fmh_count - head.fmh_entries);
+
+ /* Run query, record how many entries we got. */
+ error = xfs_getfsmap(ip->i_mount, &xhead, recs);
+ switch (error) {
+ case 0:
+ /*
+ * There are no more records in the result set. Copy
+ * whatever we got to userspace and break out.
+ */
+ done = true;
+ break;
+ case -ECANCELED:
+ /*
+ * The internal memory buffer is full. Copy whatever
+ * records we got to userspace and go again if we have
+ * not yet filled the userspace buffer.
+ */
+ error = 0;
+ break;
+ default:
+ goto out_free;
+ }
+ head.fmh_entries += xhead.fmh_entries;
+ head.fmh_oflags = xhead.fmh_oflags;
- /* If we didn't abort, set the "last" flag in the last fmx */
- if (!aborted && info.idx) {
- info.last_flags |= FMR_OF_LAST;
- if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags,
- &info.last_flags, sizeof(info.last_flags)))
- return -EFAULT;
+ /*
+ * If the caller wanted a record count or there aren't any
+ * new records to return, we're done.
+ */
+ if (head.fmh_count == 0 || xhead.fmh_entries == 0)
+ break;
+
+ /* Copy all the records we got out to userspace. */
+ if (copy_to_user(user_recs, recs,
+ xhead.fmh_entries * sizeof(struct fsmap))) {
+ error = -EFAULT;
+ goto out_free;
+ }
+
+ /* Remember the last record flags we copied to userspace. */
+ last_rec = &recs[xhead.fmh_entries - 1];
+ last_flags = last_rec->fmr_flags;
+
+ /* Set up the low key for the next iteration. */
+ xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
+ trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
+ } while (!done && head.fmh_entries < head.fmh_count);
+
+ /*
+ * If there are no more records in the query result set and we're not
+ * in counting mode, mark the last record returned with the LAST flag.
+ */
+ if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
+ struct fsmap __user *user_rec;
+
+ last_flags |= FMR_OF_LAST;
+ user_rec = &arg->fmh_recs[head.fmh_entries - 1];
+
+ if (copy_to_user(&user_rec->fmr_flags, &last_flags,
+ sizeof(last_flags))) {
+ error = -EFAULT;
+ goto out_free;
+ }
}
/* copy back header */
- head.fmh_entries = xhead.fmh_entries;
- head.fmh_oflags = xhead.fmh_oflags;
- if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
- return -EFAULT;
+ if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
+ error = -EFAULT;
+ goto out_free;
+ }
- return 0;
+out_free:
+ kmem_free(recs);
+ return error;
}
STATIC int
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 80a13c8561d8..5e165456da68 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -237,7 +237,7 @@ xfs_vn_create(
umode_t mode,
bool flags)
{
- return xfs_vn_mknod(dir, dentry, mode, 0);
+ return xfs_generic_create(dir, dentry, mode, 0, false);
}
STATIC int
@@ -246,7 +246,7 @@ xfs_vn_mkdir(
struct dentry *dentry,
umode_t mode)
{
- return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
+ return xfs_generic_create(dir, dentry, mode | S_IFDIR, 0, false);
}
STATIC struct dentry *
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index ab737fed7b12..5b7a1e201559 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -123,7 +123,6 @@ typedef __u32 xfs_nlink_t;
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
#define EFSBADCRC EBADMSG /* Bad CRC detected */
-#define SYNCHRONIZE() barrier()
#define __return_address __builtin_return_address(0)
/*
@@ -176,6 +175,12 @@ static inline xfs_dev_t linux_to_xfs_dev_t(dev_t dev)
#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
#define xfs_stack_trace() dump_stack()
+static inline uint64_t rounddown_64(uint64_t x, uint32_t y)
+{
+ do_div(x, y);
+ return x * y;
+}
+
static inline uint64_t roundup_64(uint64_t x, uint32_t y)
{
x += y - 1;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad0c69ee8947..fa2d05e65ff1 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1475,14 +1475,14 @@ xlog_commit_record(
}
/*
- * Push on the buffer cache code if we ever use more than 75% of the on-disk
- * log space. This code pushes on the lsn which would supposedly free up
- * the 25% which we want to leave free. We may need to adopt a policy which
- * pushes on an lsn which is further along in the log once we reach the high
- * water mark. In this manner, we would be creating a low water mark.
+ * Compute the LSN that we'd need to push the log tail towards in order to have
+ * (a) enough on-disk log space to log the number of bytes specified, (b) at
+ * least 25% of the log space free, and (c) at least 256 blocks free. If the
+ * log free space already meets all three thresholds, this function returns
+ * NULLCOMMITLSN.
*/
-STATIC void
-xlog_grant_push_ail(
+xfs_lsn_t
+xlog_grant_push_threshold(
struct xlog *log,
int need_bytes)
{
@@ -1508,7 +1508,7 @@ xlog_grant_push_ail(
free_threshold = max(free_threshold, (log->l_logBBsize >> 2));
free_threshold = max(free_threshold, 256);
if (free_blocks >= free_threshold)
- return;
+ return NULLCOMMITLSN;
xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
&threshold_block);
@@ -1528,13 +1528,33 @@ xlog_grant_push_ail(
if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
threshold_lsn = last_sync_lsn;
+ return threshold_lsn;
+}
+
+/*
+ * Push the tail of the log if we need to do so to maintain the free log space
+ * thresholds set out by xlog_grant_push_threshold. We may need to adopt a
+ * policy which pushes on an lsn which is further along in the log once we
+ * reach the high water mark. In this manner, we would be creating a low water
+ * mark.
+ */
+STATIC void
+xlog_grant_push_ail(
+ struct xlog *log,
+ int need_bytes)
+{
+ xfs_lsn_t threshold_lsn;
+
+ threshold_lsn = xlog_grant_push_threshold(log, need_bytes);
+ if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log))
+ return;
+
/*
* Get the transaction layer to kick the dirty buffers out to
* disk asynchronously. No point in trying to do this if
* the filesystem is shutting down.
*/
- if (!XLOG_FORCED_SHUTDOWN(log))
- xfs_ail_push(log->l_ailp, threshold_lsn);
+ xfs_ail_push(log->l_ailp, threshold_lsn);
}
/*
@@ -1604,9 +1624,7 @@ xlog_cksum(
int i;
int xheads;
- xheads = size / XLOG_HEADER_CYCLE_SIZE;
- if (size % XLOG_HEADER_CYCLE_SIZE)
- xheads++;
+ xheads = DIV_ROUND_UP(size, XLOG_HEADER_CYCLE_SIZE);
for (i = 1; i < xheads; i++) {
crc = crc32c(crc, &xhdr[i].hic_xheader,
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 1412d6993f1e..58c3fcbec94a 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -141,4 +141,6 @@ void xfs_log_quiesce(struct xfs_mount *mp);
bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
bool xfs_log_in_recovery(struct xfs_mount *);
+xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes);
+
#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a17d788921d6..87886b7f77da 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -371,6 +371,19 @@ out:
return error;
}
+static inline int
+xlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh)
+{
+ if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+ int h_size = be32_to_cpu(rh->h_size);
+
+ if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) &&
+ h_size > XLOG_HEADER_CYCLE_SIZE)
+ return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE);
+ }
+ return 1;
+}
+
/*
* Potentially backup over partial log record write.
*
@@ -463,15 +476,7 @@ xlog_find_verify_log_record(
* reset last_blk. Only when last_blk points in the middle of a log
* record do we update last_blk.
*/
- if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
- uint h_size = be32_to_cpu(head->h_size);
-
- xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
- if (h_size % XLOG_HEADER_CYCLE_SIZE)
- xhdrs++;
- } else {
- xhdrs = 1;
- }
+ xhdrs = xlog_logrec_hblks(log, head);
if (*last_blk - i + extra_bblks !=
BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
@@ -1158,22 +1163,7 @@ xlog_check_unmount_rec(
* below. We won't want to clear the unmount record if there is one, so
* we pass the lsn of the unmount record rather than the block after it.
*/
- if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
- int h_size = be32_to_cpu(rhead->h_size);
- int h_version = be32_to_cpu(rhead->h_version);
-
- if ((h_version & XLOG_VERSION_2) &&
- (h_size > XLOG_HEADER_CYCLE_SIZE)) {
- hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
- if (h_size % XLOG_HEADER_CYCLE_SIZE)
- hblks++;
- } else {
- hblks = 1;
- }
- } else {
- hblks = 1;
- }
-
+ hblks = xlog_logrec_hblks(log, rhead);
after_umount_blk = xlog_wrap_logbno(log,
rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)));
@@ -2444,44 +2434,66 @@ xlog_recover_process_data(
/* Take all the collected deferred ops and finish them in order. */
static int
xlog_finish_defer_ops(
- struct xfs_trans *parent_tp)
+ struct xfs_mount *mp,
+ struct list_head *capture_list)
{
- struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_defer_capture *dfc, *next;
struct xfs_trans *tp;
- int64_t freeblks;
- uint resblks;
- int error;
+ struct xfs_inode *ip;
+ int error = 0;
- /*
- * We're finishing the defer_ops that accumulated as a result of
- * recovering unfinished intent items during log recovery. We
- * reserve an itruncate transaction because it is the largest
- * permanent transaction type. Since we're the only user of the fs
- * right now, take 93% (15/16) of the available free blocks. Use
- * weird math to avoid a 64-bit division.
- */
- freeblks = percpu_counter_sum(&mp->m_fdblocks);
- if (freeblks <= 0)
- return -ENOSPC;
- resblks = min_t(int64_t, UINT_MAX, freeblks);
- resblks = (resblks * 15) >> 4;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
- 0, XFS_TRANS_RESERVE, &tp);
- if (error)
- return error;
- /* transfer all collected dfops to this transaction */
- xfs_defer_move(tp, parent_tp);
+ list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
+ struct xfs_trans_res resv;
- return xfs_trans_commit(tp);
+ /*
+ * Create a new transaction reservation from the captured
+ * information. Set logcount to 1 to force the new transaction
+ * to regrant every roll so that we can make forward progress
+ * in recovery no matter how full the log might be.
+ */
+ resv.tr_logres = dfc->dfc_logres;
+ resv.tr_logcount = 1;
+ resv.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+
+ error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres,
+ dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
+
+ /*
+ * Transfer to this new transaction all the dfops we captured
+ * from recovering a single intent item.
+ */
+ list_del_init(&dfc->dfc_list);
+ xfs_defer_ops_continue(dfc, tp, &ip);
+
+ error = xfs_trans_commit(tp);
+ if (ip) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_irele(ip);
+ }
+ if (error)
+ return error;
+ }
+
+ ASSERT(list_empty(capture_list));
+ return 0;
}
-/* Is this log item a deferred action intent? */
-static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
+/* Release all the captured defer ops and capture structures in this list. */
+static void
+xlog_abort_defer_ops(
+ struct xfs_mount *mp,
+ struct list_head *capture_list)
{
- return lip->li_ops->iop_recover != NULL &&
- lip->li_ops->iop_match != NULL;
-}
+ struct xfs_defer_capture *dfc;
+ struct xfs_defer_capture *next;
+ list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
+ list_del_init(&dfc->dfc_list);
+ xfs_defer_ops_release(mp, dfc);
+ }
+}
/*
* When this is called, all of the log intent items which did not have
* corresponding log done items should be in the AIL. What we do now
@@ -2502,35 +2514,23 @@ STATIC int
xlog_recover_process_intents(
struct xlog *log)
{
- struct xfs_trans *parent_tp;
+ LIST_HEAD(capture_list);
struct xfs_ail_cursor cur;
struct xfs_log_item *lip;
struct xfs_ail *ailp;
- int error;
+ int error = 0;
#if defined(DEBUG) || defined(XFS_WARN)
xfs_lsn_t last_lsn;
#endif
- /*
- * The intent recovery handlers commit transactions to complete recovery
- * for individual intents, but any new deferred operations that are
- * queued during that process are held off until the very end. The
- * purpose of this transaction is to serve as a container for deferred
- * operations. Each intent recovery handler must transfer dfops here
- * before its local transaction commits, and we'll finish the entire
- * list below.
- */
- error = xfs_trans_alloc_empty(log->l_mp, &parent_tp);
- if (error)
- return error;
-
ailp = log->l_ailp;
spin_lock(&ailp->ail_lock);
- lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
#if defined(DEBUG) || defined(XFS_WARN)
last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
#endif
- while (lip != NULL) {
+ for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+ lip != NULL;
+ lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
/*
* We're done when we see something other than an intent.
* There should be no intents left in the AIL now.
@@ -2552,26 +2552,29 @@ xlog_recover_process_intents(
/*
* NOTE: If your intent processing routine can create more
- * deferred ops, you /must/ attach them to the transaction in
- * this routine or else those subsequent intents will get
+ * deferred ops, you /must/ attach them to the capture list in
+ * the recover routine or else those subsequent intents will be
* replayed in the wrong order!
*/
- if (!test_and_set_bit(XFS_LI_RECOVERED, &lip->li_flags)) {
- spin_unlock(&ailp->ail_lock);
- error = lip->li_ops->iop_recover(lip, parent_tp);
- spin_lock(&ailp->ail_lock);
- }
+ spin_unlock(&ailp->ail_lock);
+ error = lip->li_ops->iop_recover(lip, &capture_list);
+ spin_lock(&ailp->ail_lock);
if (error)
- goto out;
- lip = xfs_trans_ail_cursor_next(ailp, &cur);
+ break;
}
-out:
+
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->ail_lock);
- if (!error)
- error = xlog_finish_defer_ops(parent_tp);
- xfs_trans_cancel(parent_tp);
+ if (error)
+ goto err;
+ error = xlog_finish_defer_ops(log->l_mp, &capture_list);
+ if (error)
+ goto err;
+
+ return 0;
+err:
+ xlog_abort_defer_ops(log->l_mp, &capture_list);
return error;
}
@@ -2878,7 +2881,8 @@ STATIC int
xlog_valid_rec_header(
struct xlog *log,
struct xlog_rec_header *rhead,
- xfs_daddr_t blkno)
+ xfs_daddr_t blkno,
+ int bufsize)
{
int hlen;
@@ -2894,10 +2898,14 @@ xlog_valid_rec_header(
return -EFSCORRUPTED;
}
- /* LR body must have data or it wouldn't have been written */
+ /*
+ * LR body must have data (or it wouldn't have been written)
+ * and h_len must not be greater than LR buffer size.
+ */
hlen = be32_to_cpu(rhead->h_len);
- if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > INT_MAX))
+ if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize))
return -EFSCORRUPTED;
+
if (XFS_IS_CORRUPT(log->l_mp,
blkno > log->l_logBBsize || blkno > INT_MAX))
return -EFSCORRUPTED;
@@ -2958,9 +2966,6 @@ xlog_do_recovery_pass(
goto bread_err1;
rhead = (xlog_rec_header_t *)offset;
- error = xlog_valid_rec_header(log, rhead, tail_blk);
- if (error)
- goto bread_err1;
/*
* xfsprogs has a bug where record length is based on lsunit but
@@ -2975,30 +2980,22 @@ xlog_do_recovery_pass(
*/
h_size = be32_to_cpu(rhead->h_size);
h_len = be32_to_cpu(rhead->h_len);
- if (h_len > h_size) {
- if (h_len <= log->l_mp->m_logbsize &&
- be32_to_cpu(rhead->h_num_logops) == 1) {
- xfs_warn(log->l_mp,
+ if (h_len > h_size && h_len <= log->l_mp->m_logbsize &&
+ rhead->h_num_logops == cpu_to_be32(1)) {
+ xfs_warn(log->l_mp,
"invalid iclog size (%d bytes), using lsunit (%d bytes)",
- h_size, log->l_mp->m_logbsize);
- h_size = log->l_mp->m_logbsize;
- } else {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW,
- log->l_mp);
- error = -EFSCORRUPTED;
- goto bread_err1;
- }
+ h_size, log->l_mp->m_logbsize);
+ h_size = log->l_mp->m_logbsize;
}
- if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
- (h_size > XLOG_HEADER_CYCLE_SIZE)) {
- hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
- if (h_size % XLOG_HEADER_CYCLE_SIZE)
- hblks++;
+ error = xlog_valid_rec_header(log, rhead, tail_blk, h_size);
+ if (error)
+ goto bread_err1;
+
+ hblks = xlog_logrec_hblks(log, rhead);
+ if (hblks != 1) {
kmem_free(hbp);
hbp = xlog_alloc_buffer(log, hblks);
- } else {
- hblks = 1;
}
} else {
ASSERT(log->l_sectBBsize == 1);
@@ -3070,7 +3067,7 @@ xlog_do_recovery_pass(
}
rhead = (xlog_rec_header_t *)offset;
error = xlog_valid_rec_header(log, rhead,
- split_hblks ? blk_no : 0);
+ split_hblks ? blk_no : 0, h_size);
if (error)
goto bread_err2;
@@ -3151,7 +3148,7 @@ xlog_do_recovery_pass(
goto bread_err2;
rhead = (xlog_rec_header_t *)offset;
- error = xlog_valid_rec_header(log, rhead, blk_no);
+ error = xlog_valid_rec_header(log, rhead, blk_no, h_size);
if (error)
goto bread_err2;
@@ -3449,6 +3446,14 @@ xlog_recover_finish(
int error;
error = xlog_recover_process_intents(log);
if (error) {
+ /*
+ * Cancel all the unprocessed intent items now so that
+ * we don't leave them pinned in the AIL. This can
+ * cause the AIL to livelock on the pinned item if
+ * anyone tries to push the AIL (inode reclaim does
+ * this) before we get around to xfs_log_mount_cancel.
+ */
+ xlog_recover_cancel_intents(log);
xfs_alert(log->l_mp, "Failed to recover intents");
return error;
}
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 3f82e0c92c2d..b2a9abee8b2b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -249,7 +249,6 @@ xfs_qm_unmount_quotas(
STATIC int
xfs_qm_dqattach_one(
struct xfs_inode *ip,
- xfs_dqid_t id,
xfs_dqtype_t type,
bool doalloc,
struct xfs_dquot **IO_idqpp)
@@ -330,23 +329,23 @@ xfs_qm_dqattach_locked(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
- error = xfs_qm_dqattach_one(ip, i_uid_read(VFS_I(ip)),
- XFS_DQTYPE_USER, doalloc, &ip->i_udquot);
+ error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER,
+ doalloc, &ip->i_udquot);
if (error)
goto done;
ASSERT(ip->i_udquot);
}
if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
- error = xfs_qm_dqattach_one(ip, i_gid_read(VFS_I(ip)),
- XFS_DQTYPE_GROUP, doalloc, &ip->i_gdquot);
+ error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_GROUP,
+ doalloc, &ip->i_gdquot);
if (error)
goto done;
ASSERT(ip->i_gdquot);
}
if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
- error = xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQTYPE_PROJ,
+ error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_PROJ,
doalloc, &ip->i_pdquot);
if (error)
goto done;
@@ -1663,6 +1662,7 @@ xfs_qm_vop_dqalloc(
}
if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
+ ASSERT(O_udqpp);
if (!uid_eq(inode->i_uid, uid)) {
/*
* What we need is the dquot that has this uid, and
@@ -1696,6 +1696,7 @@ xfs_qm_vop_dqalloc(
}
}
if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
+ ASSERT(O_gdqpp);
if (!gid_eq(inode->i_gid, gid)) {
xfs_iunlock(ip, lockflags);
error = xfs_qm_dqget(mp, from_kgid(user_ns, gid),
@@ -1713,9 +1714,10 @@ xfs_qm_vop_dqalloc(
}
}
if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
+ ASSERT(O_pdqpp);
if (ip->i_d.di_projid != prid) {
xfs_iunlock(ip, lockflags);
- error = xfs_qm_dqget(mp, (xfs_dqid_t)prid,
+ error = xfs_qm_dqget(mp, prid,
XFS_DQTYPE_PROJ, true, &pq);
if (error) {
ASSERT(error != -ENOENT);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index ca93b6488377..7529eb63ce94 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -424,7 +424,7 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
STATIC int
xfs_cui_item_recover(
struct xfs_log_item *lip,
- struct xfs_trans *parent_tp)
+ struct list_head *capture_list)
{
struct xfs_bmbt_irec irec;
struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
@@ -432,7 +432,7 @@ xfs_cui_item_recover(
struct xfs_cud_log_item *cudp;
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
- struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_mount *mp = lip->li_mountp;
xfs_fsblock_t startblock_fsb;
xfs_fsblock_t new_fsb;
xfs_extlen_t new_len;
@@ -467,14 +467,8 @@ xfs_cui_item_recover(
refc->pe_len == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
refc->pe_len >= mp->m_sb.sb_agblocks ||
- (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
- /*
- * This will pull the CUI from the AIL and
- * free the memory associated with it.
- */
- xfs_cui_release(cuip);
+ (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS))
return -EFSCORRUPTED;
- }
}
/*
@@ -493,12 +487,7 @@ xfs_cui_item_recover(
mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
if (error)
return error;
- /*
- * Recovery stashes all deferred ops during intent processing and
- * finishes them on completion. Transfer current dfops state to this
- * transaction and transfer the result back before we return.
- */
- xfs_defer_move(tp, parent_tp);
+
cudp = xfs_trans_get_cud(tp, cuip);
for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
@@ -555,13 +544,10 @@ xfs_cui_item_recover(
}
xfs_refcount_finish_one_cleanup(tp, rcur, error);
- xfs_defer_move(parent_tp, tp);
- error = xfs_trans_commit(tp);
- return error;
+ return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
abort_error:
xfs_refcount_finish_one_cleanup(tp, rcur, error);
- xfs_defer_move(parent_tp, tp);
xfs_trans_cancel(tp);
return error;
}
@@ -574,6 +560,32 @@ xfs_cui_item_match(
return CUI_ITEM(lip)->cui_format.cui_id == intent_id;
}
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_cui_item_relog(
+ struct xfs_log_item *intent,
+ struct xfs_trans *tp)
+{
+ struct xfs_cud_log_item *cudp;
+ struct xfs_cui_log_item *cuip;
+ struct xfs_phys_extent *extp;
+ unsigned int count;
+
+ count = CUI_ITEM(intent)->cui_format.cui_nextents;
+ extp = CUI_ITEM(intent)->cui_format.cui_extents;
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ cudp = xfs_trans_get_cud(tp, CUI_ITEM(intent));
+ set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
+
+ cuip = xfs_cui_init(tp->t_mountp, count);
+ memcpy(cuip->cui_format.cui_extents, extp, count * sizeof(*extp));
+ atomic_set(&cuip->cui_next_extent, count);
+ xfs_trans_add_item(tp, &cuip->cui_item);
+ set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
+ return &cuip->cui_item;
+}
+
static const struct xfs_item_ops xfs_cui_item_ops = {
.iop_size = xfs_cui_item_size,
.iop_format = xfs_cui_item_format,
@@ -581,6 +593,7 @@ static const struct xfs_item_ops xfs_cui_item_ops = {
.iop_release = xfs_cui_item_release,
.iop_recover = xfs_cui_item_recover,
.iop_match = xfs_cui_item_match,
+ .iop_relog = xfs_cui_item_relog,
};
/*
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index dc5b0753cd51..7adc996ca6e3 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -467,14 +467,14 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
STATIC int
xfs_rui_item_recover(
struct xfs_log_item *lip,
- struct xfs_trans *parent_tp)
+ struct list_head *capture_list)
{
struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
struct xfs_map_extent *rmap;
struct xfs_rud_log_item *rudp;
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
- struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_mount *mp = lip->li_mountp;
xfs_fsblock_t startblock_fsb;
enum xfs_rmap_intent_type type;
xfs_exntst_t state;
@@ -511,14 +511,8 @@ xfs_rui_item_recover(
rmap->me_len == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
rmap->me_len >= mp->m_sb.sb_agblocks ||
- (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) {
- /*
- * This will pull the RUI from the AIL and
- * free the memory associated with it.
- */
- xfs_rui_release(ruip);
+ (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS))
return -EFSCORRUPTED;
- }
}
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
@@ -573,8 +567,7 @@ xfs_rui_item_recover(
}
xfs_rmap_finish_one_cleanup(tp, rcur, error);
- error = xfs_trans_commit(tp);
- return error;
+ return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
abort_error:
xfs_rmap_finish_one_cleanup(tp, rcur, error);
@@ -590,6 +583,32 @@ xfs_rui_item_match(
return RUI_ITEM(lip)->rui_format.rui_id == intent_id;
}
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_rui_item_relog(
+ struct xfs_log_item *intent,
+ struct xfs_trans *tp)
+{
+ struct xfs_rud_log_item *rudp;
+ struct xfs_rui_log_item *ruip;
+ struct xfs_map_extent *extp;
+ unsigned int count;
+
+ count = RUI_ITEM(intent)->rui_format.rui_nextents;
+ extp = RUI_ITEM(intent)->rui_format.rui_extents;
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ rudp = xfs_trans_get_rud(tp, RUI_ITEM(intent));
+ set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
+
+ ruip = xfs_rui_init(tp->t_mountp, count);
+ memcpy(ruip->rui_format.rui_extents, extp, count * sizeof(*extp));
+ atomic_set(&ruip->rui_next_extent, count);
+ xfs_trans_add_item(tp, &ruip->rui_item);
+ set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
+ return &ruip->rui_item;
+}
+
static const struct xfs_item_ops xfs_rui_item_ops = {
.iop_size = xfs_rui_item_size,
.iop_format = xfs_rui_item_format,
@@ -597,6 +616,7 @@ static const struct xfs_item_ops xfs_rui_item_ops = {
.iop_release = xfs_rui_item_release,
.iop_recover = xfs_rui_item_recover,
.iop_match = xfs_rui_item_match,
+ .iop_relog = xfs_rui_item_relog,
};
/*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 5b89c12f1566..ede1baf31413 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -18,7 +18,7 @@
#include "xfs_trans_space.h"
#include "xfs_icache.h"
#include "xfs_rtalloc.h"
-
+#include "xfs_sb.h"
/*
* Read and return the summary information for a given extent size,
@@ -778,8 +778,14 @@ xfs_growfs_rt_alloc(
struct xfs_bmbt_irec map; /* block map output */
int nmap; /* number of block maps */
int resblks; /* space reservation */
+ enum xfs_blft buf_type;
struct xfs_trans *tp;
+ if (ip == mp->m_rsumip)
+ buf_type = XFS_BLFT_RTSUMMARY_BUF;
+ else
+ buf_type = XFS_BLFT_RTBITMAP_BUF;
+
/*
* Allocate space to the file, as necessary.
*/
@@ -841,6 +847,9 @@ xfs_growfs_rt_alloc(
mp->m_bsize, 0, &bp);
if (error)
goto out_trans_cancel;
+
+ xfs_trans_buf_set_type(tp, bp, buf_type);
+ bp->b_ops = &xfs_rtbuf_ops;
memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
/*
@@ -1015,23 +1024,29 @@ xfs_growfs_rt(
/*
* Lock out other callers by grabbing the bitmap inode lock.
*/
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
/*
- * Update the bitmap inode's size.
+ * Update the bitmap inode's size ondisk and incore. We need
+ * to update the incore size so that inode inactivation won't
+ * punch what it thinks are "posteof" blocks.
*/
mp->m_rbmip->i_d.di_size =
nsbp->sb_rbmblocks * nsbp->sb_blocksize;
+ i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
/*
* Get the summary inode into the transaction.
*/
- xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
+ xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
/*
- * Update the summary inode's size.
+ * Update the summary inode's size. We need to update the
+ * incore size so that inode inactivation won't punch what it
+ * thinks are "posteof" blocks.
*/
mp->m_rsumip->i_d.di_size = nmp->m_rsumsize;
+ i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);
/*
* Copy summary data from old to new sizes.
@@ -1087,7 +1102,13 @@ error_cancel:
if (error)
break;
}
+ if (error)
+ goto out_free;
+
+ /* Update secondary superblocks now the physical grow has completed */
+ error = xfs_update_secondary_sbs(mp);
+out_free:
/*
* Free the fake mp structure.
*/
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index f70f1255220b..20e0534a772c 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -23,6 +23,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
uint64_t xs_xstrat_bytes = 0;
uint64_t xs_write_bytes = 0;
uint64_t xs_read_bytes = 0;
+ uint64_t defer_relog = 0;
static const struct xstats_entry {
char *desc;
@@ -70,10 +71,13 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes;
xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes;
xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes;
+ defer_relog += per_cpu_ptr(stats, i)->s.defer_relog;
}
len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+ len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n",
+ defer_relog);
len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
#if defined(DEBUG)
1);
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 34d704f703d2..43ffba74f045 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -137,6 +137,7 @@ struct __xfsstats {
uint64_t xs_xstrat_bytes;
uint64_t xs_write_bytes;
uint64_t xs_read_bytes;
+ uint64_t defer_relog;
};
#define xfsstats_offset(f) (offsetof(struct __xfsstats, f)/sizeof(uint32_t))
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index baf5de30eebb..e3e229e52512 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -794,8 +794,7 @@ xfs_fs_statfs(
statp->f_namelen = MAXNAMELEN - 1;
id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
- statp->f_fsid.val[0] = (u32)id;
- statp->f_fsid.val[1] = (u32)(id >> 32);
+ statp->f_fsid = u64_to_fsid(id);
icount = percpu_counter_sum(&mp->m_icount);
ifree = percpu_counter_sum(&mp->m_ifree);
@@ -1234,25 +1233,12 @@ xfs_fc_parse_param(
case Opt_nouuid:
mp->m_flags |= XFS_MOUNT_NOUUID;
return 0;
- case Opt_ikeep:
- mp->m_flags |= XFS_MOUNT_IKEEP;
- return 0;
- case Opt_noikeep:
- mp->m_flags &= ~XFS_MOUNT_IKEEP;
- return 0;
case Opt_largeio:
mp->m_flags |= XFS_MOUNT_LARGEIO;
return 0;
case Opt_nolargeio:
mp->m_flags &= ~XFS_MOUNT_LARGEIO;
return 0;
- case Opt_attr2:
- mp->m_flags |= XFS_MOUNT_ATTR2;
- return 0;
- case Opt_noattr2:
- mp->m_flags &= ~XFS_MOUNT_ATTR2;
- mp->m_flags |= XFS_MOUNT_NOATTR2;
- return 0;
case Opt_filestreams:
mp->m_flags |= XFS_MOUNT_FILESTREAMS;
return 0;
@@ -1304,6 +1290,24 @@ xfs_fc_parse_param(
xfs_mount_set_dax_mode(mp, result.uint_32);
return 0;
#endif
+ /* Following mount options will be removed in September 2025 */
+ case Opt_ikeep:
+ xfs_warn(mp, "%s mount option is deprecated.", param->key);
+ mp->m_flags |= XFS_MOUNT_IKEEP;
+ return 0;
+ case Opt_noikeep:
+ xfs_warn(mp, "%s mount option is deprecated.", param->key);
+ mp->m_flags &= ~XFS_MOUNT_IKEEP;
+ return 0;
+ case Opt_attr2:
+ xfs_warn(mp, "%s mount option is deprecated.", param->key);
+ mp->m_flags |= XFS_MOUNT_ATTR2;
+ return 0;
+ case Opt_noattr2:
+ xfs_warn(mp, "%s mount option is deprecated.", param->key);
+ mp->m_flags &= ~XFS_MOUNT_ATTR2;
+ mp->m_flags |= XFS_MOUNT_NOATTR2;
+ return 0;
default:
xfs_warn(mp, "unknown mount option [%s].", param->key);
return -EINVAL;
@@ -1450,6 +1454,19 @@ xfs_fc_fill_super(
if (error)
goto out_free_sb;
+ /* V4 support is undergoing deprecation. */
+ if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+#ifdef CONFIG_XFS_SUPPORT_V4
+ xfs_warn_once(mp,
+ "Deprecated V4 format (crc=0) will not be supported after September 2030.");
+#else
+ xfs_warn(mp,
+ "Deprecated V4 format (crc=0) not supported by kernel.");
+ error = -EINVAL;
+ goto out_free_sb;
+#endif
+ }
+
/*
* XFS block mappings use 54 bits to store the logical block offset.
* This should suffice to handle the maximum file size that the VFS
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 021ef96d0542..fac9de7ee6d0 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -50,13 +50,45 @@ xfs_panic_mask_proc_handler(
}
#endif /* CONFIG_PROC_FS */
+STATIC int
+xfs_deprecate_irix_sgid_inherit_proc_handler(
+ struct ctl_table *ctl,
+ int write,
+ void *buffer,
+ size_t *lenp,
+ loff_t *ppos)
+{
+ if (write) {
+ printk_once(KERN_WARNING
+ "XFS: " "%s sysctl option is deprecated.\n",
+ ctl->procname);
+ }
+ return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+}
+
+STATIC int
+xfs_deprecate_irix_symlink_mode_proc_handler(
+ struct ctl_table *ctl,
+ int write,
+ void *buffer,
+ size_t *lenp,
+ loff_t *ppos)
+{
+ if (write) {
+ printk_once(KERN_WARNING
+ "XFS: " "%s sysctl option is deprecated.\n",
+ ctl->procname);
+ }
+ return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+}
+
static struct ctl_table xfs_table[] = {
{
.procname = "irix_sgid_inherit",
.data = &xfs_params.sgid_inherit.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = xfs_deprecate_irix_sgid_inherit_proc_handler,
.extra1 = &xfs_params.sgid_inherit.min,
.extra2 = &xfs_params.sgid_inherit.max
},
@@ -65,7 +97,7 @@ static struct ctl_table xfs_table[] = {
.data = &xfs_params.symlink_mode.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = xfs_deprecate_irix_symlink_mode_proc_handler,
.extra1 = &xfs_params.symlink_mode.min,
.extra2 = &xfs_params.symlink_mode.max
},
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index dcdcf99cfa5d..86951652d3ed 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2533,6 +2533,7 @@ DEFINE_DEFER_PENDING_EVENT(xfs_defer_create_intent);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_cancel_list);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_relog_intent);
#define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ca18a040336a..c94e71f741b6 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -959,7 +959,7 @@ xfs_trans_cancel(
struct xfs_log_item *lip;
list_for_each_entry(lip, &tp->t_items, li_trans)
- ASSERT(!(lip->li_type == XFS_LI_EFD));
+ ASSERT(!xlog_item_is_intent_done(lip));
}
#endif
xfs_trans_unreserve_and_mod_sb(tp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index f46534b75236..084658946cc8 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -55,14 +55,12 @@ struct xfs_log_item {
#define XFS_LI_ABORTED 1
#define XFS_LI_FAILED 2
#define XFS_LI_DIRTY 3 /* log item dirty in transaction */
-#define XFS_LI_RECOVERED 4 /* log intent item has been recovered */
#define XFS_LI_FLAGS \
{ (1 << XFS_LI_IN_AIL), "IN_AIL" }, \
{ (1 << XFS_LI_ABORTED), "ABORTED" }, \
{ (1 << XFS_LI_FAILED), "FAILED" }, \
- { (1 << XFS_LI_DIRTY), "DIRTY" }, \
- { (1 << XFS_LI_RECOVERED), "RECOVERED" }
+ { (1 << XFS_LI_DIRTY), "DIRTY" }
struct xfs_item_ops {
unsigned flags;
@@ -74,10 +72,29 @@ struct xfs_item_ops {
void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
void (*iop_release)(struct xfs_log_item *);
xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
- int (*iop_recover)(struct xfs_log_item *lip, struct xfs_trans *tp);
+ int (*iop_recover)(struct xfs_log_item *lip,
+ struct list_head *capture_list);
bool (*iop_match)(struct xfs_log_item *item, uint64_t id);
+ struct xfs_log_item *(*iop_relog)(struct xfs_log_item *intent,
+ struct xfs_trans *tp);
};
+/* Is this log item a deferred action intent? */
+static inline bool
+xlog_item_is_intent(struct xfs_log_item *lip)
+{
+ return lip->li_ops->iop_recover != NULL &&
+ lip->li_ops->iop_match != NULL;
+}
+
+/* Is this a log intent-done item? */
+static inline bool
+xlog_item_is_intent_done(struct xfs_log_item *lip)
+{
+ return lip->li_ops->iop_unpin == NULL &&
+ lip->li_ops->iop_push == NULL;
+}
+
/*
* Release the log item as soon as committed. This is for items just logging
* intents that never need to be written back in place.
@@ -243,4 +260,12 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
extern kmem_zone_t *xfs_trans_zone;
+static inline struct xfs_log_item *
+xfs_trans_item_relog(
+ struct xfs_log_item *lip,
+ struct xfs_trans *tp)
+{
+ return lip->li_ops->iop_relog(lip, tp);
+}
+
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 133fc6fc3edd..fe45b0c3970c 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -221,36 +221,27 @@ xfs_trans_mod_dquot(
}
switch (field) {
-
- /*
- * regular disk blk reservation
- */
- case XFS_TRANS_DQ_RES_BLKS:
+ /* regular disk blk reservation */
+ case XFS_TRANS_DQ_RES_BLKS:
qtrx->qt_blk_res += delta;
break;
- /*
- * inode reservation
- */
- case XFS_TRANS_DQ_RES_INOS:
+ /* inode reservation */
+ case XFS_TRANS_DQ_RES_INOS:
qtrx->qt_ino_res += delta;
break;
- /*
- * disk blocks used.
- */
- case XFS_TRANS_DQ_BCOUNT:
+ /* disk blocks used. */
+ case XFS_TRANS_DQ_BCOUNT:
qtrx->qt_bcount_delta += delta;
break;
- case XFS_TRANS_DQ_DELBCOUNT:
+ case XFS_TRANS_DQ_DELBCOUNT:
qtrx->qt_delbcnt_delta += delta;
break;
- /*
- * Inode Count
- */
- case XFS_TRANS_DQ_ICOUNT:
+ /* Inode Count */
+ case XFS_TRANS_DQ_ICOUNT:
if (qtrx->qt_ino_res && delta > 0) {
qtrx->qt_ino_res_used += delta;
ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
@@ -258,17 +249,13 @@ xfs_trans_mod_dquot(
qtrx->qt_icount_delta += delta;
break;
- /*
- * rtblk reservation
- */
- case XFS_TRANS_DQ_RES_RTBLKS:
+ /* rtblk reservation */
+ case XFS_TRANS_DQ_RES_RTBLKS:
qtrx->qt_rtblk_res += delta;
break;
- /*
- * rtblk count
- */
- case XFS_TRANS_DQ_RTBCOUNT:
+ /* rtblk count */
+ case XFS_TRANS_DQ_RTBCOUNT:
if (qtrx->qt_rtblk_res && delta > 0) {
qtrx->qt_rtblk_res_used += delta;
ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
@@ -276,11 +263,11 @@ xfs_trans_mod_dquot(
qtrx->qt_rtbcount_delta += delta;
break;
- case XFS_TRANS_DQ_DELRTBCOUNT:
+ case XFS_TRANS_DQ_DELRTBCOUNT:
qtrx->qt_delrtb_delta += delta;
break;
- default:
+ default:
ASSERT(0);
}