summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_trans_resv.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/libxfs/xfs_trans_resv.c')
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c695
1 files changed, 612 insertions, 83 deletions
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 6cd45e8c118d..86a111d0f2fc 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -20,6 +20,14 @@
#include "xfs_qm.h"
#include "xfs_trans_space.h"
#include "xfs_rtbitmap.h"
+#include "xfs_attr_item.h"
+#include "xfs_log.h"
+#include "xfs_defer.h"
+#include "xfs_bmap_item.h"
+#include "xfs_extfree_item.h"
+#include "xfs_rmap_item.h"
+#include "xfs_refcount_item.h"
+#include "xfs_trace.h"
#define _ALLOC true
#define _FREE false
@@ -90,6 +98,14 @@ xfs_refcountbt_block_count(
return num_ops * (2 * mp->m_refc_maxlevels - 1);
}
+static unsigned int
+xfs_rtrefcountbt_block_count(
+ struct xfs_mount *mp,
+ unsigned int num_ops)
+{
+ return num_ops * (2 * mp->m_rtrefc_maxlevels - 1);
+}
+
/*
* Logging inodes is really tricksy. They are logged in memory format,
* which means that what we write into the log doesn't directly translate into
@@ -128,7 +144,7 @@ xfs_calc_inode_res(
(4 * sizeof(struct xlog_op_header) +
sizeof(struct xfs_inode_log_format) +
mp->m_sb.sb_inodesize +
- 2 * XFS_BMBT_BLOCK_LEN(mp));
+ 2 * xfs_bmbt_block_len(mp));
}
/*
@@ -211,7 +227,9 @@ xfs_calc_inode_chunk_res(
* Per-extent log reservation for the btree changes involved in freeing or
* allocating a realtime extent. We have to be able to log as many rtbitmap
* blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime
- * extents, as well as the realtime summary block.
+ * extents, as well as the realtime summary block (t1). Realtime rmap btree
+ * operations happen in a second transaction, so factor in a couple of rtrmapbt
+ * splits (t2).
*/
static unsigned int
xfs_rtalloc_block_count(
@@ -220,10 +238,16 @@ xfs_rtalloc_block_count(
{
unsigned int rtbmp_blocks;
xfs_rtxlen_t rtxlen;
+ unsigned int t1, t2 = 0;
rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
- rtbmp_blocks = xfs_rtbitmap_blockcount(mp, rtxlen);
- return (rtbmp_blocks + 1) * num_ops;
+ rtbmp_blocks = xfs_rtbitmap_blockcount_len(mp, rtxlen);
+ t1 = (rtbmp_blocks + 1) * num_ops;
+
+ if (xfs_has_rmapbt(mp))
+ t2 = num_ops * (2 * mp->m_rtrmap_maxlevels - 1);
+
+ return max(t1, t2);
}
/*
@@ -246,26 +270,64 @@ xfs_rtalloc_block_count(
*/
/*
+ * Finishing a data device refcount updates (t1):
+ * the agfs of the ags containing the blocks: nr_ops * sector size
+ * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_cui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr_ops)
+{
+ if (!xfs_has_reflink(mp))
+ return 0;
+
+ return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Realtime refcount updates (t2);
+ * the rt refcount inode
+ * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_rt_cui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr_ops)
+{
+ if (!xfs_has_rtreflink(mp))
+ return 0;
+
+ return xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
* Compute the log reservation required to handle the refcount update
* transaction. Refcount updates are always done via deferred log items.
*
- * This is calculated as:
+ * This is calculated as the max of:
* Data device refcount updates (t1):
* the agfs of the ags containing the blocks: nr_ops * sector size
* the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ * Realtime refcount updates (t2);
+ * the rt refcount inode
+ * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
*/
static unsigned int
xfs_calc_refcountbt_reservation(
struct xfs_mount *mp,
unsigned int nr_ops)
{
- unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+ unsigned int t1, t2;
- if (!xfs_has_reflink(mp))
- return 0;
+ t1 = xfs_calc_finish_cui_reservation(mp, nr_ops);
+ t2 = xfs_calc_finish_rt_cui_reservation(mp, nr_ops);
- return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops), blksz);
+ return max(t1, t2);
}
/*
@@ -336,11 +398,11 @@ xfs_calc_write_reservation(
blksz);
t1 += adj;
t3 += adj;
- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+ return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
}
t4 = xfs_calc_refcountbt_reservation(mp, 1);
- return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+ return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
unsigned int
@@ -351,6 +413,96 @@ xfs_calc_write_reservation_minlogsize(
}
/*
+ * Finishing an EFI can free the blocks and bmap blocks (t2):
+ * the agf for each of the ags: nr * sector size
+ * the agfl for each of the ags: nr * sector size
+ * the super block to reflect the freed blocks: sector size
+ * worst case split in allocation btrees per extent assuming nr extents:
+ * nr exts * 2 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_efi_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Or, if it's a realtime file (t3):
+ * the agf for each of the ags: 2 * sector size
+ * the agfl for each of the ags: 2 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * the realtime bitmap:
+ * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 2 exts * 1 block
+ * worst case split in allocation btrees per extent assuming 2 extents:
+ * 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_rt_efi_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ if (!xfs_has_realtime(mp))
+ return 0;
+
+ return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_rtalloc_block_count(mp, nr),
+ mp->m_sb.sb_blocksize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Finishing an RUI is the same as an EFI. We can split the rmap btree twice
+ * on each end of the record, and that can cause the AGFL to be refilled or
+ * emptied out.
+ */
+inline unsigned int
+xfs_calc_finish_rui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ if (!xfs_has_rmapbt(mp))
+ return 0;
+ return xfs_calc_finish_efi_reservation(mp, nr);
+}
+
+/*
+ * Finishing an RUI is the same as an EFI. We can split the rmap btree twice
+ * on each end of the record, and that can cause the AGFL to be refilled or
+ * emptied out.
+ */
+inline unsigned int
+xfs_calc_finish_rt_rui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ if (!xfs_has_rtrmapbt(mp))
+ return 0;
+ return xfs_calc_finish_rt_efi_reservation(mp, nr);
+}
+
+/*
+ * In finishing a BUI, we can modify:
+ * the inode being truncated: inode size
+ * dquots
+ * the inode's bmap btree: (max depth + 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_bui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ return xfs_calc_inode_res(mp, 1) + XFS_DQUOT_LOGRES +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
+ mp->m_sb.sb_blocksize);
+}
+
+/*
* In truncating a file we free up to two extents at once. We can modify (t1):
* the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size
@@ -382,16 +534,8 @@ xfs_calc_itruncate_reservation(
t1 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
- t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), blksz);
-
- if (xfs_has_realtime(mp)) {
- t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 2), blksz) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
- } else {
- t3 = 0;
- }
+ t2 = xfs_calc_finish_efi_reservation(mp, 4);
+ t3 = xfs_calc_finish_rt_efi_reservation(mp, 2);
/*
* In the early days of reflink, we included enough reservation to log
@@ -408,11 +552,11 @@ xfs_calc_itruncate_reservation(
xfs_refcountbt_block_count(mp, 4),
blksz);
- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+ return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
}
t4 = xfs_calc_refcountbt_reservation(mp, 2);
- return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+ return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
unsigned int
@@ -422,29 +566,108 @@ xfs_calc_itruncate_reservation_minlogsize(
return xfs_calc_itruncate_reservation(mp, true);
}
+static inline unsigned int xfs_calc_pptr_link_overhead(void)
+{
+ return sizeof(struct xfs_attri_log_format) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1);
+}
+static inline unsigned int xfs_calc_pptr_unlink_overhead(void)
+{
+ return sizeof(struct xfs_attri_log_format) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1);
+}
+static inline unsigned int xfs_calc_pptr_replace_overhead(void)
+{
+ return sizeof(struct xfs_attri_log_format) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1);
+}
+
/*
* In renaming a files we can modify:
* the five inodes involved: 5 * inode size
* the two directory btrees: 2 * (max depth + v2) * dir block size
* the two directory bmap btrees: 2 * max depth * block size
* And the bmap_finish transaction can free dir and bmap blocks (two sets
- * of bmap blocks) giving:
+ * of bmap blocks) giving (t2):
* the agf for the ags in which the blocks live: 3 * sector size
* the agfl for the ags in which the blocks live: 3 * sector size
* the superblock for the free block count: sector size
* the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ * If parent pointers are enabled (t3), then each transaction in the chain
+ * must be capable of setting or removing the extended attribute
+ * containing the parent information. It must also be able to handle
+ * the three xattr intent items that track the progress of the parent
+ * pointer update.
*/
STATIC uint
xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 5) +
- xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int overhead = XFS_DQUOT_LOGRES;
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int t1, t2, t3 = 0;
+
+ t1 = xfs_calc_inode_res(mp, 5) +
+ xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
+ XFS_FSB_TO_B(mp, 1));
+
+ t2 = xfs_calc_finish_efi_reservation(mp, 3);
+
+ if (xfs_has_parent(mp)) {
+ unsigned int rename_overhead, exchange_overhead;
+
+ t3 = max(resp->tr_attrsetm.tr_logres,
+ resp->tr_attrrm.tr_logres);
+
+ /*
+ * For a standard rename, the three xattr intent log items
+ * are (1) replacing the pptr for the source file; (2)
+ * removing the pptr on the dest file; and (3) adding a
+ * pptr for the whiteout file in the src dir.
+ *
+ * For an RENAME_EXCHANGE, there are two xattr intent
+ * items to replace the pptr for both src and dest
+ * files. Link counts don't change and there is no
+ * whiteout.
+ *
+ * In the worst case we can end up relogging all log
+ * intent items to allow the log tail to move ahead, so
+ * they become overhead added to each transaction in a
+ * processing chain.
+ */
+ rename_overhead = xfs_calc_pptr_replace_overhead() +
+ xfs_calc_pptr_unlink_overhead() +
+ xfs_calc_pptr_link_overhead();
+ exchange_overhead = 2 * xfs_calc_pptr_replace_overhead();
+
+ overhead += max(rename_overhead, exchange_overhead);
+ }
+
+ return overhead + max3(t1, t2, t3);
+}
+
+static inline unsigned int
+xfs_rename_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ /* One for the rename, one more for freeing blocks */
+ unsigned int ret = XFS_RENAME_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to remove or add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += max(resp->tr_attrsetm.tr_logcount,
+ resp->tr_attrrm.tr_logcount);
+
+ return ret;
}
/*
@@ -461,6 +684,23 @@ xfs_calc_iunlink_remove_reservation(
2 * M_IGEO(mp)->inode_cluster_size;
}
+static inline unsigned int
+xfs_link_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_LINK_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
+
/*
* For creating a link to an inode:
* the parent directory inode: inode size
@@ -477,14 +717,21 @@ STATIC uint
xfs_calc_link_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- xfs_calc_iunlink_remove_reservation(mp) +
- max((xfs_calc_inode_res(mp, 2) +
- xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int overhead = XFS_DQUOT_LOGRES;
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int t1, t2, t3 = 0;
+
+ overhead += xfs_calc_iunlink_remove_reservation(mp);
+ t1 = xfs_calc_inode_res(mp, 2) +
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_finish_efi_reservation(mp, 1);
+
+ if (xfs_has_parent(mp)) {
+ t3 = resp->tr_attrsetm.tr_logres;
+ overhead += xfs_calc_pptr_link_overhead();
+ }
+
+ return overhead + max3(t1, t2, t3);
}
/*
@@ -499,6 +746,23 @@ xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
M_IGEO(mp)->inode_cluster_size;
}
+static inline unsigned int
+xfs_remove_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_REMOVE_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrrm.tr_logcount;
+
+ return ret;
+}
+
/*
* For removing a directory entry we can modify:
* the parent directory inode: inode size
@@ -515,14 +779,22 @@ STATIC uint
xfs_calc_remove_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- xfs_calc_iunlink_add_reservation(mp) +
- max((xfs_calc_inode_res(mp, 2) +
- xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int overhead = XFS_DQUOT_LOGRES;
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int t1, t2, t3 = 0;
+
+ overhead += xfs_calc_iunlink_add_reservation(mp);
+
+ t1 = xfs_calc_inode_res(mp, 2) +
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_finish_efi_reservation(mp, 2);
+
+ if (xfs_has_parent(mp)) {
+ t3 = resp->tr_attrrm.tr_logres;
+ overhead += xfs_calc_pptr_unlink_overhead();
+ }
+
+ return overhead + max3(t1, t2, t3);
}
/*
@@ -571,24 +843,69 @@ xfs_calc_icreate_resv_alloc(
xfs_calc_finobt_res(mp);
}
+static inline unsigned int
+xfs_icreate_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_CREATE_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
+
STATIC uint
-xfs_calc_icreate_reservation(xfs_mount_t *mp)
+xfs_calc_icreate_reservation(
+ struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max(xfs_calc_icreate_resv_alloc(mp),
- xfs_calc_create_resv_modify(mp));
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int overhead = XFS_DQUOT_LOGRES;
+ unsigned int t1, t2, t3 = 0;
+
+ t1 = xfs_calc_icreate_resv_alloc(mp);
+ t2 = xfs_calc_create_resv_modify(mp);
+
+ if (xfs_has_parent(mp)) {
+ t3 = resp->tr_attrsetm.tr_logres;
+ overhead += xfs_calc_pptr_link_overhead();
+ }
+
+ return overhead + max3(t1, t2, t3);
}
STATIC uint
xfs_calc_create_tmpfile_reservation(
struct xfs_mount *mp)
{
- uint res = XFS_DQUOT_LOGRES(mp);
+ uint res = XFS_DQUOT_LOGRES;
res += xfs_calc_icreate_resv_alloc(mp);
return res + xfs_calc_iunlink_add_reservation(mp);
}
+static inline unsigned int
+xfs_mkdir_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_MKDIR_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
+
/*
* Making a new directory is the same as creating a new file.
*/
@@ -599,6 +916,22 @@ xfs_calc_mkdir_reservation(
return xfs_calc_icreate_reservation(mp);
}
+static inline unsigned int
+xfs_symlink_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_SYMLINK_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
/*
* Making a new symplink is the same as creating a new file, but
@@ -632,7 +965,7 @@ STATIC uint
xfs_calc_ifree_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_iunlink_remove_reservation(mp) +
@@ -649,7 +982,7 @@ STATIC uint
xfs_calc_ichange_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
@@ -721,7 +1054,7 @@ xfs_calc_growrtfree_reservation(
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
- xfs_calc_buf_res(1, mp->m_rsumsize);
+ xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, mp->m_rsumblocks));
}
/*
@@ -758,7 +1091,7 @@ STATIC uint
xfs_calc_addafork_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
@@ -806,7 +1139,7 @@ STATIC uint
xfs_calc_attrsetm_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
@@ -846,7 +1179,7 @@ STATIC uint
xfs_calc_attrrm_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
max((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
XFS_FSB_TO_B(mp, 1)) +
@@ -911,54 +1244,85 @@ xfs_calc_sb_reservation(
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
-void
-xfs_trans_resv_calc(
+/*
+ * Namespace reservations.
+ *
+ * These get tricky when parent pointers are enabled as we have attribute
+ * modifications occurring from within these transactions. Rather than confuse
+ * each of these reservation calculations with the conditional attribute
+ * reservations, add them here in a clear and concise manner. This requires that
+ * the attribute reservations have already been calculated.
+ *
+ * Note that we only include the static attribute reservation here; the runtime
+ * reservation will have to be modified by the size of the attributes being
+ * added/removed/modified. See the comments on the attribute reservation
+ * calculations for more details.
+ */
+STATIC void
+xfs_calc_namespace_reservations(
struct xfs_mount *mp,
struct xfs_trans_resv *resp)
{
- int logcount_adj = 0;
-
- /*
- * The following transactions are logged in physical format and
- * require a permanent reservation on space.
- */
- resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
- resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
- resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
- resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
- resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
- resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ ASSERT(resp->tr_attrsetm.tr_logres > 0);
resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
- resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT;
+ resp->tr_rename.tr_logcount = xfs_rename_log_count(mp, resp);
resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
- resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT;
+ resp->tr_link.tr_logcount = xfs_link_log_count(mp, resp);
resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
- resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT;
+ resp->tr_remove.tr_logcount = xfs_remove_log_count(mp, resp);
resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
- resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
+ resp->tr_symlink.tr_logcount = xfs_symlink_log_count(mp, resp);
resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
- resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
+ resp->tr_create.tr_logcount = xfs_icreate_log_count(mp, resp);
resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
+ resp->tr_mkdir.tr_logcount = xfs_mkdir_log_count(mp, resp);
+ resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+}
+
+STATIC void
+xfs_calc_default_atomic_ioend_reservation(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ /* Pick a default that will scale reasonably for the log size. */
+ resp->tr_atomic_ioend = resp->tr_itruncate;
+}
+
+void
+xfs_trans_resv_calc(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ int logcount_adj = 0;
+
+ /*
+ * The following transactions are logged in physical format and
+ * require a permanent reservation on space.
+ */
+ resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
+ resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
+ resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+ resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
+ resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
+ resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
resp->tr_create_tmpfile.tr_logres =
xfs_calc_create_tmpfile_reservation(mp);
resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
- resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
- resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
- resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -988,6 +1352,8 @@ xfs_trans_resv_calc(
resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ xfs_calc_namespace_reservations(mp, resp);
+
/*
* The following transactions are logged in logical format with
* a default log count.
@@ -1027,4 +1393,167 @@ xfs_trans_resv_calc(
resp->tr_itruncate.tr_logcount += logcount_adj;
resp->tr_write.tr_logcount += logcount_adj;
resp->tr_qm_dqalloc.tr_logcount += logcount_adj;
+
+ /*
+ * Now that we've finished computing the static reservations, we can
+ * compute the dynamic reservation for atomic writes.
+ */
+ xfs_calc_default_atomic_ioend_reservation(mp, resp);
+}
+
+/*
+ * Return the per-extent and fixed transaction reservation sizes needed to
+ * complete an atomic write.
+ */
+STATIC unsigned int
+xfs_calc_atomic_write_ioend_geometry(
+ struct xfs_mount *mp,
+ unsigned int *step_size)
+{
+ const unsigned int efi = xfs_efi_log_space(1);
+ const unsigned int efd = xfs_efd_log_space(1);
+ const unsigned int rui = xfs_rui_log_space(1);
+ const unsigned int rud = xfs_rud_log_space();
+ const unsigned int cui = xfs_cui_log_space(1);
+ const unsigned int cud = xfs_cud_log_space();
+ const unsigned int bui = xfs_bui_log_space(1);
+ const unsigned int bud = xfs_bud_log_space();
+
+ /*
+ * Maximum overhead to complete an atomic write ioend in software:
+ * remove data fork extent + remove cow fork extent + map extent into
+ * data fork.
+ *
+ * tx0: Creates a BUI and a CUI and that's all it needs.
+ *
+ * tx1: Roll to finish the BUI. Need space for the BUD, an RUI, and
+ * enough space to relog the CUI (== CUI + CUD).
+ *
+ * tx2: Roll again to finish the RUI. Need space for the RUD and space
+ * to relog the CUI.
+ *
+ * tx3: Roll again, need space for the CUD and possibly a new EFI.
+ *
+ * tx4: Roll again, need space for an EFD.
+ *
+ * If the extent referenced by the pair of BUI/CUI items is not the one
+ * being currently processed, then we need to reserve space to relog
+ * both items.
+ */
+ const unsigned int tx0 = bui + cui;
+ const unsigned int tx1 = bud + rui + cui + cud;
+ const unsigned int tx2 = rud + cui + cud;
+ const unsigned int tx3 = cud + efi;
+ const unsigned int tx4 = efd;
+ const unsigned int relog = bui + bud + cui + cud;
+
+ const unsigned int per_intent = max(max3(tx0, tx1, tx2),
+ max3(tx3, tx4, relog));
+
+ /* Overhead to finish one step of each intent item type */
+ const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
+ const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
+ const unsigned int f3 = xfs_calc_finish_cui_reservation(mp, 1);
+ const unsigned int f4 = xfs_calc_finish_bui_reservation(mp, 1);
+
+ /* We only finish one item per transaction in a chain */
+ *step_size = max(f4, max3(f1, f2, f3));
+
+ return per_intent;
+}
+
+/*
+ * Compute the maximum size (in fsblocks) of atomic writes that we can complete
+ * given the existing log reservations.
+ */
+xfs_extlen_t
+xfs_calc_max_atomic_write_fsblocks(
+ struct xfs_mount *mp)
+{
+ const struct xfs_trans_res *resv = &M_RES(mp)->tr_atomic_ioend;
+ unsigned int per_intent = 0;
+ unsigned int step_size = 0;
+ unsigned int ret = 0;
+
+ if (resv->tr_logres > 0) {
+ per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
+ &step_size);
+
+ if (resv->tr_logres >= step_size)
+ ret = (resv->tr_logres - step_size) / per_intent;
+ }
+
+ trace_xfs_calc_max_atomic_write_fsblocks(mp, per_intent, step_size,
+ resv->tr_logres, ret);
+
+ return ret;
+}
+
+/*
+ * Compute the log blocks and transaction reservation needed to complete an
+ * atomic write of a given number of blocks. Worst case, each block requires
+ * separate handling. A return value of 0 means something went wrong.
+ */
+xfs_extlen_t
+xfs_calc_atomic_write_log_geometry(
+ struct xfs_mount *mp,
+ xfs_extlen_t blockcount,
+ unsigned int *new_logres)
+{
+ struct xfs_trans_res *curr_res = &M_RES(mp)->tr_atomic_ioend;
+ uint old_logres = curr_res->tr_logres;
+ unsigned int per_intent, step_size;
+ unsigned int logres;
+ xfs_extlen_t min_logblocks;
+
+ ASSERT(blockcount > 0);
+
+ xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
+
+ per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);
+
+ /* Check for overflows */
+ if (check_mul_overflow(blockcount, per_intent, &logres) ||
+ check_add_overflow(logres, step_size, &logres))
+ return 0;
+
+ curr_res->tr_logres = logres;
+ min_logblocks = xfs_log_calc_minimum_size(mp);
+ curr_res->tr_logres = old_logres;
+
+ trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
+ blockcount, min_logblocks, logres);
+
+ *new_logres = logres;
+ return min_logblocks;
+}
+
+/*
+ * Compute the transaction reservation needed to complete an out of place
+ * atomic write of a given number of blocks.
+ */
+int
+xfs_calc_atomic_write_reservation(
+ struct xfs_mount *mp,
+ xfs_extlen_t blockcount)
+{
+ unsigned int new_logres;
+ xfs_extlen_t min_logblocks;
+
+ /*
+ * If the caller doesn't ask for a specific atomic write size, then
+ * use the defaults.
+ */
+ if (blockcount == 0) {
+ xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
+ return 0;
+ }
+
+ min_logblocks = xfs_calc_atomic_write_log_geometry(mp, blockcount,
+ &new_logres);
+ if (!min_logblocks || min_logblocks > mp->m_sb.sb_logblocks)
+ return -EINVAL;
+
+ M_RES(mp)->tr_atomic_ioend.tr_logres = new_logres;
+ return 0;
}