summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_iops.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_iops.c')
-rw-r--r--fs/xfs/xfs_iops.c62
1 files changed, 38 insertions, 24 deletions
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 207e0dadffc3..40289fe6f5b2 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -573,17 +573,43 @@ xfs_stat_blksize(
}
static void
-xfs_get_atomic_write_attr(
+xfs_report_dioalign(
struct xfs_inode *ip,
- unsigned int *unit_min,
- unsigned int *unit_max)
+ struct kstat *stat)
{
- if (!xfs_inode_can_atomicwrite(ip)) {
- *unit_min = *unit_max = 0;
- return;
- }
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+ struct block_device *bdev = target->bt_bdev;
+
+ stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN;
+ stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
- *unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize;
+ /*
+ * For COW inodes, we can only perform out of place writes of entire
+ * allocation units (blocks or RT extents).
+ * For writes smaller than the allocation unit, we must fall back to
+ * buffered I/O to perform read-modify-write cycles. At best this is
+ * highly inefficient; at worst it leads to page cache invalidation
+ * races. Tell applications to avoid this by reporting the larger write
+ * alignment in dio_offset_align, and the smaller read alignment in
+ * dio_read_offset_align.
+ */
+ stat->dio_read_offset_align = bdev_logical_block_size(bdev);
+ if (xfs_is_cow_inode(ip))
+ stat->dio_offset_align = xfs_inode_alloc_unitsize(ip);
+ else
+ stat->dio_offset_align = stat->dio_read_offset_align;
+}
+
+static void
+xfs_report_atomic_write(
+ struct xfs_inode *ip,
+ struct kstat *stat)
+{
+ unsigned int unit_min = 0, unit_max = 0;
+
+ if (xfs_inode_can_atomicwrite(ip))
+ unit_min = unit_max = ip->i_mount->m_sb.sb_blocksize;
+ generic_fill_statx_atomic_writes(stat, unit_min, unit_max);
}
STATIC int
@@ -647,22 +673,10 @@ xfs_vn_getattr(
stat->rdev = inode->i_rdev;
break;
case S_IFREG:
- if (request_mask & STATX_DIOALIGN) {
- struct xfs_buftarg *target = xfs_inode_buftarg(ip);
- struct block_device *bdev = target->bt_bdev;
-
- stat->result_mask |= STATX_DIOALIGN;
- stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
- stat->dio_offset_align = bdev_logical_block_size(bdev);
- }
- if (request_mask & STATX_WRITE_ATOMIC) {
- unsigned int unit_min, unit_max;
-
- xfs_get_atomic_write_attr(ip, &unit_min,
- &unit_max);
- generic_fill_statx_atomic_writes(stat,
- unit_min, unit_max);
- }
+ if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN))
+ xfs_report_dioalign(ip, stat);
+ if (request_mask & STATX_WRITE_ATOMIC)
+ xfs_report_atomic_write(ip, stat);
fallthrough;
default:
stat->blksize = xfs_stat_blksize(ip);