diff options
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r-- | fs/xfs/xfs_file.c | 141 |
1 files changed, 107 insertions, 34 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 84f08c976ac4..55a304cb3aef 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -497,7 +497,7 @@ restart: static ssize_t xfs_zoned_write_space_reserve( - struct xfs_inode *ip, + struct xfs_mount *mp, struct kiocb *iocb, struct iov_iter *from, unsigned int flags, @@ -533,8 +533,8 @@ xfs_zoned_write_space_reserve( * * Any remaining block will be returned after the write. */ - return xfs_zoned_space_reserve(ip, - XFS_B_TO_FSB(ip->i_mount, count) + 1 + 2, flags, ac); + return xfs_zoned_space_reserve(mp, XFS_B_TO_FSB(mp, count) + 1 + 2, + flags, ac); } static int @@ -576,7 +576,10 @@ xfs_dio_write_end_io( nofs_flag = memalloc_nofs_save(); if (flags & IOMAP_DIO_COW) { - error = xfs_reflink_end_cow(ip, offset, size); + if (iocb->ki_flags & IOCB_ATOMIC) + error = xfs_reflink_end_atomic_cow(ip, offset, size); + else + error = xfs_reflink_end_cow(ip, offset, size); if (error) goto out; } @@ -715,13 +718,79 @@ xfs_file_dio_write_zoned( struct xfs_zone_alloc_ctx ac = { }; ssize_t ret; - ret = xfs_zoned_write_space_reserve(ip, iocb, from, 0, &ac); + ret = xfs_zoned_write_space_reserve(ip->i_mount, iocb, from, 0, &ac); if (ret < 0) return ret; ret = xfs_file_dio_write_aligned(ip, iocb, from, &xfs_zoned_direct_write_iomap_ops, &xfs_dio_zoned_write_ops, &ac); - xfs_zoned_space_unreserve(ip, &ac); + xfs_zoned_space_unreserve(ip->i_mount, &ac); + return ret; +} + +/* + * Handle block atomic writes + * + * Two methods of atomic writes are supported: + * - REQ_ATOMIC-based, which would typically use some form of HW offload in the + * disk + * - COW-based, which uses a COW fork as a staging extent for data updates + * before atomically updating extent mappings for the range being written + * + */ +static noinline ssize_t +xfs_file_dio_write_atomic( + struct xfs_inode *ip, + struct kiocb *iocb, + struct iov_iter *from) +{ + unsigned int iolock = XFS_IOLOCK_SHARED; + ssize_t ret, ocount = iov_iter_count(from); + const struct iomap_ops *dops; + + /* + * HW offload should be faster, so try that first if it is already + * known that the write length is not too large. + */ + if (ocount > xfs_inode_buftarg(ip)->bt_awu_max) + dops = &xfs_atomic_write_cow_iomap_ops; + else + dops = &xfs_direct_write_iomap_ops; + +retry: + ret = xfs_ilock_iocb_for_write(iocb, &iolock); + if (ret) + return ret; + + ret = xfs_file_write_checks(iocb, from, &iolock, NULL); + if (ret) + goto out_unlock; + + /* Demote similar to xfs_file_dio_write_aligned() */ + if (iolock == XFS_IOLOCK_EXCL) { + xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); + iolock = XFS_IOLOCK_SHARED; + } + + trace_xfs_file_direct_write(iocb, from); + ret = iomap_dio_rw(iocb, from, dops, &xfs_dio_write_ops, + 0, NULL, 0); + + /* + * The retry mechanism is based on the ->iomap_begin method returning + * -ENOPROTOOPT, which would be when the REQ_ATOMIC-based write is not + * possible. The REQ_ATOMIC-based method typically not be possible if + * the write spans multiple extents or the disk blocks are misaligned. + */ + if (ret == -ENOPROTOOPT && dops == &xfs_direct_write_iomap_ops) { + xfs_iunlock(ip, iolock); + dops = &xfs_atomic_write_cow_iomap_ops; + goto retry; + } + +out_unlock: + if (iolock) + xfs_iunlock(ip, iolock); return ret; } @@ -840,6 +909,8 @@ xfs_file_dio_write( return xfs_file_dio_write_unaligned(ip, iocb, from); if (xfs_is_zoned_inode(ip)) return xfs_file_dio_write_zoned(ip, iocb, from); + if (iocb->ki_flags & IOCB_ATOMIC) + return xfs_file_dio_write_atomic(ip, iocb, from); return xfs_file_dio_write_aligned(ip, iocb, from, &xfs_direct_write_iomap_ops, &xfs_dio_write_ops, NULL); } @@ -908,7 +979,8 @@ write_retry: trace_xfs_file_buffered_write(iocb, from); ret = iomap_file_buffered_write(iocb, from, - &xfs_buffered_write_iomap_ops, NULL); + &xfs_buffered_write_iomap_ops, &xfs_iomap_write_ops, + NULL); /* * If we hit a space limit, try to free up some lingering preallocated @@ -961,7 +1033,7 @@ xfs_file_buffered_write_zoned( struct xfs_zone_alloc_ctx ac = { }; ssize_t ret; - ret = xfs_zoned_write_space_reserve(ip, iocb, from, XFS_ZR_GREEDY, &ac); + ret = xfs_zoned_write_space_reserve(mp, iocb, from, XFS_ZR_GREEDY, &ac); if (ret < 0) return ret; @@ -988,7 +1060,8 @@ xfs_file_buffered_write_zoned( retry: trace_xfs_file_buffered_write(iocb, from); ret = iomap_file_buffered_write(iocb, from, - &xfs_buffered_write_iomap_ops, &ac); + &xfs_buffered_write_iomap_ops, &xfs_iomap_write_ops, + &ac); if (ret == -ENOSPC && !cleared_space) { /* * Kick off writeback to convert delalloc space and release the @@ -1002,7 +1075,7 @@ retry: out_unlock: xfs_iunlock(ip, iolock); out_unreserve: - xfs_zoned_space_unreserve(ip, &ac); + xfs_zoned_space_unreserve(ip->i_mount, &ac); if (ret > 0) { XFS_STATS_ADD(mp, xs_write_bytes, ret); ret = generic_write_sync(iocb, ret); @@ -1032,14 +1105,12 @@ xfs_file_write_iter( return xfs_file_dax_write(iocb, from); if (iocb->ki_flags & IOCB_ATOMIC) { - /* - * Currently only atomic writing of a single FS block is - * supported. It would be possible to atomic write smaller than - * a FS block, but there is no requirement to support this. - * Note that iomap also does not support this yet. - */ - if (ocount != ip->i_mount->m_sb.sb_blocksize) + if (ocount < xfs_get_atomic_write_min(ip)) + return -EINVAL; + + if (ocount > xfs_get_atomic_write_max(ip)) return -EINVAL; + ret = generic_atomic_write_valid(iocb, from); if (ret) return ret; @@ -1266,9 +1337,10 @@ xfs_falloc_allocate_range( } #define XFS_FALLOC_FL_SUPPORTED \ - (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE) + (FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \ + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \ + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \ + FALLOC_FL_UNSHARE_RANGE) STATIC long __xfs_file_fallocate( @@ -1344,11 +1416,11 @@ xfs_file_zoned_fallocate( struct xfs_inode *ip = XFS_I(file_inode(file)); int error; - error = xfs_zoned_space_reserve(ip, 2, XFS_ZR_RESERVED, &ac); + error = xfs_zoned_space_reserve(ip->i_mount, 2, XFS_ZR_RESERVED, &ac); if (error) return error; error = __xfs_file_fallocate(file, mode, offset, len, &ac); - xfs_zoned_space_unreserve(ip, &ac); + xfs_zoned_space_unreserve(ip->i_mount, &ac); return error; } @@ -1488,7 +1560,7 @@ xfs_file_open( if (xfs_is_shutdown(XFS_M(inode->i_sb))) return -EIO; file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; - if (xfs_inode_can_atomicwrite(XFS_I(inode))) + if (xfs_get_atomic_write_min(XFS_I(inode)) > 0) file->f_mode |= FMODE_CAN_ATOMIC_WRITE; return generic_file_open(inode, file); } @@ -1660,7 +1732,7 @@ xfs_dax_fault_locked( bool write_fault) { vm_fault_t ret; - pfn_t pfn; + unsigned long pfn; if (!IS_ENABLED(CONFIG_FS_DAX)) { ASSERT(0); @@ -1758,12 +1830,12 @@ xfs_write_fault_zoned( * But as the overallocation is limited to less than a folio and will be * release instantly that's just fine. */ - error = xfs_zoned_space_reserve(ip, XFS_B_TO_FSB(ip->i_mount, len), 0, - &ac); + error = xfs_zoned_space_reserve(ip->i_mount, + XFS_B_TO_FSB(ip->i_mount, len), 0, &ac); if (error < 0) return vmf_fs_error(error); ret = __xfs_write_fault(vmf, order, &ac); - xfs_zoned_space_unreserve(ip, &ac); + xfs_zoned_space_unreserve(ip->i_mount, &ac); return ret; } @@ -1844,10 +1916,10 @@ static const struct vm_operations_struct xfs_file_vm_ops = { }; STATIC int -xfs_file_mmap( - struct file *file, - struct vm_area_struct *vma) +xfs_file_mmap_prepare( + struct vm_area_desc *desc) { + struct file *file = desc->file; struct inode *inode = file_inode(file); struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode)); @@ -1855,13 +1927,14 @@ xfs_file_mmap( * We don't support synchronous mappings for non-DAX files and * for DAX files if underneath dax_device is not synchronous. */ - if (!daxdev_mapping_supported(vma, target->bt_daxdev)) + if (!daxdev_mapping_supported(desc->vm_flags, file_inode(file), + target->bt_daxdev)) return -EOPNOTSUPP; file_accessed(file); - vma->vm_ops = &xfs_file_vm_ops; + desc->vm_ops = &xfs_file_vm_ops; if (IS_DAX(inode)) - vm_flags_set(vma, VM_HUGEPAGE); + desc->vm_flags |= VM_HUGEPAGE; return 0; } @@ -1876,7 +1949,7 @@ const struct file_operations xfs_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, #endif - .mmap = xfs_file_mmap, + .mmap_prepare = xfs_file_mmap_prepare, .open = xfs_file_open, .release = xfs_file_release, .fsync = xfs_file_fsync, |