Diffstat (limited to 'fs/xfs/scrub/xfile.c')
-rw-r--r--	fs/xfs/scrub/xfile.c	324
1 file changed, 324 insertions, 0 deletions
diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
new file mode 100644
index 000000000000..c753c79df203
--- /dev/null
+++ b/fs/xfs/scrub/xfile.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "scrub/scrub.h"
+#include "scrub/xfile.h"
+#include "scrub/xfarray.h"
+#include "scrub/trace.h"
+#include <linux/shmem_fs.h>
+
+/*
+ * Swappable Temporary Memory
+ * ==========================
+ *
+ * Online checking sometimes needs to be able to stage a large amount of data
+ * in memory.  This information might not fit in the available memory and it
+ * doesn't all need to be accessible at all times.  In other words, we want an
+ * indexed data buffer to store data that can be paged out.
+ *
+ * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
+ * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file
+ * to store our staging data.  This file is not installed in the file
+ * descriptor table so that user programs cannot access the data, which means
+ * that the xfile must be freed with xfile_destroy.
+ *
+ * xfiles assume that the caller will handle all required concurrency
+ * management; standard vfs locks (freezer and inode) are not taken.  Reads
+ * and writes are satisfied directly from the page cache.
+ */
+
+/*
+ * xfiles must not be exposed to userspace and require upper layers to
+ * coordinate access to the one handle returned by the constructor, so
+ * establish a separate lock class for xfiles to avoid confusing lockdep.
+ */
+static struct lock_class_key xfile_i_mutex_key;
+
+/*
+ * Create an xfile of the given size.  The description will be used in the
+ * trace output.
+ */
+int
+xfile_create(
+	const char		*description,
+	loff_t			isize,
+	struct xfile		**xfilep)
+{
+	struct inode		*inode;
+	struct xfile		*xf;
+	int			error;
+
+	xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
+	if (!xf)
+		return -ENOMEM;
+
+	xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
+	if (IS_ERR(xf->file)) {
+		error = PTR_ERR(xf->file);
+		goto out_xfile;
+	}
+
+	inode = file_inode(xf->file);
+	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);
+
+	/*
+	 * We don't want to bother with kmapping data during repair, so don't
+	 * allow highmem pages to back this mapping.
+	 */
+	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
+
+	trace_xfile_create(xf);
+
+	*xfilep = xf;
+	return 0;
+out_xfile:
+	kfree(xf);
+	return error;
+}
+
+/* Close the file and release all resources. */
+void
+xfile_destroy(
+	struct xfile		*xf)
+{
+	struct inode		*inode = file_inode(xf->file);
+
+	trace_xfile_destroy(xf);
+
+	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
+	fput(xf->file);
+	kfree(xf);
+}
+
+/*
+ * Load an object.  Since we're treating this file as "memory", any error or
+ * short IO is treated as a failure to allocate memory.
+ */
+int
+xfile_load(
+	struct xfile		*xf,
+	void			*buf,
+	size_t			count,
+	loff_t			pos)
+{
+	struct inode		*inode = file_inode(xf->file);
+	unsigned int		pflags;
+
+	if (count > MAX_RW_COUNT)
+		return -ENOMEM;
+	if (inode->i_sb->s_maxbytes - pos < count)
+		return -ENOMEM;
+
+	trace_xfile_load(xf, pos, count);
+
+	pflags = memalloc_nofs_save();
+	while (count > 0) {
+		struct folio	*folio;
+		unsigned int	len;
+		unsigned int	offset;
+
+		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
+				SGP_READ) < 0)
+			break;
+		if (!folio) {
+			/*
+			 * No data stored at this offset, just zero the output
+			 * buffer until the next page boundary.
+			 */
+			len = min_t(ssize_t, count,
+					PAGE_SIZE - offset_in_page(pos));
+			memset(buf, 0, len);
+		} else {
+			if (filemap_check_wb_err(inode->i_mapping, 0)) {
+				folio_unlock(folio);
+				folio_put(folio);
+				break;
+			}
+
+			offset = offset_in_folio(folio, pos);
+			len = min_t(ssize_t, count, folio_size(folio) - offset);
+			memcpy(buf, folio_address(folio) + offset, len);
+
+			folio_unlock(folio);
+			folio_put(folio);
+		}
+		count -= len;
+		pos += len;
+		buf += len;
+	}
+	memalloc_nofs_restore(pflags);
+
+	if (count)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Store an object.  Since we're treating this file as "memory", any error or
+ * short IO is treated as a failure to allocate memory.
+ */
+int
+xfile_store(
+	struct xfile		*xf,
+	const void		*buf,
+	size_t			count,
+	loff_t			pos)
+{
+	struct inode		*inode = file_inode(xf->file);
+	unsigned int		pflags;
+
+	if (count > MAX_RW_COUNT)
+		return -ENOMEM;
+	if (inode->i_sb->s_maxbytes - pos < count)
+		return -ENOMEM;
+
+	trace_xfile_store(xf, pos, count);
+
+	/*
+	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
+	 * actually allocates a folio instead of erroring out.
+	 */
+	if (pos + count > i_size_read(inode))
+		i_size_write(inode, pos + count);
+
+	pflags = memalloc_nofs_save();
+	while (count > 0) {
+		struct folio	*folio;
+		unsigned int	len;
+		unsigned int	offset;
+
+		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
+				SGP_CACHE) < 0)
+			break;
+		if (filemap_check_wb_err(inode->i_mapping, 0)) {
+			folio_unlock(folio);
+			folio_put(folio);
+			break;
+		}
+
+		offset = offset_in_folio(folio, pos);
+		len = min_t(ssize_t, count, folio_size(folio) - offset);
+		memcpy(folio_address(folio) + offset, buf, len);
+
+		folio_mark_dirty(folio);
+		folio_unlock(folio);
+		folio_put(folio);
+
+		count -= len;
+		pos += len;
+		buf += len;
+	}
+	memalloc_nofs_restore(pflags);
+
+	if (count)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Find the next written area in the xfile data for a given offset. */
+loff_t
+xfile_seek_data(
+	struct xfile		*xf,
+	loff_t			pos)
+{
+	loff_t			ret;
+
+	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
+	trace_xfile_seek_data(xf, pos, ret);
+	return ret;
+}
+
+/*
+ * Grab the (locked) folio for a memory object.  The object cannot span a
+ * folio boundary.  Returns the locked folio if successful, NULL if there was
+ * no folio or it didn't cover the range requested, or an ERR_PTR on failure.
+ */
+struct folio *
+xfile_get_folio(
+	struct xfile		*xf,
+	loff_t			pos,
+	size_t			len,
+	unsigned int		flags)
+{
+	struct inode		*inode = file_inode(xf->file);
+	struct folio		*folio = NULL;
+	unsigned int		pflags;
+	int			error;
+
+	if (inode->i_sb->s_maxbytes - pos < len)
+		return ERR_PTR(-ENOMEM);
+
+	trace_xfile_get_folio(xf, pos, len);
+
+	/*
+	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
+	 * actually allocates a folio instead of erroring out.
+	 */
+	if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
+		i_size_write(inode, pos + len);
+
+	pflags = memalloc_nofs_save();
+	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
+			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
+	memalloc_nofs_restore(pflags);
+	if (error)
+		return ERR_PTR(error);
+
+	if (!folio)
+		return NULL;
+
+	if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
+		folio_unlock(folio);
+		folio_put(folio);
+		return NULL;
+	}
+
+	if (filemap_check_wb_err(inode->i_mapping, 0)) {
+		folio_unlock(folio);
+		folio_put(folio);
+		return ERR_PTR(-EIO);
+	}
+
+	/*
+	 * Mark the folio dirty so that it won't be reclaimed once we drop the
+	 * (potentially last) reference in xfile_put_folio.
+	 */
+	if (flags & XFILE_ALLOC)
+		folio_mark_dirty(folio);
+	return folio;
+}
+
+/*
+ * Release the (locked) folio for a memory object.
+ */
+void
+xfile_put_folio(
+	struct xfile		*xf,
+	struct folio		*folio)
+{
+	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));
+
+	folio_unlock(folio);
+	folio_put(folio);
+}
+
+/* Discard the page cache that's backing a range of the xfile. */
+void
+xfile_discard(
+	struct xfile		*xf,
+	loff_t			pos,
+	u64			count)
+{
+	trace_xfile_discard(xf, pos, count);
+
+	shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
+}
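For context, here is a minimal sketch of how a kernel-internal caller might
drive the mediated load/store API above.  The xrep_stage_frobs() helper and
the xfrob record are hypothetical names invented for illustration; only the
xfile_* calls come from the patch itself, and the relevant declarations live
in scrub/xfile.h.

/*
 * Hypothetical staging helper; only the xfile_* calls are real.  An xfile
 * behaves like pageable memory: stores allocate shmem folios on demand,
 * and loads from never-written ranges come back as zeroes.
 */
struct xfrob {
	__u64			ino;
	__u64			count;
};

static int
xrep_stage_frobs(void)
{
	struct xfrob		frob = { .ino = 128, .count = 1 };
	struct xfrob		check = { };
	struct xfile		*xf;
	int			error;

	error = xfile_create("frob staging", 0, &xf);
	if (error)
		return error;

	/* Stage one record at offset zero. */
	error = xfile_store(xf, &frob, sizeof(frob), 0);
	if (error)
		goto out;

	/* Read it back through the page cache. */
	error = xfile_load(xf, &check, sizeof(check), 0);
out:
	xfile_destroy(xf);
	return error;
}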

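The folio accessors support a second pattern: pinning the backing folio and
operating on its contents in place, which avoids the memcpy done by
xfile_load/xfile_store.  A sketch under the same caveats, assuming
XFILE_ALLOC is the allocation flag from scrub/xfile.h and that the helper
name is invented here:

/* Hypothetical in-place update; the xfile_* calls are from the patch. */
static int
xrep_touch_first_counter(
	struct xfile		*xf)
{
	struct folio		*folio;
	__u64			*p;

	/* Pin and lock the folio backing offset 0, allocating if absent. */
	folio = xfile_get_folio(xf, 0, sizeof(__u64), XFILE_ALLOC);
	if (IS_ERR(folio))
		return PTR_ERR(folio);
	if (!folio)
		return -ENODATA;

	/*
	 * The mapping refuses highmem pages (see xfile_create), so the folio
	 * contents are directly addressable without kmapping.
	 */
	p = folio_address(folio) + offset_in_folio(folio, 0);
	*p = 42;

	/* Unlock and unpin; XFILE_ALLOC already marked the folio dirty. */
	xfile_put_folio(xf, folio);
	return 0;
}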