Diffstat (limited to 'fs/xfs/scrub/xfile.c')
-rw-r--r--  fs/xfs/scrub/xfile.c  324
 1 file changed, 324 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
new file mode 100644
index 000000000000..c753c79df203
--- /dev/null
+++ b/fs/xfs/scrub/xfile.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "scrub/scrub.h"
+#include "scrub/xfile.h"
+#include "scrub/xfarray.h"
+#include "scrub/trace.h"
+#include <linux/shmem_fs.h>
+
+/*
+ * Swappable Temporary Memory
+ * ==========================
+ *
+ * Online checking sometimes needs to be able to stage a large amount of data
+ * in memory. This information might not fit in the available memory and it
+ * doesn't all need to be accessible at all times. In other words, we want an
+ * indexed data buffer to store data that can be paged out.
+ *
+ * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
+ * requirements. Therefore, the xfile mechanism uses an unlinked shmem file to
+ * store our staging data. This file is not installed in the file descriptor
+ * table so that user programs cannot access the data, which means that the
+ * xfile must be freed with xfile_destroy.
+ *
+ * xfiles assume that the caller will handle all required concurrency
+ * management; standard vfs locks (freezer and inode) are not taken. Reads
+ * and writes are satisfied directly from the page cache.
+ */
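+
+/*
+ * Usage sketch (illustrative only, hypothetical caller code): a typical
+ * user creates an xfile, stages records in it, and frees it when the scan
+ * is done.  The helpers named here are the ones defined in this file.
+ *
+ *	struct xfile *xf;
+ *	int error;
+ *
+ *	error = xfile_create("staging data", 0, &xf);
+ *	if (error)
+ *		return error;
+ *	(stage and retrieve records with xfile_store/xfile_load)
+ *	xfile_destroy(xf);
+ */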
+
+/*
+ * xfiles must not be exposed to userspace and require upper layers to
+ * coordinate access to the one handle returned by the constructor, so
+ * establish a separate lock class for xfiles to avoid confusing lockdep.
+ */
+static struct lock_class_key xfile_i_mutex_key;
+
+/*
+ * Create an xfile of the given size. The description will be used in the
+ * trace output.
+ */
+int
+xfile_create(
+ const char *description,
+ loff_t isize,
+ struct xfile **xfilep)
+{
+ struct inode *inode;
+ struct xfile *xf;
+ int error;
+
+ xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
+ if (!xf)
+ return -ENOMEM;
+
+ xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
+ if (IS_ERR(xf->file)) {
+ error = PTR_ERR(xf->file);
+ goto out_xfile;
+ }
+
+ inode = file_inode(xf->file);
+ lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);
+
+ /*
+ * We don't want to bother with kmapping data during repair, so don't
+ * allow highmem pages to back this mapping.
+ */
+ mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
+
+ trace_xfile_create(xf);
+
+ *xfilep = xf;
+ return 0;
+out_xfile:
+ kfree(xf);
+ return error;
+}
+
+/* Close the file and release all resources. */
+void
+xfile_destroy(
+ struct xfile *xf)
+{
+ struct inode *inode = file_inode(xf->file);
+
+ trace_xfile_destroy(xf);
+
+ lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
+ fput(xf->file);
+ kfree(xf);
+}
+
+/*
+ * Load an object. Since we're treating this file as "memory", any error or
+ * short IO is treated as a failure to allocate memory.
+ */
+int
+xfile_load(
+ struct xfile *xf,
+ void *buf,
+ size_t count,
+ loff_t pos)
+{
+ struct inode *inode = file_inode(xf->file);
+ unsigned int pflags;
+
+ if (count > MAX_RW_COUNT)
+ return -ENOMEM;
+ if (inode->i_sb->s_maxbytes - pos < count)
+ return -ENOMEM;
+
+ trace_xfile_load(xf, pos, count);
+
+ pflags = memalloc_nofs_save();
+ while (count > 0) {
+ struct folio *folio;
+ unsigned int len;
+ unsigned int offset;
+
+ if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
+ SGP_READ) < 0)
+ break;
+ if (!folio) {
+ /*
+ * No data stored at this offset, just zero the output
+ * buffer until the next page boundary.
+ */
+ len = min_t(ssize_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ memset(buf, 0, len);
+ } else {
+ if (filemap_check_wb_err(inode->i_mapping, 0)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ break;
+ }
+
+ offset = offset_in_folio(folio, pos);
+ len = min_t(ssize_t, count, folio_size(folio) - offset);
+ memcpy(buf, folio_address(folio) + offset, len);
+
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+ count -= len;
+ pos += len;
+ buf += len;
+ }
+ memalloc_nofs_restore(pflags);
+
+ if (count)
+ return -ENOMEM;
+ return 0;
+}
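+
+/*
+ * Example of the sparse-read semantics above (hypothetical caller code):
+ * a load from a range that was never stored succeeds and returns zeroes,
+ * because holes are zero-filled rather than treated as errors.
+ *
+ *	struct xfs_rmap_irec rec;
+ *	int error;
+ *
+ *	error = xfile_load(xf, &rec, sizeof(rec), idx * sizeof(rec));
+ *	if (error)
+ *		return error;
+ *
+ * If nothing was ever stored at that offset, rec comes back zeroed and
+ * error is zero.
+ */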
+
+/*
+ * Store an object. Since we're treating this file as "memory", any error or
+ * short IO is treated as a failure to allocate memory.
+ */
+int
+xfile_store(
+ struct xfile *xf,
+ const void *buf,
+ size_t count,
+ loff_t pos)
+{
+ struct inode *inode = file_inode(xf->file);
+ unsigned int pflags;
+
+ if (count > MAX_RW_COUNT)
+ return -ENOMEM;
+ if (inode->i_sb->s_maxbytes - pos < count)
+ return -ENOMEM;
+
+ trace_xfile_store(xf, pos, count);
+
+ /*
+ * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
+ * actually allocates a folio instead of erroring out.
+ */
+ if (pos + count > i_size_read(inode))
+ i_size_write(inode, pos + count);
+
+ pflags = memalloc_nofs_save();
+ while (count > 0) {
+ struct folio *folio;
+ unsigned int len;
+ unsigned int offset;
+
+ if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
+ SGP_CACHE) < 0)
+ break;
+ if (filemap_check_wb_err(inode->i_mapping, 0)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ break;
+ }
+
+ offset = offset_in_folio(folio, pos);
+ len = min_t(ssize_t, count, folio_size(folio) - offset);
+ memcpy(folio_address(folio) + offset, buf, len);
+
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+
+ count -= len;
+ pos += len;
+ buf += len;
+ }
+ memalloc_nofs_restore(pflags);
+
+ if (count)
+ return -ENOMEM;
+ return 0;
+}
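+
+/*
+ * Round-trip sketch (hypothetical caller code): store a fixed-size record
+ * at a computed offset, then load it back.  The file grows on demand, and
+ * any failure is reported as -ENOMEM, matching the "this is memory" model.
+ *
+ *	loff_t pos = idx * sizeof(rec);
+ *
+ *	error = xfile_store(xf, &rec, sizeof(rec), pos);
+ *	if (!error)
+ *		error = xfile_load(xf, &rec, sizeof(rec), pos);
+ */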
+
+/* Find the next written area in the xfile data for a given offset. */
+loff_t
+xfile_seek_data(
+ struct xfile *xf,
+ loff_t pos)
+{
+ loff_t ret;
+
+ ret = vfs_llseek(xf->file, pos, SEEK_DATA);
+ trace_xfile_seek_data(xf, pos, ret);
+ return ret;
+}
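+
+/*
+ * Iteration sketch (hypothetical caller code): shmem supports SEEK_DATA,
+ * so the written regions of an xfile can be walked without touching the
+ * holes.  vfs_llseek returns -ENXIO once pos passes the last written byte,
+ * which terminates the loop.
+ *
+ *	loff_t pos = 0;
+ *
+ *	while ((pos = xfile_seek_data(xf, pos)) >= 0) {
+ *		(process the data starting at pos)
+ *		pos += PAGE_SIZE;
+ *	}
+ */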
+
+/*
+ * Grab the (locked) folio for a memory object. The object cannot span a folio
+ * boundary. Returns the locked folio if successful, NULL if there was no
+ * folio or it didn't cover the range requested, or an ERR_PTR on failure.
+ */
+struct folio *
+xfile_get_folio(
+ struct xfile *xf,
+ loff_t pos,
+ size_t len,
+ unsigned int flags)
+{
+ struct inode *inode = file_inode(xf->file);
+ struct folio *folio = NULL;
+ unsigned int pflags;
+ int error;
+
+ if (inode->i_sb->s_maxbytes - pos < len)
+ return ERR_PTR(-ENOMEM);
+
+ trace_xfile_get_folio(xf, pos, len);
+
+ /*
+ * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
+ * actually allocates a folio instead of erroring out.
+ */
+ if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
+ i_size_write(inode, pos + len);
+
+ pflags = memalloc_nofs_save();
+ error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
+ (flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
+ memalloc_nofs_restore(pflags);
+ if (error)
+ return ERR_PTR(error);
+
+ if (!folio)
+ return NULL;
+
+ if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ return NULL;
+ }
+
+ if (filemap_check_wb_err(inode->i_mapping, 0)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ return ERR_PTR(-EIO);
+ }
+
+ /*
+ * Mark the folio dirty so that it won't be reclaimed once we drop the
+ * (potentially last) reference in xfile_put_folio.
+ */
+ if (flags & XFILE_ALLOC)
+ folio_mark_dirty(folio);
+ return folio;
+}
+
+/*
+ * Release the (locked) folio for a memory object.
+ */
+void
+xfile_put_folio(
+ struct xfile *xf,
+ struct folio *folio)
+{
+ trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));
+
+ folio_unlock(folio);
+ folio_put(folio);
+}
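+
+/*
+ * Direct-access sketch (hypothetical caller code): map an object in place
+ * to avoid bounce buffering, update it, then drop the folio.  The object
+ * must not cross a folio boundary.
+ *
+ *	struct folio *folio;
+ *
+ *	folio = xfile_get_folio(xf, pos, sizeof(rec), XFILE_ALLOC);
+ *	if (IS_ERR(folio))
+ *		return PTR_ERR(folio);
+ *	if (!folio)
+ *		return -ENOMEM;
+ *	memcpy(folio_address(folio) + offset_in_folio(folio, pos), &rec,
+ *			sizeof(rec));
+ *	xfile_put_folio(xf, folio);
+ */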
+
+/* Discard the page cache that's backing a range of the xfile. */
+void
+xfile_discard(
+ struct xfile *xf,
+ loff_t pos,
+ u64 count)
+{
+ trace_xfile_discard(xf, pos, count);
+
+ shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
+}
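+
+/*
+ * Usage note (hypothetical caller code): once a staged range has been
+ * consumed, punching it out frees the backing pages immediately instead of
+ * holding them until xfile_destroy:
+ *
+ *	xfile_discard(xf, 0, bytes_consumed);
+ */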